Spaces:
Sleeping
Sleeping
supercat666
committed on
Commit
·
a5afc1a
1
Parent(s):
adf804d
fixed cas9off
Browse files
- app.py +1 -1
- cas9off.py +9 -29
app.py
CHANGED
@@ -94,8 +94,8 @@ if selected_model == 'Cas9':
|
|
94 |
if target_selection == 'on-target':
|
95 |
|
96 |
pass
|
97 |
-
elif target_selection == 'off-target':
|
98 |
|
|
|
99 |
ENTRY_METHODS = dict(
|
100 |
manual='Manual entry of target sequence',
|
101 |
txt="txt file upload"
|
|
|
94 |
if target_selection == 'on-target':
|
95 |
|
96 |
pass
|
|
|
97 |
|
98 |
+
elif target_selection == 'off-target':
|
99 |
ENTRY_METHODS = dict(
|
100 |
manual='Manual entry of target sequence',
|
101 |
txt="txt file upload"
|
cas9off.py
CHANGED
@@ -4,28 +4,12 @@ import pandas as pd
|
|
4 |
import os
|
5 |
import argparse
|
6 |
|
7 |
-
# column names
|
8 |
-
ID_COL = 'Transcript ID'
|
9 |
-
SEQ_COL = 'Transcript Sequence'
|
10 |
-
|
11 |
# configure GPUs
|
12 |
for gpu in tf.config.list_physical_devices('GPU'):
|
13 |
tf.config.experimental.set_memory_growth(gpu, enable=True)
|
14 |
if len(tf.config.list_physical_devices('GPU')) > 0:
|
15 |
tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
|
16 |
|
17 |
-
# application configuration
|
18 |
-
BATCH_SIZE_COMPUTE = 500
|
19 |
-
BATCH_SIZE_SCAN = 20
|
20 |
-
BATCH_SIZE_TRANSCRIPTS = 50
|
21 |
-
NUM_TOP_GUIDES = 10
|
22 |
-
NUM_MISMATCHES = 3
|
23 |
-
RUN_MODES = dict(
|
24 |
-
all='All on-target guides per transcript',
|
25 |
-
top_guides='Top {:d} guides per transcript'.format(NUM_TOP_GUIDES),
|
26 |
-
titration='Top {:d} guides per transcript & their titration candidates'.format(NUM_TOP_GUIDES)
|
27 |
-
)
|
28 |
-
|
29 |
class Encoder:
|
30 |
def __init__(self, on_seq, off_seq, with_category = False, label = None, with_reg_val = False, value = None):
|
31 |
tlen = 24
|
@@ -81,9 +65,6 @@ class Encoder:
|
|
81 |
on_off_dim7_codes.append(np.concatenate((diff_code, dir_code)))
|
82 |
self.on_off_code = np.array(on_off_dim7_codes)
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
def encode_on_off_seq_pairs(input_file):
|
88 |
inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
|
89 |
input_codes = []
|
@@ -107,37 +88,36 @@ def CRISPR_net_predict(X_test):
|
|
107 |
y_pred = loaded_model.predict(X_test).flatten()
|
108 |
return y_pred
|
109 |
|
|
|
110 |
def process_input_and_predict(input_data, input_type='manual'):
|
111 |
if input_type == 'manual':
|
112 |
-
# Process manual input string into DataFrame
|
113 |
sequences = [seq.split(',') for seq in input_data.split('\n')]
|
114 |
inputs = pd.DataFrame(sequences, columns=['on_seq', 'off_seq'])
|
115 |
elif input_type == 'file':
|
116 |
-
# Read sequences from a file into DataFrame
|
117 |
inputs = pd.read_csv(input_data, delimiter=",", header=None, names=['on_seq', 'off_seq'])
|
118 |
|
119 |
-
|
120 |
input_codes = []
|
121 |
for idx, row in inputs.iterrows():
|
122 |
on_seq = row['on_seq']
|
123 |
off_seq = row['off_seq']
|
124 |
-
|
125 |
-
# Validate on_seq and off_seq
|
126 |
if not on_seq or not off_seq:
|
127 |
-
# Skip the current row if either on_seq or off_seq is missing or empty
|
128 |
continue
|
129 |
|
130 |
en = Encoder(on_seq=on_seq, off_seq=off_seq)
|
131 |
input_codes.append(en.on_off_code)
|
|
|
132 |
|
133 |
-
# Convert to numpy array and reshape for the model
|
134 |
input_codes = np.array(input_codes)
|
135 |
input_codes = input_codes.reshape((len(input_codes), 1, 24, 7))
|
136 |
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
139 |
|
140 |
-
return
|
141 |
|
142 |
if __name__ == '__main__':
|
143 |
parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")
|
|
|
4 |
import os
|
5 |
import argparse
|
6 |
|
|
|
|
|
|
|
|
|
7 |
# configure GPUs
|
8 |
for gpu in tf.config.list_physical_devices('GPU'):
|
9 |
tf.config.experimental.set_memory_growth(gpu, enable=True)
|
10 |
if len(tf.config.list_physical_devices('GPU')) > 0:
|
11 |
tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
class Encoder:
|
14 |
def __init__(self, on_seq, off_seq, with_category = False, label = None, with_reg_val = False, value = None):
|
15 |
tlen = 24
|
|
|
65 |
on_off_dim7_codes.append(np.concatenate((diff_code, dir_code)))
|
66 |
self.on_off_code = np.array(on_off_dim7_codes)
|
67 |
|
|
|
|
|
|
|
68 |
def encode_on_off_seq_pairs(input_file):
|
69 |
inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
|
70 |
input_codes = []
|
|
|
88 |
y_pred = loaded_model.predict(X_test).flatten()
|
89 |
return y_pred
|
90 |
|
91 |
+
|
92 |
def process_input_and_predict(input_data, input_type='manual'):
|
93 |
if input_type == 'manual':
|
|
|
94 |
sequences = [seq.split(',') for seq in input_data.split('\n')]
|
95 |
inputs = pd.DataFrame(sequences, columns=['on_seq', 'off_seq'])
|
96 |
elif input_type == 'file':
|
|
|
97 |
inputs = pd.read_csv(input_data, delimiter=",", header=None, names=['on_seq', 'off_seq'])
|
98 |
|
99 |
+
valid_inputs = []
|
100 |
input_codes = []
|
101 |
for idx, row in inputs.iterrows():
|
102 |
on_seq = row['on_seq']
|
103 |
off_seq = row['off_seq']
|
|
|
|
|
104 |
if not on_seq or not off_seq:
|
|
|
105 |
continue
|
106 |
|
107 |
en = Encoder(on_seq=on_seq, off_seq=off_seq)
|
108 |
input_codes.append(en.on_off_code)
|
109 |
+
valid_inputs.append((on_seq, off_seq))
|
110 |
|
|
|
111 |
input_codes = np.array(input_codes)
|
112 |
input_codes = input_codes.reshape((len(input_codes), 1, 24, 7))
|
113 |
|
114 |
+
y_pred = CRISPR_net_predict(input_codes)
|
115 |
+
|
116 |
+
# Create a new DataFrame from valid inputs and predictions
|
117 |
+
result_df = pd.DataFrame(valid_inputs, columns=['on_seq', 'off_seq'])
|
118 |
+
result_df['CRISPR_Net_score'] = y_pred
|
119 |
|
120 |
+
return result_df
|
121 |
|
122 |
if __name__ == '__main__':
|
123 |
parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")
|