Spaces: NiniCat/CRISPRTool

Sleeping

App Files Community

supercat666 committed on Jan 8, 2024

Commit

a5afc1a

1 Parent(s): adf804d

fixed cas9off

Browse files

Files changed (2) hide show

app.py +1 -1
cas9off.py +9 -29

app.py CHANGED Viewed

@@ -94,8 +94,8 @@ if selected_model == 'Cas9':
     if target_selection == 'on-target':
         pass
-    elif target_selection == 'off-target':
         ENTRY_METHODS = dict(
             manual='Manual entry of target sequence',
             txt="txt file upload"

     if target_selection == 'on-target':
         pass
+    elif target_selection == 'off-target':
         ENTRY_METHODS = dict(
             manual='Manual entry of target sequence',
             txt="txt file upload"

cas9off.py CHANGED Viewed

@@ -4,28 +4,12 @@ import pandas as pd
 import os
 import argparse
-# column names
-ID_COL = 'Transcript ID'
-SEQ_COL = 'Transcript Sequence'
 # configure GPUs
 for gpu in tf.config.list_physical_devices('GPU'):
     tf.config.experimental.set_memory_growth(gpu, enable=True)
 if len(tf.config.list_physical_devices('GPU')) > 0:
     tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
-# application configuration
-BATCH_SIZE_COMPUTE = 500
-BATCH_SIZE_SCAN = 20
-BATCH_SIZE_TRANSCRIPTS = 50
-NUM_TOP_GUIDES = 10
-NUM_MISMATCHES = 3
-RUN_MODES = dict(
-    all='All on-target guides per transcript',
-    top_guides='Top {:d} guides per transcript'.format(NUM_TOP_GUIDES),
-    titration='Top {:d} guides per transcript & their titration candidates'.format(NUM_TOP_GUIDES)
-)
 class Encoder:
     def __init__(self, on_seq, off_seq, with_category = False, label = None, with_reg_val = False, value = None):
         tlen = 24
@@ -81,9 +65,6 @@ class Encoder:
             on_off_dim7_codes.append(np.concatenate((diff_code, dir_code)))
         self.on_off_code = np.array(on_off_dim7_codes)
 def encode_on_off_seq_pairs(input_file):
     inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
     input_codes = []
@@ -107,37 +88,36 @@ def CRISPR_net_predict(X_test):
     y_pred = loaded_model.predict(X_test).flatten()
     return y_pred
 def process_input_and_predict(input_data, input_type='manual'):
     if input_type == 'manual':
-        # Process manual input string into DataFrame
         sequences = [seq.split(',') for seq in input_data.split('\n')]
         inputs = pd.DataFrame(sequences, columns=['on_seq', 'off_seq'])
     elif input_type == 'file':
-        # Read sequences from a file into DataFrame
         inputs = pd.read_csv(input_data, delimiter=",", header=None, names=['on_seq', 'off_seq'])
-    # Encode the sequences
     input_codes = []
     for idx, row in inputs.iterrows():
         on_seq = row['on_seq']
         off_seq = row['off_seq']
-        # Validate on_seq and off_seq
         if not on_seq or not off_seq:
-            # Skip the current row if either on_seq or off_seq is missing or empty
             continue
         en = Encoder(on_seq=on_seq, off_seq=off_seq)
         input_codes.append(en.on_off_code)
-    # Convert to numpy array and reshape for the model
     input_codes = np.array(input_codes)
     input_codes = input_codes.reshape((len(input_codes), 1, 24, 7))
-    # Predict with CRISPR-Net model
-    inputs['CRISPR_Net_score'] = CRISPR_net_predict(input_codes)
-    return inputs
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")

 import os
 import argparse
 # configure GPUs
 for gpu in tf.config.list_physical_devices('GPU'):
     tf.config.experimental.set_memory_growth(gpu, enable=True)
 if len(tf.config.list_physical_devices('GPU')) > 0:
     tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
 class Encoder:
     def __init__(self, on_seq, off_seq, with_category = False, label = None, with_reg_val = False, value = None):
         tlen = 24
             on_off_dim7_codes.append(np.concatenate((diff_code, dir_code)))
         self.on_off_code = np.array(on_off_dim7_codes)
 def encode_on_off_seq_pairs(input_file):
     inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
     input_codes = []
     y_pred = loaded_model.predict(X_test).flatten()
     return y_pred
 def process_input_and_predict(input_data, input_type='manual'):
     if input_type == 'manual':
         sequences = [seq.split(',') for seq in input_data.split('\n')]
         inputs = pd.DataFrame(sequences, columns=['on_seq', 'off_seq'])
     elif input_type == 'file':
         inputs = pd.read_csv(input_data, delimiter=",", header=None, names=['on_seq', 'off_seq'])
+    valid_inputs = []
     input_codes = []
     for idx, row in inputs.iterrows():
         on_seq = row['on_seq']
         off_seq = row['off_seq']
         if not on_seq or not off_seq:
             continue
         en = Encoder(on_seq=on_seq, off_seq=off_seq)
         input_codes.append(en.on_off_code)
+        valid_inputs.append((on_seq, off_seq))
     input_codes = np.array(input_codes)
     input_codes = input_codes.reshape((len(input_codes), 1, 24, 7))
+    y_pred = CRISPR_net_predict(input_codes)
+    # Create a new DataFrame from valid inputs and predictions
+    result_df = pd.DataFrame(valid_inputs, columns=['on_seq', 'off_seq'])
+    result_df['CRISPR_Net_score'] = y_pred
+    return result_df
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")