Spaces:
Sleeping
Sleeping
supercat666
commited on
Commit
·
bc641c8
1
Parent(s):
6392fd6
fix cas9 and cas12 output
Browse files- app.py +2 -4
- cas12lstm.py +69 -3
- cas9att.py +1 -4
app.py
CHANGED
@@ -185,8 +185,7 @@ if selected_model == 'Cas9':
|
|
185 |
if predict_button and gene_symbol:
|
186 |
with st.spinner('Predicting... Please wait'):
|
187 |
predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)
|
188 |
-
|
189 |
-
sorted_predictions = sorted(predictions)[:10]
|
190 |
st.session_state['on_target_results'] = sorted_predictions
|
191 |
st.session_state['gene_sequence'] = gene_sequence # Save gene sequence in session state
|
192 |
st.session_state['exons'] = exons # Store exon data
|
@@ -436,8 +435,7 @@ elif selected_model == 'Cas12':
|
|
436 |
if predict_button and gene_symbol:
|
437 |
with st.spinner('Predicting... Please wait'):
|
438 |
predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)
|
439 |
-
|
440 |
-
sorted_predictions = sorted(predictions)[:10]
|
441 |
st.session_state['on_target_results'] = sorted_predictions
|
442 |
st.session_state['gene_sequence'] = gene_sequence # Save gene sequence in session state
|
443 |
st.session_state['exons'] = exons # Store exon data
|
|
|
185 |
if predict_button and gene_symbol:
|
186 |
with st.spinner('Predicting... Please wait'):
|
187 |
predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)
|
188 |
+
sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
|
|
|
189 |
st.session_state['on_target_results'] = sorted_predictions
|
190 |
st.session_state['gene_sequence'] = gene_sequence # Save gene sequence in session state
|
191 |
st.session_state['exons'] = exons # Store exon data
|
|
|
435 |
if predict_button and gene_symbol:
|
436 |
with st.spinner('Predicting... Please wait'):
|
437 |
predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)
|
438 |
+
sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
|
|
|
439 |
st.session_state['on_target_results'] = sorted_predictions
|
440 |
st.session_state['gene_sequence'] = gene_sequence # Save gene sequence in session state
|
441 |
st.session_state['exons'] = exons # Store exon data
|
cas12lstm.py
CHANGED
@@ -14,6 +14,10 @@ from functools import reduce
|
|
14 |
from operator import add
|
15 |
import tabulate
|
16 |
from difflib import SequenceMatcher
|
|
|
|
|
|
|
|
|
17 |
|
18 |
import cyvcf2
|
19 |
import parasail
|
@@ -184,9 +188,71 @@ def process_gene(gene_symbol, model_path):
|
|
184 |
for result in results:
|
185 |
for item in result:
|
186 |
output.append(item)
|
187 |
-
# Sort results based on prediction score (assuming score is at the 8th index)
|
188 |
-
sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
|
189 |
|
190 |
# Return the sorted output, combined gene sequences, and all exons
|
191 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
|
|
14 |
from operator import add
|
15 |
import tabulate
|
16 |
from difflib import SequenceMatcher
|
17 |
+
from Bio import SeqIO
|
18 |
+
from Bio.SeqRecord import SeqRecord
|
19 |
+
from Bio.SeqFeature import SeqFeature, FeatureLocation
|
20 |
+
from Bio.Seq import Seq
|
21 |
|
22 |
import cyvcf2
|
23 |
import parasail
|
|
|
188 |
for result in results:
|
189 |
for item in result:
|
190 |
output.append(item)
|
|
|
|
|
191 |
|
192 |
# Return the sorted output, combined gene sequences, and all exons
|
193 |
+
return results, all_gene_sequences, all_exons
|
194 |
+
|
195 |
+
def create_genbank_features(data):
|
196 |
+
features = []
|
197 |
+
|
198 |
+
# If the input data is a DataFrame, convert it to a list of lists
|
199 |
+
if isinstance(data, pd.DataFrame):
|
200 |
+
formatted_data = data.values.tolist()
|
201 |
+
elif isinstance(data, list):
|
202 |
+
formatted_data = data
|
203 |
+
else:
|
204 |
+
raise TypeError("Data should be either a list or a pandas DataFrame.")
|
205 |
+
|
206 |
+
for row in formatted_data:
|
207 |
+
try:
|
208 |
+
start = int(row[1])
|
209 |
+
end = int(row[2])
|
210 |
+
except ValueError as e:
|
211 |
+
print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
|
212 |
+
continue
|
213 |
+
|
214 |
+
strand = 1 if row[3] == '+' else -1
|
215 |
+
location = FeatureLocation(start=start, end=end, strand=strand)
|
216 |
+
feature = SeqFeature(location=location, type="misc_feature", qualifiers={
|
217 |
+
'label': row[7], # Use gRNA as the label
|
218 |
+
'note': f"Prediction: {row[8]}" # Include the prediction score
|
219 |
+
})
|
220 |
+
features.append(feature)
|
221 |
+
|
222 |
+
return features
|
223 |
+
|
224 |
+
|
225 |
+
def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
|
226 |
+
# Ensure gene_sequence is a string before creating Seq object
|
227 |
+
if not isinstance(gene_sequence, str):
|
228 |
+
gene_sequence = str(gene_sequence)
|
229 |
+
|
230 |
+
features = create_genbank_features(df)
|
231 |
+
|
232 |
+
# Now gene_sequence is guaranteed to be a string, suitable for Seq
|
233 |
+
seq_obj = Seq(gene_sequence)
|
234 |
+
record = SeqRecord(seq_obj, id=gene_symbol, name=gene_symbol,
|
235 |
+
description=f'CRISPR Cas12 predicted targets for {gene_symbol}', features=features)
|
236 |
+
record.annotations["molecule_type"] = "DNA"
|
237 |
+
SeqIO.write(record, output_path, "genbank")
|
238 |
+
|
239 |
+
|
240 |
+
def create_bed_file_from_df(df, output_path):
|
241 |
+
with open(output_path, 'w') as bed_file:
|
242 |
+
for index, row in df.iterrows():
|
243 |
+
chrom = row["Chr"]
|
244 |
+
start = int(row["Start Pos"])
|
245 |
+
end = int(row["End Pos"])
|
246 |
+
strand = '+' if row["Strand"] == '1' else '-'
|
247 |
+
gRNA = row["gRNA"]
|
248 |
+
score = str(row["Prediction"])
|
249 |
+
# transcript_id is not typically part of the standard BED columns but added here for completeness
|
250 |
+
transcript_id = row["Transcript"]
|
251 |
+
|
252 |
+
# Writing only standard BED columns; additional columns can be appended as needed
|
253 |
+
bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
|
254 |
+
|
255 |
+
|
256 |
+
def create_csv_from_df(df, output_path):
|
257 |
+
df.to_csv(output_path, index=False)
|
258 |
|
cas9att.py
CHANGED
@@ -228,12 +228,9 @@ def process_gene(gene_symbol, model_path):
|
|
228 |
for result in results:
|
229 |
for item in result:
|
230 |
output.append(item)
|
231 |
-
|
232 |
-
# Sort results based on prediction score (assuming score is at the 8th index)
|
233 |
-
sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
|
234 |
|
235 |
# Return the sorted output, combined gene sequences, and all exons
|
236 |
-
return
|
237 |
|
238 |
|
239 |
def create_genbank_features(data):
|
|
|
228 |
for result in results:
|
229 |
for item in result:
|
230 |
output.append(item)
|
|
|
|
|
|
|
231 |
|
232 |
# Return the sorted output, combined gene sequences, and all exons
|
233 |
+
return results, all_gene_sequences, all_exons
|
234 |
|
235 |
|
236 |
def create_genbank_features(data):
|