Spaces:
Sleeping
Sleeping
supercat666
committed on
Commit
·
3023ae4
1
Parent(s):
379f333
fix
Browse files- app.py +19 -14
- cas9on.py +31 -32
- crisprTool.md +2 -1
app.py
CHANGED
@@ -181,12 +181,17 @@ if selected_model == 'Cas9':
|
|
181 |
# Include "Target" in the DataFrame's columns
|
182 |
try:
|
183 |
df = pd.DataFrame(st.session_state['on_target_results'],
|
184 |
-
columns=["
|
185 |
st.dataframe(df)
|
186 |
except ValueError as e:
|
187 |
st.error(f"DataFrame creation error: {e}")
|
188 |
# Optionally print or log the problematic data for debugging:
|
189 |
print(st.session_state['on_target_results'])
|
|
|
|
|
|
|
|
|
|
|
190 |
# Initialize Plotly figure
|
191 |
fig = go.Figure()
|
192 |
|
@@ -219,17 +224,16 @@ if selected_model == 'Cas9':
|
|
219 |
name='CDS'
|
220 |
))
|
221 |
|
222 |
-
#
|
223 |
-
|
224 |
-
MIN_STRAND_Y = -0.5 # Minimum Y value for negative strand
|
225 |
|
226 |
-
# Iterate over sorted predictions to create the plot
|
227 |
-
for i, prediction in enumerate(st.session_state['on_target_results'], start=1):
|
228 |
-
chrom, start, end, strand, target, gRNA, pred_score = prediction
|
229 |
midpoint = (int(start) + int(end)) / 2
|
230 |
|
231 |
-
#
|
232 |
-
y_value = MAX_STRAND_Y
|
233 |
|
234 |
fig.add_trace(go.Scatter(
|
235 |
x=[midpoint],
|
@@ -238,18 +242,19 @@ if selected_model == 'Cas9':
|
|
238 |
marker=dict(symbol='triangle-up' if strand == '1' else 'triangle-down', size=12),
|
239 |
text=f"Rank: {i}", # Text label
|
240 |
hoverinfo='text',
|
241 |
-
hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
|
242 |
))
|
243 |
|
244 |
# Update layout for clarity and interaction
|
245 |
fig.update_layout(
|
246 |
-
title='Top
|
247 |
xaxis_title='Genomic Position',
|
248 |
-
yaxis_title='Strand',
|
249 |
-
yaxis=dict(range=[
|
|
|
250 |
showlegend=False,
|
251 |
hovermode='closest', # Adjust hover mode
|
252 |
-
hoverdistance=
|
253 |
)
|
254 |
|
255 |
# Display the plot
|
|
|
181 |
# Include "Target" in the DataFrame's columns
|
182 |
try:
|
183 |
df = pd.DataFrame(st.session_state['on_target_results'],
|
184 |
+
columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Target", "gRNA", "pred_Score"])
|
185 |
st.dataframe(df)
|
186 |
except ValueError as e:
|
187 |
st.error(f"DataFrame creation error: {e}")
|
188 |
# Optionally print or log the problematic data for debugging:
|
189 |
print(st.session_state['on_target_results'])
|
190 |
+
|
191 |
+
# Initialize Plotly figure
|
192 |
+
# Adjust hover interaction and strand plotting
|
193 |
+
MAX_STRAND_Y = 0.5 # Maximum Y value for positive strand
|
194 |
+
MIN_STRAND_Y = -0.5 # Minimum Y value for negative strand
|
195 |
# Initialize Plotly figure
|
196 |
fig = go.Figure()
|
197 |
|
|
|
224 |
name='CDS'
|
225 |
))
|
226 |
|
227 |
+
# Define the vertical separation for each rank
|
228 |
+
VERTICAL_GAP = 0.2 # Gap between different ranks
|
|
|
229 |
|
230 |
+
# Iterate over top 5 sorted predictions to create the plot
|
231 |
+
for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1): # Only top 5
|
232 |
+
chrom, start, end, strand, transcript, target, gRNA, pred_score = prediction
|
233 |
midpoint = (int(start) + int(end)) / 2
|
234 |
|
235 |
+
# Vertical position based on rank, spaced by VERTICAL_GAP
|
236 |
+
y_value = MAX_STRAND_Y - (i - 1) * VERTICAL_GAP
|
237 |
|
238 |
fig.add_trace(go.Scatter(
|
239 |
x=[midpoint],
|
|
|
242 |
marker=dict(symbol='triangle-up' if strand == '1' else 'triangle-down', size=12),
|
243 |
text=f"Rank: {i}", # Text label
|
244 |
hoverinfo='text',
|
245 |
+
hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Transcript:{transcript}<br>Prediction Score: {pred_score:.4f}",
|
246 |
))
|
247 |
|
248 |
# Update layout for clarity and interaction
|
249 |
fig.update_layout(
|
250 |
+
title='Top 5 gRNA Sequences by Prediction Score',
|
251 |
xaxis_title='Genomic Position',
|
252 |
+
yaxis_title='Rank / Strand',
|
253 |
+
yaxis=dict(range=[MAX_STRAND_Y - 5 * VERTICAL_GAP, MAX_STRAND_Y + 0.1]),
|
254 |
+
# Adjust y-axis range to fit 5 ranks
|
255 |
showlegend=False,
|
256 |
hovermode='closest', # Adjust hover mode
|
257 |
+
hoverdistance=10, # Reduce hover distance to improve accuracy
|
258 |
)
|
259 |
|
260 |
# Display the plot
|
cas9on.py
CHANGED
@@ -52,12 +52,13 @@ def format_prediction_output(targets, model_path):
|
|
52 |
prediction = dcModel.ontar_predict(encoded_seq)
|
53 |
|
54 |
# Format output
|
55 |
-
|
56 |
chr = target[2]
|
57 |
start = target[3]
|
58 |
end = target[4]
|
59 |
strand = target[5]
|
60 |
-
|
|
|
61 |
|
62 |
return formatted_data
|
63 |
|
@@ -89,18 +90,41 @@ def fetch_ensembl_sequence(transcript_id):
|
|
89 |
print(f"Error fetching sequence data from Ensembl: {response.text}")
|
90 |
return None
|
91 |
|
92 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
targets = []
|
94 |
len_sequence = len(sequence)
|
|
|
95 |
|
|
|
|
|
96 |
for i in range(len_sequence - len(pam) + 1):
|
97 |
if sequence[i + 1:i + 3] == pam[1:]:
|
98 |
if i >= target_length:
|
99 |
target_seq = sequence[i - target_length:i + 3]
|
100 |
tar_start = start + i - target_length
|
101 |
tar_end = start + i + 3
|
102 |
-
|
103 |
-
targets.append([target_seq,
|
104 |
|
105 |
return targets
|
106 |
|
@@ -111,21 +135,16 @@ def process_gene(gene_symbol, model_path):
|
|
111 |
gene_sequence = '' # Initialize an empty string for the gene sequence
|
112 |
|
113 |
if transcripts:
|
|
|
114 |
for transcript in transcripts:
|
115 |
-
transcript_id = transcript['
|
116 |
chr = transcript.get('seq_region_name', 'unknown')
|
117 |
start = transcript.get('start', 0)
|
118 |
strand = transcript.get('strand', 'unknown')
|
119 |
# Fetch the sequence here and concatenate if multiple transcripts
|
120 |
gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
|
121 |
-
|
122 |
# Fetch exon and CDS information
|
123 |
exons = fetch_ensembl_exons(transcript_id)
|
124 |
-
cds_list = fetch_ensembl_cds(transcript_id)
|
125 |
-
|
126 |
-
# You might want to do something specific with exons and CDS information here
|
127 |
-
# For example, store them, print them, or include them in your analysis
|
128 |
-
|
129 |
if gene_sequence:
|
130 |
gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
|
131 |
if gRNA_sites:
|
@@ -135,26 +154,6 @@ def process_gene(gene_symbol, model_path):
|
|
135 |
# Return the data, fetched sequence, and possibly exon/CDS data
|
136 |
return all_data, gene_sequence, exons, cds_list
|
137 |
|
138 |
-
def fetch_ensembl_exons(transcript_id):
|
139 |
-
"""Fetch exon information for a given transcript from Ensembl."""
|
140 |
-
url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=exon;content-type=application/json"
|
141 |
-
response = requests.get(url)
|
142 |
-
if response.status_code == 200:
|
143 |
-
return response.json() # Returns a list of exons for the transcript
|
144 |
-
else:
|
145 |
-
print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {response.text}")
|
146 |
-
return None
|
147 |
-
|
148 |
-
def fetch_ensembl_cds(transcript_id):
|
149 |
-
"""Fetch coding sequence (CDS) information for a given transcript from Ensembl."""
|
150 |
-
url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=cds;content-type=application/json"
|
151 |
-
response = requests.get(url)
|
152 |
-
if response.status_code == 200:
|
153 |
-
return response.json() # Returns a list of CDS regions for the transcript
|
154 |
-
else:
|
155 |
-
print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {response.text}")
|
156 |
-
return None
|
157 |
-
|
158 |
def create_genbank_features(formatted_data):
|
159 |
features = []
|
160 |
for data in formatted_data:
|
|
|
52 |
prediction = dcModel.ontar_predict(encoded_seq)
|
53 |
|
54 |
# Format output
|
55 |
+
sgRNA = target[1]
|
56 |
chr = target[2]
|
57 |
start = target[3]
|
58 |
end = target[4]
|
59 |
strand = target[5]
|
60 |
+
transcript_id = target[6]
|
61 |
+
formatted_data.append([chr, start, end, strand, transcript_id, target[0], sgRNA, prediction[0]])
|
62 |
|
63 |
return formatted_data
|
64 |
|
|
|
90 |
print(f"Error fetching sequence data from Ensembl: {response.text}")
|
91 |
return None
|
92 |
|
93 |
+
def fetch_ensembl_exons(transcript_id):
    """Fetch exon information for a given transcript from Ensembl.

    Queries the Ensembl REST ``overlap/id`` endpoint with ``feature=exon``.

    Args:
        transcript_id: Identifier interpolated into the request URL.

    Returns:
        The decoded JSON body when the server answers HTTP 200 (a list of
        exons for the transcript); ``None`` on any other status code or when
        the request itself fails (connection error, timeout, ...).
    """
    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=exon;content-type=application/json"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Keep the "None on failure" contract for transport-level errors too.
        print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {exc}")
        return None

    if response.status_code == 200:
        return response.json()  # Returns a list of exons for the transcript

    print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {response.text}")
    return None
|
102 |
+
|
103 |
+
def fetch_ensembl_cds(transcript_id):
    """Fetch coding sequence (CDS) information for a given transcript from Ensembl.

    Queries the Ensembl REST ``overlap/id`` endpoint with ``feature=cds``.

    Args:
        transcript_id: Identifier interpolated into the request URL.

    Returns:
        The decoded JSON body when the server answers HTTP 200 (a list of CDS
        regions for the transcript); ``None`` on any other status code or
        when the request itself fails (connection error, timeout, ...).
    """
    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=cds;content-type=application/json"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Keep the "None on failure" contract for transport-level errors too.
        print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {exc}")
        return None

    if response.status_code == 200:
        return response.json()  # Returns a list of CDS regions for the transcript

    print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {response.text}")
    return None
|
112 |
+
|
113 |
+
def find_crispr_targets(sequence, chr, start, strand, transcript_id, pam="NGG", target_length=20):
    """Scan a sequence for PAM sites and collect the adjacent target windows.

    A position ``i`` is a hit when the bases following it equal ``pam[1:]``;
    the first PAM base is never compared, so it acts as a wildcard. Hits
    closer than ``target_length`` bases to the start of the sequence are
    skipped because a full-length protospacer would not fit.

    Args:
        sequence: Nucleotide string to scan.
        chr: Chromosome / region name, copied into every result row.
        start: Coordinate offset added to the in-sequence positions.
        strand: When equal to ``-1`` the sequence is reverse-complemented
            before scanning; any other value scans the sequence as given.
        transcript_id: Identifier copied into every result row.
        pam: PAM motif; only ``pam[1:]`` is matched literally.
        target_length: Number of bases taken upstream of the PAM.

    Returns:
        A list of rows ``[target_seq, sgRNA, chr, tar_start, tar_end, strand,
        transcript_id]`` where ``target_seq`` is protospacer + PAM, ``sgRNA``
        is the protospacer alone, and start/end/strand are stringified.
    """
    pam_len = len(pam)

    if strand == -1:
        # translate() leaves characters outside the table (e.g. 'N') untouched,
        # so ambiguous bases no longer raise KeyError as a dict lookup would.
        complement = str.maketrans("ACGTacgt", "TGCAtgca")
        sequence = sequence.translate(complement)[::-1]

    targets = []
    # Starting at target_length is equivalent to filtering on i >= target_length.
    for i in range(max(target_length, 0), len(sequence) - pam_len + 1):
        if sequence[i + 1:i + pam_len] != pam[1:]:
            continue
        target_seq = sequence[i - target_length:i + pam_len]
        sgRNA = sequence[i - target_length:i]
        # NOTE(review): for strand == -1 these are offsets along the
        # reverse-complemented sequence added to `start` -- confirm whether
        # callers expect mirrored genomic coordinates instead.
        tar_start = start + i - target_length
        tar_end = start + i + pam_len
        targets.append([target_seq, sgRNA, chr, str(tar_start), str(tar_end), str(strand), transcript_id])

    return targets
|
130 |
|
|
|
135 |
gene_sequence = '' # Initialize an empty string for the gene sequence
|
136 |
|
137 |
if transcripts:
|
138 |
+
cds_list = fetch_ensembl_cds(transcripts)
|
139 |
for transcript in transcripts:
|
140 |
+
transcript_id = transcript['display_name']
|
141 |
chr = transcript.get('seq_region_name', 'unknown')
|
142 |
start = transcript.get('start', 0)
|
143 |
strand = transcript.get('strand', 'unknown')
|
144 |
# Fetch the sequence here and concatenate if multiple transcripts
|
145 |
gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
|
|
|
146 |
# Fetch exon and CDS information
|
147 |
exons = fetch_ensembl_exons(transcript_id)
|
|
|
|
|
|
|
|
|
|
|
148 |
if gene_sequence:
|
149 |
gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
|
150 |
if gRNA_sites:
|
|
|
154 |
# Return the data, fetched sequence, and possibly exon/CDS data
|
155 |
return all_data, gene_sequence, exons, cds_list
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
def create_genbank_features(formatted_data):
|
158 |
features = []
|
159 |
for data in formatted_data:
|
crisprTool.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
CRISPR Online Tool for Cas9/Cas12/Cas13d Efficacy Prediction
|
2 |
|
3 |
|
4 |
-
You are using version
|
|
|
|
1 |
CRISPR Online Tool for Cas9/Cas12/Cas13d Efficacy Prediction
|
2 |
|
3 |
|
4 |
+
You are using version 1.0b of this tool.
|
5 |
+
Note: Once you click the download button, the page will automatically refresh.
|