Spaces:
Running
Running
roni
committed on
Commit
·
b2a3d53
1
Parent(s):
1d25e2a
showing gene info
Browse files- app.py +41 -29
- get_index.py +5 -7
- protein_viz.py +4 -4
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
from get_index import get_engine
|
4 |
-
from protein_viz import get_protein_name, render_html
|
5 |
|
6 |
index_repo = "ronig/siamese_protein_index"
|
7 |
model_repo = "ronig/protein_search_engine"
|
@@ -21,10 +21,16 @@ def limit_n_results(n):
|
|
21 |
|
22 |
|
23 |
def update_dropdown_menu(search_res):
|
24 |
-
choices = [
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
return gr.Dropdown.update(
|
29 |
choices=choices, interactive=True, value=choices[0], visible=True
|
30 |
)
|
@@ -45,57 +51,63 @@ def format_search_results(raw_search_results):
|
|
45 |
|
46 |
|
47 |
def switch_viz(new_choice):
|
48 |
-
|
|
|
49 |
title_update = gr.Markdown.update(visible=True)
|
50 |
protein_name = get_protein_name(pdb_id)
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
55 |
return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
|
56 |
|
57 |
|
58 |
with gr.Blocks() as demo:
|
59 |
with gr.Column():
|
60 |
-
gr.Markdown(
|
|
|
61 |
# Protein Binding Search Engine
|
62 |
-
This application examines all files uploaded to [PDB](https://www.rcsb.org/)
|
|
|
63 |
If the results are linked to a specific gene, their IDs will also be displayed.
|
64 |
-
"""
|
|
|
65 |
with gr.Column():
|
66 |
with gr.Row():
|
67 |
with gr.Column():
|
68 |
seq_input = gr.Textbox(
|
69 |
-
value="KFLIYQMECSTMIFGL",
|
70 |
-
label="Input Sequence"
|
71 |
)
|
72 |
n_results = gr.Number(5, label="N Results")
|
73 |
-
search_button = gr.Button("Search", variant=
|
74 |
search_results = gr.Label(num_top_classes=20, label="Search Results")
|
75 |
viz_header = gr.Markdown("## Visualization", visible=False)
|
76 |
results_selector = gr.Dropdown(
|
77 |
-
choices=[],
|
78 |
-
|
|
|
|
|
79 |
)
|
80 |
viz_body = gr.Markdown("", visible=False)
|
81 |
protein_viz = gr.HTML(
|
82 |
value=render_html(pdb_id=None, chain=None),
|
83 |
-
label="Protein Visualization"
|
|
|
|
|
|
|
84 |
)
|
85 |
-
gr.Examples([
|
86 |
-
"KFLIYQMECSTMIFGL",
|
87 |
-
"PHFAMPPIHEDHLE",
|
88 |
-
"AEERIISLD"
|
89 |
-
], inputs=[seq_input])
|
90 |
search_button.click(
|
91 |
search_and_display,
|
92 |
inputs=[seq_input, n_results],
|
93 |
-
outputs=[search_results, results_selector]
|
94 |
)
|
95 |
results_selector.change(
|
96 |
-
switch_viz,
|
97 |
-
inputs=results_selector,
|
98 |
-
outputs=[protein_viz, viz_header, viz_body]
|
99 |
)
|
100 |
|
101 |
-
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
from get_index import get_engine
|
4 |
+
from protein_viz import get_gene_names, get_protein_name, render_html
|
5 |
|
6 |
index_repo = "ronig/siamese_protein_index"
|
7 |
model_repo = "ronig/protein_search_engine"
|
|
|
21 |
|
22 |
|
23 |
def update_dropdown_menu(search_res):
    """Build the dropdown update that lists every search hit as a choice.

    Each choice is the comma-joined string ``pdb_name,chain_id[,gene]``. The
    gene part is appended only when the row's first gene id exists and is not
    the ``"Unknown"`` placeholder.

    Args:
        search_res: iterable of mapping rows providing the ``"pdb_name"``,
            ``"chain_id"`` and ``"gene_ids"`` keys.

    Returns:
        The ``gr.Dropdown.update`` payload. With at least one hit the dropdown
        is shown, interactive and pre-selected on the first choice; with no
        hits it is emptied and hidden instead of raising ``IndexError``.
    """
    choices = []
    for row in search_res:
        choice_parts = [row["pdb_name"], row["chain_id"]]
        gene_ids = row["gene_ids"]
        # A missing/empty gene list is treated like the "Unknown" placeholder
        # so that indexing its first element cannot fail.
        gene = gene_ids[0] if gene_ids else "Unknown"
        if gene != "Unknown":
            choice_parts.append(gene)
        choices.append(",".join(choice_parts))

    if not choices:
        # No results: there is no first choice to pre-select, so clear the
        # selector and keep it hidden.
        return gr.Dropdown.update(
            choices=[], interactive=True, value=None, visible=False
        )

    return gr.Dropdown.update(
        choices=choices, interactive=True, value=choices[0], visible=True
    )
|
|
|
51 |
|
52 |
|
53 |
def switch_viz(new_choice):
    """Render the visualization and description for the selected result.

    Args:
        new_choice: dropdown value of the form ``pdb_id,chain[,gene]``. May be
            ``None`` or empty when nothing is selected.

    Returns:
        A 3-tuple ``(html, title_update, description_update)``: the output of
        ``render_html`` plus ``gr.Markdown.update`` payloads for the
        visualization header and body. When no usable choice is given, the
        empty rendering is returned and both Markdown components are hidden.
    """
    choice_parts = new_choice.split(",") if new_choice else []
    if len(choice_parts) < 2:
        # Nothing selected (or a value without a chain part): fall back to the
        # same empty rendering the page uses on start-up instead of raising.
        return (
            render_html(pdb_id=None, chain=None),
            gr.Markdown.update(visible=False),
            gr.Markdown.update(value="", visible=False),
        )

    pdb_id, chain = choice_parts[0], choice_parts[1]
    title_update = gr.Markdown.update(visible=True)
    protein_name = get_protein_name(pdb_id)

    new_value = f"""**PDB Title**: {protein_name}"""
    if len(choice_parts) > 2:
        gene = choice_parts[2]
        gene_names = get_gene_names([gene])
        # Only add the gene line when the lookup actually returned a name.
        if gene_names:
            new_value += f"""\n\n**Gene Name**: {gene_names[0].title()}"""

    description_update = gr.Markdown.update(value=new_value, visible=True)
    return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
|
67 |
|
68 |
|
69 |
with gr.Blocks() as demo:
|
70 |
with gr.Column():
|
71 |
+
gr.Markdown(
|
72 |
+
"""
|
73 |
# Protein Binding Search Engine
|
74 |
+
This application examines all files uploaded to [PDB](https://www.rcsb.org/)
|
75 |
+
to find the chains with which a given protein sequence is most likely to bind.
|
76 |
If the results are linked to a specific gene, their IDs will also be displayed.
|
77 |
+
"""
|
78 |
+
)
|
79 |
with gr.Column():
|
80 |
with gr.Row():
|
81 |
with gr.Column():
|
82 |
seq_input = gr.Textbox(
|
83 |
+
value="KFLIYQMECSTMIFGL", label="Input Sequence"
|
|
|
84 |
)
|
85 |
n_results = gr.Number(5, label="N Results")
|
86 |
+
search_button = gr.Button("Search", variant="primary")
|
87 |
search_results = gr.Label(num_top_classes=20, label="Search Results")
|
88 |
viz_header = gr.Markdown("## Visualization", visible=False)
|
89 |
results_selector = gr.Dropdown(
|
90 |
+
choices=[],
|
91 |
+
multiselect=False,
|
92 |
+
visible=False,
|
93 |
+
label="Visualized Search Result",
|
94 |
)
|
95 |
viz_body = gr.Markdown("", visible=False)
|
96 |
protein_viz = gr.HTML(
|
97 |
value=render_html(pdb_id=None, chain=None),
|
98 |
+
label="Protein Visualization",
|
99 |
+
)
|
100 |
+
gr.Examples(
|
101 |
+
["KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"], inputs=[seq_input]
|
102 |
)
|
|
|
|
|
|
|
|
|
|
|
103 |
search_button.click(
|
104 |
search_and_display,
|
105 |
inputs=[seq_input, n_results],
|
106 |
+
outputs=[search_results, results_selector],
|
107 |
)
|
108 |
results_selector.change(
|
109 |
+
switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
|
|
|
|
|
110 |
)
|
111 |
|
112 |
+
if __name__ == "__main__":
|
113 |
+
demo.launch()
|
get_index.py
CHANGED
@@ -8,16 +8,14 @@ from credentials import get_token
|
|
8 |
|
9 |
def get_engine(index_repo: str, model_repo: str):
|
10 |
index_path = Path(
|
11 |
-
snapshot_download(
|
12 |
-
index_repo, use_auth_token=get_token(), repo_type="dataset"
|
13 |
-
)
|
14 |
)
|
15 |
local_arch_path = Path(
|
16 |
-
snapshot_download(
|
17 |
-
model_repo, use_auth_token=get_token(), repo_type="model"
|
18 |
-
)
|
19 |
)
|
20 |
sys.path.append(str(local_arch_path))
|
21 |
-
from protein_index import
|
|
|
|
|
22 |
|
23 |
return ProteinSearchEngine(data_path=index_path)
|
|
|
8 |
|
9 |
def get_engine(index_repo: str, model_repo: str):
    """Fetch the index and model snapshots and build the search engine.

    Calls ``snapshot_download`` once for ``index_repo`` (repo type
    ``"dataset"``) and once for ``model_repo`` (repo type ``"model"``), makes
    the model snapshot directory importable, and instantiates
    ``ProteinSearchEngine`` from the ``protein_index`` module found there.

    Args:
        index_repo: repository id of the index dataset.
        model_repo: repository id holding the ``protein_index`` module.

    Returns:
        A ``ProteinSearchEngine`` constructed with ``data_path`` set to the
        downloaded index directory.
    """
    index_path = Path(
        snapshot_download(index_repo, use_auth_token=get_token(), repo_type="dataset")
    )
    local_arch_path = Path(
        snapshot_download(model_repo, use_auth_token=get_token(), repo_type="model")
    )
    # Register the snapshot directory only once so repeated calls do not keep
    # growing sys.path with duplicate entries.
    arch_dir = str(local_arch_path)
    if arch_dir not in sys.path:
        sys.path.append(arch_dir)
    # Imported here because the module only becomes importable after the
    # snapshot directory has been added to sys.path above.
    from protein_index import (  # pylint: disable=import-error,import-outside-toplevel
        ProteinSearchEngine,
    )

    return ProteinSearchEngine(data_path=index_path)
|
protein_viz.py
CHANGED
@@ -35,8 +35,8 @@ def render_html(pdb_id, chain):
|
|
35 |
|
36 |
def get_gene_names(genes: List[str]):
|
37 |
mg = mygene.MyGeneInfo()
|
38 |
-
ginfo = mg.querymany(genes, scopes=
|
39 |
-
gene_names = [gene[
|
40 |
return gene_names
|
41 |
|
42 |
|
@@ -45,7 +45,7 @@ def get_protein_name(pdb_id: str):
|
|
45 |
response = requests.get(url, timeout=1)
|
46 |
if response.ok:
|
47 |
data = response.json()
|
48 |
-
protein_name = data[
|
49 |
else:
|
50 |
-
protein_name =
|
51 |
return protein_name
|
|
|
35 |
|
36 |
def get_gene_names(genes: List[str]):
    """Look up display names for gene ids through MyGene.

    Args:
        genes: gene identifiers, queried with the ``ensembl.gene`` scope.

    Returns:
        A list with one name per entry returned by ``querymany``. Entries that
        carry no ``"name"`` field (e.g. ids the service could not match) are
        reported as ``"Unknown"`` instead of raising ``KeyError``.
    """
    mg = mygene.MyGeneInfo()
    ginfo = mg.querymany(genes, scopes="ensembl.gene", verbose=False, fields="name")
    # Unmatched queries come back without a "name" key, so fall back to a
    # placeholder rather than indexing the key directly.
    gene_names = [gene.get("name", "Unknown") for gene in ginfo]
    return gene_names
|
41 |
|
42 |
|
|
|
45 |
response = requests.get(url, timeout=1)
|
46 |
if response.ok:
|
47 |
data = response.json()
|
48 |
+
protein_name = data["struct"]["title"]
|
49 |
else:
|
50 |
+
protein_name = "Unknown"
|
51 |
return protein_name
|