Spaces:
Running
Running
roni
committed on
Commit
·
1d11011
1
Parent(s):
b2a3d53
changing gene info source
Browse files- app.py +19 -19
- concurrency.py +22 -0
- protein_viz.py +37 -8
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
|
|
|
3 |
from get_index import get_engine
|
4 |
-
from protein_viz import
|
5 |
|
6 |
index_repo = "ronig/siamese_protein_index"
|
7 |
model_repo = "ronig/protein_search_engine"
|
@@ -23,12 +24,7 @@ def limit_n_results(n):
|
|
23 |
def update_dropdown_menu(search_res):
|
24 |
choices = []
|
25 |
for row in search_res:
|
26 |
-
|
27 |
-
if gene != "Unknown":
|
28 |
-
choice_parts = [row["pdb_name"], row["chain_id"], gene]
|
29 |
-
else:
|
30 |
-
choice_parts = [row["pdb_name"], row["chain_id"]]
|
31 |
-
choice = ",".join(choice_parts)
|
32 |
choices.append(choice)
|
33 |
|
34 |
return gr.Dropdown.update(
|
@@ -38,18 +34,26 @@ def update_dropdown_menu(search_res):
|
|
38 |
|
39 |
def format_search_results(raw_search_results):
|
40 |
formatted_search_results = {}
|
41 |
-
for
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
key = f"PDB: {prot} | Chain: {chain}"
|
47 |
-
if genes != "Unknown":
|
48 |
-
key += f" | Gene: {genes}"
|
49 |
formatted_search_results[key] = value
|
50 |
return formatted_search_results
|
51 |
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
def switch_viz(new_choice):
|
54 |
choice_parts = new_choice.split(",")
|
55 |
pdb_id, chain = choice_parts[0], choice_parts[1]
|
@@ -57,10 +61,6 @@ def switch_viz(new_choice):
|
|
57 |
protein_name = get_protein_name(pdb_id)
|
58 |
|
59 |
new_value = f"""**PDB Title**: {protein_name}"""
|
60 |
-
if len(choice_parts) > 2:
|
61 |
-
gene = choice_parts[2]
|
62 |
-
gene_name = get_gene_names([gene])[0]
|
63 |
-
new_value += f"""\n\n**Gene Name**: {gene_name.title()}"""
|
64 |
|
65 |
description_update = gr.Markdown.update(value=new_value, visible=True)
|
66 |
return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
+
from concurrency import execute_multithread
|
4 |
from get_index import get_engine
|
5 |
+
from protein_viz import get_gene_name, get_protein_name, render_html
|
6 |
|
7 |
index_repo = "ronig/siamese_protein_index"
|
8 |
model_repo = "ronig/protein_search_engine"
|
|
|
24 |
def update_dropdown_menu(search_res):
|
25 |
choices = []
|
26 |
for row in search_res:
|
27 |
+
choice = ",".join([row["pdb_name"], row["chain_id"]])
|
|
|
|
|
|
|
|
|
|
|
28 |
choices.append(choice)
|
29 |
|
30 |
return gr.Dropdown.update(
|
|
|
34 |
|
35 |
def format_search_results(raw_search_results):
    """Build the ``{label: score}`` mapping for a batch of raw search hits.

    Each hit is formatted by ``format_search_result`` on its own worker
    thread via ``execute_multithread``.

    Args:
        raw_search_results: Sized collection of hit dicts; ``len()`` is taken
            to choose the worker count, and each item is passed on as
            ``raw_result``.

    Returns:
        dict mapping each formatted label to its score. Entries are inserted
        in the order the workers finish, and hits that produce the same label
        overwrite one another.
    """
    # ThreadPoolExecutor rejects max_workers=0, so an empty result set must
    # short-circuit instead of asking for zero workers.
    if not raw_search_results:
        return {}

    return dict(
        execute_multithread(
            func=format_search_result,
            inputs=({"raw_result": res} for res in raw_search_results),
            n_workers=len(raw_search_results),
        )
    )
|
44 |
|
45 |
|
46 |
+
def format_search_result(raw_result):
    """Turn one raw search hit into a ``(label, score)`` pair.

    Args:
        raw_result: Mapping with ``"pdb_name"``, ``"chain_id"`` and
            ``"score"`` entries.

    Returns:
        Tuple of the display label and the hit's score. The label always
        carries the PDB name and chain, and gains a gene section unless the
        gene lookup reports ``"Unknown"``.
    """
    pdb_name = raw_result["pdb_name"]
    chain_id = raw_result["chain_id"]
    score = raw_result["score"]

    gene = get_gene_name(pdb_id=pdb_name, chain_id=chain_id)

    label_parts = [f"PDB: {pdb_name}", f"Chain: {chain_id}"]
    if gene != "Unknown":
        label_parts.append(f"Gene: {gene}")
    return " | ".join(label_parts), score
|
55 |
+
|
56 |
+
|
57 |
def switch_viz(new_choice):
|
58 |
choice_parts = new_choice.split(",")
|
59 |
pdb_id, chain = choice_parts[0], choice_parts[1]
|
|
|
61 |
protein_name = get_protein_name(pdb_id)
|
62 |
|
63 |
new_value = f"""**PDB Title**: {protein_name}"""
|
|
|
|
|
|
|
|
|
64 |
|
65 |
description_update = gr.Markdown.update(value=new_value, visible=True)
|
66 |
return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
|
concurrency.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import concurrent.futures
|
2 |
+
import itertools
|
3 |
+
from typing import Callable, Iterable
|
4 |
+
|
5 |
+
|
6 |
+
def execute_multithread(func: Callable, inputs: Iterable, n_workers):
    """Run ``func`` over ``inputs`` on a thread pool, yielding results as they finish.

    At most ``n_workers`` tasks are in flight at any time; each time some
    tasks complete, the same number of new tasks is pulled from ``inputs``.

    Args:
        func: Callable invoked as ``func(**task)`` for every task.
        inputs: Iterable of keyword-argument mappings, one per task. Any
            iterable works (list, tuple, generator, ...).
        n_workers: Size of the thread pool and the in-flight task limit.

    Yields:
        The return value of each call, in completion order (not input order).

    Raises:
        ValueError: On first iteration if ``n_workers`` is not positive
            (raised by ``ThreadPoolExecutor``).
        Exception: Whatever ``func`` raises is re-raised when that task's
            result is yielded.
    """
    # Take a single iterator up front: calling islice() repeatedly on a
    # re-iterable such as a list would restart from the first element each
    # time and resubmit the same tasks forever.
    pending_tasks = iter(inputs)

    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = {
            executor.submit(func, **task)
            for task in itertools.islice(pending_tasks, n_workers)
        }

        while futures:
            done, futures = concurrent.futures.wait(
                futures, return_when=concurrent.futures.FIRST_COMPLETED
            )

            for future in done:
                yield future.result()

            # Top the pool back up with one new task per finished one.
            for task in itertools.islice(pending_tasks, len(done)):
                futures.add(executor.submit(func, **task))
|
protein_viz.py
CHANGED
@@ -1,6 +1,3 @@
|
|
1 |
-
from typing import List
|
2 |
-
|
3 |
-
import mygene
|
4 |
import requests
|
5 |
|
6 |
|
@@ -33,11 +30,42 @@ def render_html(pdb_id, chain):
|
|
33 |
return iframe
|
34 |
|
35 |
|
36 |
-
def
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
|
43 |
def get_protein_name(pdb_id: str):
|
@@ -48,4 +76,5 @@ def get_protein_name(pdb_id: str):
|
|
48 |
protein_name = data["struct"]["title"]
|
49 |
else:
|
50 |
protein_name = "Unknown"
|
|
|
51 |
return protein_name
|
|
|
|
|
|
|
|
|
1 |
import requests
|
2 |
|
3 |
|
|
|
30 |
return iframe
|
31 |
|
32 |
|
33 |
+
def get_gene_name(pdb_id, chain_id):
    """Look up the gene name for one chain of a PDB entry.

    Resolves the chain to its polymer entity id, then asks for that
    entity's gene name.

    Args:
        pdb_id: PDB entry identifier.
        chain_id: Chain identifier within the entry.

    Returns:
        Whatever ``get_gene_name_from_polymer_entity`` returns for the
        resolved entity.
    """
    polymer_entity = get_polymer_entity_id(chain_id, pdb_id)
    return get_gene_name_from_polymer_entity(
        pdb_id=pdb_id, entity_id=polymer_entity
    )
|
37 |
+
|
38 |
+
|
39 |
+
def get_polymer_entity_id(chain_id, pdb_id):
    """Fetch the polymer entity id for a chain from the RCSB data API.

    Args:
        chain_id: Chain identifier, used as the last URL path segment.
        pdb_id: PDB entry identifier.

    Returns:
        The entity id as an ``int``, or ``None`` when the lookup fails for
        any reason (request error or timeout, non-OK response, or a payload
        without a usable ``entity_id``).
    """
    url = (
        f"https://data.rcsb.org/rest/v1/core/"
        f"polymer_entity_instance/{pdb_id}/{chain_id}"
    )
    try:
        response = requests.get(url, timeout=1)
    except requests.RequestException:
        # Timeouts and connection errors are reported like any other failed
        # lookup instead of propagating to the caller.
        return None

    if not response.ok:
        return None

    try:
        res_data = response.json()
        return int(
            res_data["rcsb_polymer_entity_instance_container_identifiers"]["entity_id"]
        )
    except (KeyError, TypeError, ValueError):
        # Malformed JSON or an unexpected payload shape: no entity id.
        return None
|
53 |
+
|
54 |
+
|
55 |
+
def get_gene_name_from_polymer_entity(pdb_id, entity_id):
    """Fetch the first gene name listed for a polymer entity from the RCSB data API.

    Args:
        pdb_id: PDB entry identifier.
        entity_id: Polymer entity id within the entry; a falsy value
            (e.g. ``None``) skips the request entirely.

    Returns:
        The ``value`` of the first gene name of the first source organism,
        or ``"Unknown"`` when there is no entity id, the request fails, the
        response is not OK, or the payload lacks that field.
    """
    if not entity_id:
        return "Unknown"

    url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    try:
        response = requests.get(url, timeout=1)
    except requests.RequestException:
        # Timeouts and connection errors fall back to "Unknown" like every
        # other failed lookup.
        return "Unknown"

    if not response.ok:
        return "Unknown"

    try:
        res_data = response.json()
        return res_data["rcsb_entity_source_organism"][0]["rcsb_gene_name"][0]["value"]
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing key, empty list, null field or malformed JSON: no gene name.
        return "Unknown"
|
69 |
|
70 |
|
71 |
def get_protein_name(pdb_id: str):
|
|
|
76 |
protein_name = data["struct"]["title"]
|
77 |
else:
|
78 |
protein_name = "Unknown"
|
79 |
+
|
80 |
return protein_name
|