roni committed on
Commit
b2a3d53
·
1 Parent(s): 1d25e2a

showing gene info

Browse files
Files changed (3) hide show
  1. app.py +41 -29
  2. get_index.py +5 -7
  3. protein_viz.py +4 -4
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
 
3
  from get_index import get_engine
4
- from protein_viz import get_protein_name, render_html
5
 
6
  index_repo = "ronig/siamese_protein_index"
7
  model_repo = "ronig/protein_search_engine"
@@ -21,10 +21,16 @@ def limit_n_results(n):
21
 
22
 
23
  def update_dropdown_menu(search_res):
24
- choices = [
25
- ','.join([row['pdb_name'], row['chain_id']])
26
- for row in search_res
27
- ]
 
 
 
 
 
 
28
  return gr.Dropdown.update(
29
  choices=choices, interactive=True, value=choices[0], visible=True
30
  )
@@ -45,57 +51,63 @@ def format_search_results(raw_search_results):
45
 
46
 
47
  def switch_viz(new_choice):
48
- pdb_id, chain = new_choice.split(',')
 
49
  title_update = gr.Markdown.update(visible=True)
50
  protein_name = get_protein_name(pdb_id)
51
- description_update = gr.Markdown.update(
52
- value=f"""**PDB Title**: {protein_name}""",
53
- visible=True
54
- )
 
 
 
 
55
  return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
56
 
57
 
58
  with gr.Blocks() as demo:
59
  with gr.Column():
60
- gr.Markdown("""
 
61
  # Protein Binding Search Engine
62
- This application examines all files uploaded to [PDB](https://www.rcsb.org/) to find the chains with which a given protein sequence is most likely to bind.
 
63
  If the results are linked to a specific gene, their IDs will also be displayed.
64
- """)
 
65
  with gr.Column():
66
  with gr.Row():
67
  with gr.Column():
68
  seq_input = gr.Textbox(
69
- value="KFLIYQMECSTMIFGL",
70
- label="Input Sequence"
71
  )
72
  n_results = gr.Number(5, label="N Results")
73
- search_button = gr.Button("Search", variant='primary')
74
  search_results = gr.Label(num_top_classes=20, label="Search Results")
75
  viz_header = gr.Markdown("## Visualization", visible=False)
76
  results_selector = gr.Dropdown(
77
- choices=[], multiselect=False, visible=False,
78
- label="Visualized Search Result"
 
 
79
  )
80
  viz_body = gr.Markdown("", visible=False)
81
  protein_viz = gr.HTML(
82
  value=render_html(pdb_id=None, chain=None),
83
- label="Protein Visualization"
 
 
 
84
  )
85
- gr.Examples([
86
- "KFLIYQMECSTMIFGL",
87
- "PHFAMPPIHEDHLE",
88
- "AEERIISLD"
89
- ], inputs=[seq_input])
90
  search_button.click(
91
  search_and_display,
92
  inputs=[seq_input, n_results],
93
- outputs=[search_results, results_selector]
94
  )
95
  results_selector.change(
96
- switch_viz,
97
- inputs=results_selector,
98
- outputs=[protein_viz, viz_header, viz_body]
99
  )
100
 
101
- demo.launch()
 
 
1
  import gradio as gr
2
 
3
  from get_index import get_engine
4
+ from protein_viz import get_gene_names, get_protein_name, render_html
5
 
6
  index_repo = "ronig/siamese_protein_index"
7
  model_repo = "ronig/protein_search_engine"
 
21
 
22
 
23
  def update_dropdown_menu(search_res):
24
+ choices = []
25
+ for row in search_res:
26
+ gene = row["gene_ids"][0]
27
+ if gene != "Unknown":
28
+ choice_parts = [row["pdb_name"], row["chain_id"], gene]
29
+ else:
30
+ choice_parts = [row["pdb_name"], row["chain_id"]]
31
+ choice = ",".join(choice_parts)
32
+ choices.append(choice)
33
+
34
  return gr.Dropdown.update(
35
  choices=choices, interactive=True, value=choices[0], visible=True
36
  )
 
51
 
52
 
53
  def switch_viz(new_choice):
54
+ choice_parts = new_choice.split(",")
55
+ pdb_id, chain = choice_parts[0], choice_parts[1]
56
  title_update = gr.Markdown.update(visible=True)
57
  protein_name = get_protein_name(pdb_id)
58
+
59
+ new_value = f"""**PDB Title**: {protein_name}"""
60
+ if len(choice_parts) > 2:
61
+ gene = choice_parts[2]
62
+ gene_name = get_gene_names([gene])[0]
63
+ new_value += f"""\n\n**Gene Name**: {gene_name.title()}"""
64
+
65
+ description_update = gr.Markdown.update(value=new_value, visible=True)
66
  return render_html(pdb_id=pdb_id, chain=chain), title_update, description_update
67
 
68
 
69
  with gr.Blocks() as demo:
70
  with gr.Column():
71
+ gr.Markdown(
72
+ """
73
  # Protein Binding Search Engine
74
+ This application examines all files uploaded to [PDB](https://www.rcsb.org/)
75
+ to find the chains with which a given protein sequence is most likely to bind.
76
  If the results are linked to a specific gene, their IDs will also be displayed.
77
+ """
78
+ )
79
  with gr.Column():
80
  with gr.Row():
81
  with gr.Column():
82
  seq_input = gr.Textbox(
83
+ value="KFLIYQMECSTMIFGL", label="Input Sequence"
 
84
  )
85
  n_results = gr.Number(5, label="N Results")
86
+ search_button = gr.Button("Search", variant="primary")
87
  search_results = gr.Label(num_top_classes=20, label="Search Results")
88
  viz_header = gr.Markdown("## Visualization", visible=False)
89
  results_selector = gr.Dropdown(
90
+ choices=[],
91
+ multiselect=False,
92
+ visible=False,
93
+ label="Visualized Search Result",
94
  )
95
  viz_body = gr.Markdown("", visible=False)
96
  protein_viz = gr.HTML(
97
  value=render_html(pdb_id=None, chain=None),
98
+ label="Protein Visualization",
99
+ )
100
+ gr.Examples(
101
+ ["KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"], inputs=[seq_input]
102
  )
 
 
 
 
 
103
  search_button.click(
104
  search_and_display,
105
  inputs=[seq_input, n_results],
106
+ outputs=[search_results, results_selector],
107
  )
108
  results_selector.change(
109
+ switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
 
 
110
  )
111
 
112
+ if __name__ == "__main__":
113
+ demo.launch()
get_index.py CHANGED
@@ -8,16 +8,14 @@ from credentials import get_token
8
 
9
  def get_engine(index_repo: str, model_repo: str):
10
  index_path = Path(
11
- snapshot_download(
12
- index_repo, use_auth_token=get_token(), repo_type="dataset"
13
- )
14
  )
15
  local_arch_path = Path(
16
- snapshot_download(
17
- model_repo, use_auth_token=get_token(), repo_type="model"
18
- )
19
  )
20
  sys.path.append(str(local_arch_path))
21
- from protein_index import ProteinSearchEngine # pylint: disable=import-error,import-outside-toplevel
 
 
22
 
23
  return ProteinSearchEngine(data_path=index_path)
 
8
 
9
  def get_engine(index_repo: str, model_repo: str):
10
  index_path = Path(
11
+ snapshot_download(index_repo, use_auth_token=get_token(), repo_type="dataset")
 
 
12
  )
13
  local_arch_path = Path(
14
+ snapshot_download(model_repo, use_auth_token=get_token(), repo_type="model")
 
 
15
  )
16
  sys.path.append(str(local_arch_path))
17
+ from protein_index import ( # pylint: disable=import-error,import-outside-toplevel
18
+ ProteinSearchEngine,
19
+ )
20
 
21
  return ProteinSearchEngine(data_path=index_path)
protein_viz.py CHANGED
@@ -35,8 +35,8 @@ def render_html(pdb_id, chain):
35
 
36
  def get_gene_names(genes: List[str]):
37
  mg = mygene.MyGeneInfo()
38
- ginfo = mg.querymany(genes, scopes='ensembl.gene')
39
- gene_names = [gene['name'] for gene in ginfo]
40
  return gene_names
41
 
42
 
@@ -45,7 +45,7 @@ def get_protein_name(pdb_id: str):
45
  response = requests.get(url, timeout=1)
46
  if response.ok:
47
  data = response.json()
48
- protein_name = data['struct']['title']
49
  else:
50
- protein_name = 'Unknown'
51
  return protein_name
 
35
 
36
  def get_gene_names(genes: List[str]):
37
  mg = mygene.MyGeneInfo()
38
+ ginfo = mg.querymany(genes, scopes="ensembl.gene", verbose=False, fields="name")
39
+ gene_names = [gene["name"] for gene in ginfo]
40
  return gene_names
41
 
42
 
 
45
  response = requests.get(url, timeout=1)
46
  if response.ok:
47
  data = response.json()
48
+ protein_name = data["struct"]["title"]
49
  else:
50
+ protein_name = "Unknown"
51
  return protein_name