roni
using metadata instead of fetching it from the internet
1694358
raw
history blame
4.58 kB
import gradio as gr
from get_index import get_engines
from protein_viz import get_protein_name, render_html
# Identifiers handed to get_engines below. Presumably Hugging Face Hub repo
# ids for the search index and the embedding model -- TODO confirm against
# get_index.get_engines.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Built once at import time. Used in this file as a mapping from an index
# name to an engine object that exposes search_by_sequence(seq, n=...).
engines = get_engines(index_repo, model_repo)
# Index names offered in the "Index" dropdown of the UI.
available_indexes = list(engines.keys())
# Markdown rendered at the top of the page.
app_description = """
# Protein Binding Search Engine
This application enables a quick protein-peptide binding search based on sequences.
You can use it to search the full [PDB](https://www.rcsb.org/) database or in a specific organism genome.
"""
# Upper bound on the number of hits a search may request; also used as the
# number of rows the "Search Results" label is allowed to display.
max_results = 100
def search_and_display(seq, n_res, index_selection):
    """Run a sequence search on the selected index and build both UI outputs.

    Args:
        seq: Query sequence forwarded to the engine's ``search_by_sequence``.
        n_res: Requested number of hits. It is clamped by
            ``limit_n_results`` and then truncated with ``int``.
        index_selection: Key used to pick an engine from the module-level
            ``engines`` mapping.

    Returns:
        A ``(formatted_results, dropdown_update)`` tuple, in the order
        expected by the ``outputs`` of the search button.
    """
    # NOTE(review): if the number field can deliver None (e.g. when cleared),
    # the clamp below raises TypeError -- confirm and decide on a default.
    hit_count = int(limit_n_results(n_res))
    hits = engines[index_selection].search_by_sequence(seq, n=hit_count)

    # The dropdown update is computed before the label mapping so both
    # helpers consume `hits` in the same order as before.
    dropdown_update = update_dropdown_menu(hits)
    label_scores = format_search_results(hits)
    return label_scores, dropdown_update
def limit_n_results(n):
    """Clamp the requested result count to the range ``1..max_results``.

    Args:
        n: Requested number of results (any value comparable with ints).

    Returns:
        ``n`` itself when it already lies in range, otherwise the violated
        bound (``max_results`` or ``1``).
    """
    # Apply the upper bound first, then the lower bound.
    capped = max_results if max_results < n else n
    return 1 if 1 > capped else capped
def update_dropdown_menu(search_res):
    """Build the update for the "Visualized Search Result" dropdown.

    Each hit that carries both ``"pdb_name"`` and ``"chain_id"`` contributes
    one ``"<pdb_name>.<chain_id>"`` choice; hits missing either key are
    skipped.

    Args:
        search_res: Iterable of search hits supporting ``in`` and ``[]``.

    Returns:
        A ``gr.Dropdown.update`` result. With at least one choice the
        dropdown is shown and preselects the first one; with none it is
        hidden and its value cleared.
    """
    labels = [
        ".".join([hit["pdb_name"], hit["chain_id"]])
        for hit in search_res
        if "pdb_name" in hit and "chain_id" in hit
    ]

    if not labels:
        # Nothing to visualize: hide the selector and reset its value.
        return gr.Dropdown.update(
            choices=labels, interactive=True, visible=False, value=None
        )

    return gr.Dropdown.update(
        choices=labels, interactive=True, value=labels[0], visible=True
    )
def format_search_results(raw_search_results):
    """Turn raw search hits into a ``{label: score}`` mapping.

    Args:
        raw_search_results: Iterable of hits accepted by
            ``parse_pdb_search_result``.

    Returns:
        A dict keyed by the display label of each hit. If two hits produce
        the same label, the later one wins.
    """
    parsed_hits = (parse_pdb_search_result(hit) for hit in raw_search_results)
    return {label: score for label, score in parsed_hits}
def parse_pdb_search_result(raw_result):
    """Convert one raw search hit into a ``(label, score)`` pair.

    The label always starts with ``"PDB: <pdb_name>.<chain_id>"``. Gene and
    organism metadata are optional: they are appended only when present and
    not ``None``, and the organism is shown only alongside gene names.

    Args:
        raw_result: Mapping describing a hit. ``"pdb_name"``, ``"chain_id"``
            and ``"score"`` are required; ``"genes"`` and ``"organism"`` may
            be missing or ``None``. Assumed to be dict-like (supports
            ``.get``) -- verify against the search engine's return type.

    Returns:
        Tuple of the display label and the hit's score.

    Raises:
        KeyError: If a required key is missing from ``raw_result``.
    """
    prot = raw_result["pdb_name"]
    chain = raw_result["chain_id"]
    score = raw_result["score"]
    # Metadata is optional: a hit without these keys must not raise.
    gene_names = raw_result.get("genes")
    species = raw_result.get("organism")

    label = f"PDB: {prot}.{chain}"
    if gene_names is not None:
        label += f" | Genes: {gene_names}"
        # Avoid rendering the literal text "Organism: None" in the UI.
        if species is not None:
            label += f" | Organism: {species}"
    return label, score
def switch_viz(new_choice):
    """Refresh the visualization widgets for the selected search result.

    Args:
        new_choice: Dropdown value of the form ``"<pdb_id>.<chain>"``, or
            ``None`` when nothing is selected.

    Returns:
        A ``(html, title_update, description_update)`` tuple: the viewer
        HTML plus updates for the header and description Markdown widgets.
        With no selection the HTML is empty and both widgets are hidden.
    """
    if new_choice is None:
        hidden_title = gr.Markdown.update(visible=False)
        hidden_description = gr.Markdown.update(value=None, visible=False)
        return "", hidden_title, hidden_description

    # Only the first two dot-separated fields are used.
    parts = new_choice.split(".")
    pdb_id = parts[0]
    chain = parts[1]

    shown_title = gr.Markdown.update(visible=True)
    pdb_title = get_protein_name(pdb_id)
    shown_description = gr.Markdown.update(
        value=f"""**PDB Title**: {pdb_title}""", visible=True
    )
    viewer_html = render_html(pdb_id=pdb_id, chain=chain)
    return viewer_html, shown_title, shown_description
# UI definition: page layout followed by the event wiring.
# NOTE(review): leading indentation is missing in this copy of the file, so
# the nesting of the `with` containers below cannot be read off the text and
# must be restored before the module will run -- confirm against the
# intended layout.
with gr.Blocks() as demo:
with gr.Column():
gr.Markdown(app_description)
with gr.Column():
with gr.Row():
with gr.Column():
# Search inputs: query sequence, requested hit count, and index choice.
seq_input = gr.Textbox(value="APTMPPPLPP", label="Input Sequence")
n_results = gr.Number(10, label="N Results")
# NOTE(review): the default "PDB" presumably must be one of
# available_indexes -- verify against what get_engines returns.
index_selector = gr.Dropdown(
choices=available_indexes,
value="PDB",
multiselect=False,
visible=True,
label="Index",
)
search_button = gr.Button("Search", variant="primary")
# Label widget fed with the {label: score} mapping from search_and_display.
search_results = gr.Label(
num_top_classes=max_results, label="Search Results"
)
# Visualization widgets start hidden; they are revealed through the updates
# returned by update_dropdown_menu and switch_viz.
viz_header = gr.Markdown("## Visualization", visible=False)
results_selector = gr.Dropdown(
choices=[],
multiselect=False,
visible=False,
label="Visualized Search Result",
)
viz_body = gr.Markdown("", visible=False)
# Initial viewer content is rendered with no structure selected.
protein_viz = gr.HTML(
value=render_html(pdb_id=None, chain=None),
label="Protein Visualization",
)
# Example sequences that populate the sequence textbox.
gr.Examples(
["APTMPPPLPP", "KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"],
inputs=[seq_input],
)
# Clicking "Search" fills the results label and the result selector.
search_button.click(
search_and_display,
inputs=[seq_input, n_results, index_selector],
outputs=[search_results, results_selector],
)
# A change of the result selector re-renders the viewer; the outputs are
# listed in the same order as switch_viz's return tuple.
results_selector.change(
switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
demo.launch()