import gradio as gr

from get_index import get_engines
from protein_viz import get_protein_name, render_html

index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
# Mapping of index name -> search engine; built once at import time.
engines = get_engines(index_repo, model_repo)
available_indexes = list(engines.keys())
app_description = """
# Protein Binding Search Engine

This application enables a quick protein-peptide binding search based on sequences.
You can use it to search the full [PDB](https://www.rcsb.org/) database or in a specific organism genome.
"""
# Upper bound on the number of hits requested from an engine / shown in the label.
max_results = 100
# Number of hits pre-filled in the UI and used when the field is left empty.
default_n_results = 10


def search_and_display(seq, n_res, index_selection):
    """Search the selected index for ``seq`` and build the UI outputs.

    Args:
        seq: Query sequence typed by the user.
        n_res: Requested number of results (value of the "N Results" box).
        index_selection: Key into ``engines`` choosing which index to search.

    Returns:
        A ``(label_data, dropdown_update)`` tuple: the ``{key: score}``
        mapping for the results label and the update for the
        "Visualized Search Result" dropdown.
    """
    n_res = int(limit_n_results(n_res))
    engine = engines[index_selection]
    search_res = engine.search_by_sequence(seq, n=n_res)
    results_options = update_dropdown_menu(search_res)
    formatted_search_results = format_search_results(search_res)
    return formatted_search_results, results_options


def limit_n_results(n):
    """Clamp ``n`` to the range ``[1, max_results]``.

    ``None`` (what the Number box delivers when the user clears it) is
    replaced by ``default_n_results`` instead of raising ``TypeError``.
    """
    if n is None:
        n = default_n_results
    return max(min(n, max_results), 1)


def update_dropdown_menu(search_res):
    """Build the dropdown update listing every visualizable search hit.

    Each hit that carries both ``pdb_name`` and ``chain_id`` becomes a
    ``"<pdb_name>.<chain_id>"`` choice. The dropdown is shown with the first
    choice selected when there is at least one choice, and hidden otherwise.
    """
    choices = [
        ".".join([row["pdb_name"], row["chain_id"]])
        for row in search_res
        if "pdb_name" in row and "chain_id" in row
    ]
    return gr.Dropdown.update(
        choices=choices,
        interactive=True,
        value=choices[0] if choices else None,
        visible=bool(choices),
    )


def format_search_results(raw_search_results):
    """Convert raw search hits into the ``{display_key: score}`` label mapping.

    If two hits produce the same display key, the later one wins.
    """
    return dict(parse_pdb_search_result(res) for res in raw_search_results)


def parse_pdb_search_result(raw_result):
    """Turn one raw search hit into a ``(display_key, score)`` pair.

    The key is ``"PDB: <pdb_name>.<chain_id>"``, extended with gene and
    organism information when ``genes`` is not ``None``.

    Raises:
        KeyError: If the hit lacks ``pdb_name``, ``chain_id``, ``score``,
            ``genes`` or ``organism``.
    """
    # NOTE(review): update_dropdown_menu tolerates hits without
    # pdb_name/chain_id, but this function does not -- confirm whether such
    # hits can actually be returned by an engine.
    prot = raw_result["pdb_name"]
    chain = raw_result["chain_id"]
    value = raw_result["score"]
    gene_names = raw_result["genes"]
    species = raw_result["organism"]
    key = f"PDB: {prot}.{chain}"
    if gene_names is not None:
        key += f" | Genes: {gene_names} | Organism: {species}"
    return key, value


def switch_viz(new_choice):
    """Refresh the visualization widgets for the selected search result.

    Args:
        new_choice: ``"<pdb_id>.<chain>"`` string from the results dropdown,
            or ``None`` when nothing is selected.

    Returns:
        A ``(html, title_update, description_update)`` tuple for the HTML
        viewer, the "Visualization" header and the description markdown.
        With no selection the HTML is empty and both markdowns are hidden.
    """
    if new_choice is None:
        html = ""
        title_update = gr.Markdown.update(visible=False)
        description_update = gr.Markdown.update(value=None, visible=False)
    else:
        choice_parts = new_choice.split(".")
        pdb_id, chain = choice_parts[0], choice_parts[1]
        title_update = gr.Markdown.update(visible=True)
        protein_name = get_protein_name(pdb_id)
        new_value = f"""**PDB Title**: {protein_name}"""
        description_update = gr.Markdown.update(value=new_value, visible=True)
        html = render_html(pdb_id=pdb_id, chain=chain)
    return html, title_update, description_update


# NOTE(review): the nesting of the layout containers below was reconstructed
# from a source whose indentation was lost -- confirm against the intended UI.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown(app_description)
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    seq_input = gr.Textbox(value="APTMPPPLPP", label="Input Sequence")
                    n_results = gr.Number(default_n_results, label="N Results")
                    index_selector = gr.Dropdown(
                        choices=available_indexes,
                        value="PDB",
                        multiselect=False,
                        visible=True,
                        label="Index",
                    )
                    search_button = gr.Button("Search", variant="primary")
                    search_results = gr.Label(
                        num_top_classes=max_results, label="Search Results"
                    )
                    viz_header = gr.Markdown("## Visualization", visible=False)
                    results_selector = gr.Dropdown(
                        choices=[],
                        multiselect=False,
                        visible=False,
                        label="Visualized Search Result",
                    )
                    viz_body = gr.Markdown("", visible=False)
                    protein_viz = gr.HTML(
                        value=render_html(pdb_id=None, chain=None),
                        label="Protein Visualization",
                    )
                    gr.Examples(
                        ["APTMPPPLPP", "KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"],
                        inputs=[seq_input],
                    )
    # A search fills the results label and repopulates the dropdown; changing
    # the dropdown selection then drives the visualization widgets.
    search_button.click(
        search_and_display,
        inputs=[seq_input, n_results, index_selector],
        outputs=[search_results, results_selector],
    )
    results_selector.change(
        switch_viz, inputs=results_selector, outputs=[protein_viz, viz_header, viz_body]
    )

if __name__ == "__main__":
    demo.launch()