"""Gradio front end for the protein-peptide binding search engine.

The app lets a user type a sequence, pick an index, run a search and
then visualize any PDB hit returned by the search.
"""

import gradio as gr

from concurrency import execute_multithread
from get_index import get_engines
from protein_viz import get_gene_name, get_protein_name, render_html

index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"
engines = get_engines(index_repo, model_repo)
available_indexes = list(engines.keys())

# Bounds and default for the "N Results" field.
MIN_N_RESULTS = 1
MAX_N_RESULTS = 100
DEFAULT_N_RESULTS = 10

# Label used when a FASTA description carries no "GN=" tag.
UNKNOWN_GENE = "unknown"

# NOTE(review): line breaks inside this Markdown text were reconstructed
# from a whitespace-collapsed source - confirm against the rendered page.
app_description = """
# Protein Binding Search Engine

This application enables a quick protein-peptide binding search based on sequences.
You can use it to search the full [PDB](https://www.rcsb.org/) database or in a specific organism genome.
"""


def search_and_display(seq, n_res, index_selection):
    """Run a sequence search and build the values for the result widgets.

    Args:
        seq: Query sequence, passed to the engine's ``search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``.
            ``None`` falls back to ``DEFAULT_N_RESULTS``.
        index_selection: Key into ``engines`` selecting the index to search.

    Returns:
        A ``(formatted_results, dropdown_update)`` pair: the label -> score
        mapping for the results widget and the update for the dropdown that
        selects which hit to visualize.
    """
    if n_res is None:
        # A cleared gr.Number field is delivered as None, which min()/max()
        # cannot compare against an int.
        n_res = DEFAULT_N_RESULTS
    n_res = int(limit_n_results(n_res))
    engine = engines[index_selection]
    search_res = engine.search_by_sequence(seq, n=n_res)
    results_options = update_dropdown_menu(search_res)
    formatted_search_results = format_search_results(search_res)
    return formatted_search_results, results_options


def limit_n_results(n):
    """Clamp *n* to the inclusive range [MIN_N_RESULTS, MAX_N_RESULTS]."""
    return max(min(n, MAX_N_RESULTS), MIN_N_RESULTS)


def update_dropdown_menu(search_res):
    """Build the dropdown update listing the hits that can be visualized.

    Only rows carrying both ``"pdb_name"`` and ``"chain_id"`` become
    choices, formatted as ``"<pdb_name>.<chain_id>"``. With at least one
    choice the dropdown is shown with the first choice selected; otherwise
    it is hidden and its value cleared.
    """
    choices = [
        ".".join([row["pdb_name"], row["chain_id"]])
        for row in search_res
        if "pdb_name" in row and "chain_id" in row
    ]
    has_choices = bool(choices)
    return gr.Dropdown.update(
        choices=choices,
        interactive=True,
        value=choices[0] if has_choices else None,
        visible=has_choices,
    )


def format_search_results(raw_search_results):
    """Turn raw search rows into a ``{label: score}`` mapping.

    Each row is formatted by ``format_search_result`` through
    ``execute_multithread``, one worker per row. Rows that produce the same
    label overwrite each other; the last one yielded wins.
    """
    n_workers = len(raw_search_results)
    if n_workers == 0:
        # Nothing to format; also avoids asking the helper for a pool of
        # zero workers.
        return {}
    return dict(
        execute_multithread(
            func=format_search_result,
            inputs=({"raw_result": res} for res in raw_search_results),
            n_workers=n_workers,
        )
    )


def format_search_result(raw_result):
    """Return the ``(label, score)`` pair for a single raw search row.

    Rows with a ``"pdb_name"`` key are treated as PDB hits; all other rows
    are treated as FASTA hits.
    """
    if "pdb_name" in raw_result:
        return parse_pdb_search_result(raw_result)
    return parse_fasta_search_result(raw_result)


def parse_fasta_search_result(raw_result):
    """Return ``("Gene: <gene>", score)`` for a FASTA search row.

    The gene is extracted from ``raw_result["description"]``.
    """
    gene = parse_gene_from_fasta_entry(raw_result["description"])
    key = f"Gene: {gene}"
    value = raw_result["score"]
    return key, value


def parse_pdb_search_result(raw_result):
    """Return ``(label, score)`` for a PDB search row.

    The label always contains ``"PDB: <pdb_name>.<chain_id>"``. Gene and
    organism are appended when ``get_gene_name`` returns a gene name.
    """
    prot = raw_result["pdb_name"]
    chain = raw_result["chain_id"]
    value = raw_result["score"]
    gene_name, species = get_gene_name(pdb_id=prot, chain_id=chain)
    key = f"PDB: {prot}.{chain}"
    if gene_name is not None:
        key += f" | Gene: {gene_name} | Organism: {species}"
    return key, value


def parse_gene_from_fasta_entry(description):
    """Extract the value of the ``GN=`` tag from a FASTA description.

    Returns the text following the first ``"GN="`` up to the next space,
    or ``UNKNOWN_GENE`` when the description has no ``"GN="`` tag.
    """
    parts = description.split("GN=")
    if len(parts) < 2:
        # No gene tag: return a placeholder instead of raising IndexError
        # and aborting the whole search.
        return UNKNOWN_GENE
    return parts[1].split(" ")[0]


def switch_viz(new_choice):
    """Produce the widget values for the selected search result.

    Args:
        new_choice: Dropdown value of the form ``"<pdb_id>.<chain>"``, or
            ``None`` when nothing is selected.

    Returns:
        ``(html, title_update, description_update)``: the viewer HTML and
        the updates for the visualization header and body Markdown. With no
        selection the HTML is empty and both Markdown widgets are hidden.
    """
    if new_choice is None:
        html = ""
        title_update = gr.Markdown.update(visible=False)
        description_update = gr.Markdown.update(value=None, visible=False)
        return html, title_update, description_update

    choice_parts = new_choice.split(".")
    pdb_id, chain = choice_parts[0], choice_parts[1]
    title_update = gr.Markdown.update(visible=True)
    protein_name = get_protein_name(pdb_id)
    new_value = f"""**PDB Title**: {protein_name}"""
    description_update = gr.Markdown.update(value=new_value, visible=True)
    html = render_html(pdb_id=pdb_id, chain=chain)
    return html, title_update, description_update


# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-collapsed source - confirm against the intended page layout.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown(app_description)
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    seq_input = gr.Textbox(
                        value="APTMPPPLPP", label="Input Sequence"
                    )
                    n_results = gr.Number(
                        DEFAULT_N_RESULTS, label="N Results"
                    )
                    index_selector = gr.Dropdown(
                        choices=available_indexes,
                        value="PDB",
                        multiselect=False,
                        visible=True,
                        label="Index",
                    )
                    search_button = gr.Button("Search", variant="primary")
                search_results = gr.Label(
                    num_top_classes=20, label="Search Results"
                )
            viz_header = gr.Markdown("## Visualization", visible=False)
            results_selector = gr.Dropdown(
                choices=[],
                multiselect=False,
                visible=False,
                label="Visualized Search Result",
            )
            viz_body = gr.Markdown("", visible=False)
            protein_viz = gr.HTML(
                value=render_html(pdb_id=None, chain=None),
                label="Protein Visualization",
            )
            gr.Examples(
                ["APTMPPPLPP", "KFLIYQMECSTMIFGL", "PHFAMPPIHEDHLE", "AEERIISLD"],
                inputs=[seq_input],
            )
        # Searching refreshes both the result list and the dropdown of
        # hits that can be visualized.
        search_button.click(
            search_and_display,
            inputs=[seq_input, n_results, index_selector],
            outputs=[search_results, results_selector],
        )
        # Picking a hit updates the viewer, its header and its description.
        results_selector.change(
            switch_viz,
            inputs=results_selector,
            outputs=[protein_viz, viz_header, viz_body],
        )

if __name__ == "__main__":
    demo.launch()