import logging
from typing import Any, Optional, Tuple

import requests

logger = logging.getLogger(__name__)

# Seconds to wait for each remote lookup before giving up.
_REQUEST_TIMEOUT = 1.0


def _fetch_json(url: str) -> Any:
    """Return the decoded JSON body served at *url*, or None on any failure.

    Connection errors, timeouts, non-OK status codes and undecodable bodies
    are all reported as None so that callers can fall back to their
    "not found" result instead of crashing.
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as err:
        logger.warning("Request to %s failed: %s", url, err)
        return None
    if not response.ok:
        return None
    try:
        return response.json()
    except ValueError as err:  # body was not valid JSON
        logger.warning("Could not decode JSON from %s: %s", url, err)
        return None


def _first(items: Any, default: Any = None) -> Any:
    """Return the first element of a non-empty list/tuple, else *default*."""
    if isinstance(items, (list, tuple)) and items:
        return items[0]
    return default


def _first_dict(items: Any) -> dict:
    """Return the first element of *items* if it is a dict, else an empty dict."""
    head = _first(items)
    return head if isinstance(head, dict) else {}


def render_html(pdb_id, chain):
    """Return the iframe markup string, or "" when either argument is None.

    Args:
        pdb_id: Structure identifier; only checked against None here.
        chain: Chain identifier; only checked against None here.
    """
    if pdb_id is None or chain is None:
        return ""
    # NOTE(review): `html` is built but never referenced by the returned
    # value, and neither template interpolates `pdb_id` or `chain` — confirm
    # whether the iframe template was meant to embed them.
    html = ' "\n'  # noqa: F841
    iframe = " "
    return iframe


def get_gene_name(pdb_id, chain_id) -> Tuple[Optional[str], Optional[str]]:
    """Look up the gene name and species for one chain of a PDB entry.

    The chain is first resolved to its polymer entity id, which is then used
    to query the entity record.

    Returns:
        A ``(gene_name, species)`` tuple; either element is None when it
        could not be determined.
    """
    entity_id = get_polymer_entity_id(chain_id, pdb_id)
    return get_gene_name_from_polymer_entity(pdb_id=pdb_id, entity_id=entity_id)


def get_polymer_entity_id(chain_id, pdb_id) -> Optional[int]:
    """Return the integer polymer entity id for a chain, or None if unavailable.

    Queries the ``polymer_entity_instance`` endpoint on data.rcsb.org. None is
    returned when the request fails or the response lacks a usable
    ``entity_id``.
    """
    url = (
        f"https://data.rcsb.org/rest/v1/core/"
        f"polymer_entity_instance/{pdb_id}/{chain_id}"
    )
    res_data = _fetch_json(url)
    if not isinstance(res_data, dict):
        return None
    identifiers = res_data.get("rcsb_polymer_entity_instance_container_identifiers")
    try:
        return int(identifiers["entity_id"])
    except (KeyError, TypeError, ValueError):
        # Missing container, missing key, or a non-numeric id.
        return None


def get_gene_name_from_polymer_entity(
    pdb_id, entity_id
) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(gene_name, species)`` for a polymer entity.

    Reads the first ``rcsb_entity_source_organism`` record of the entity. If
    that record carries no gene name but the entity lists a UniProt id, the
    gene name is looked up on UniProt instead.

    Returns:
        ``(None, None)`` when *entity_id* is falsy or the request fails;
        otherwise whichever of the two values could be found.
    """
    if not entity_id:
        return None, None

    url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    res_data = _fetch_json(url)
    if not isinstance(res_data, dict):
        return None, None

    # Both lists may be absent, null or empty in the payload, so never index
    # them directly.
    source_organism = _first_dict(res_data.get("rcsb_entity_source_organism"))
    gene_name = _first_dict(source_organism.get("rcsb_gene_name")).get("value")
    species = source_organism.get("scientific_name")

    if gene_name is None:
        uniprot_id = _extract_uniprot_id(res_data)
        if uniprot_id is not None:
            gene_name = get_gene_name_from_uniprot(uniprot_id)
    return gene_name, species


def get_gene_name_from_uniprot(uniprot_id) -> Optional[str]:
    """Return the primary gene name of a UniProtKB entry, or None.

    Reads ``genes[0].geneName.value`` from the rest.uniprot.org response; any
    missing level, or a failed request, yields None.
    """
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}"
    uniprot_data = _fetch_json(url)
    if not isinstance(uniprot_data, dict):
        return None
    gene_entry = _first_dict(uniprot_data.get("genes")).get("geneName")
    if not isinstance(gene_entry, dict):
        return None
    return gene_entry.get("value")


def _extract_uniprot_id(res_data):
    """Return the first UniProt id listed in a polymer-entity record, or None."""
    ids = res_data.get("rcsb_polymer_entity_container_identifiers")
    if not isinstance(ids, dict):
        return None
    return _first(ids.get("uniprot_ids"))


def get_protein_name(pdb_id: str):
    """Return the ``struct.title`` of a PDB entry, or "Unknown" if unavailable.

    "Unknown" is returned when the request fails or the response has no
    ``struct`` / ``title`` field.
    """
    url = f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}"
    data = _fetch_json(url)
    try:
        return data["struct"]["title"]
    except (KeyError, TypeError):
        # `data` is None (request failed) or lacks the expected fields.
        return "Unknown"