protein_binding_search / protein_viz.py
roni
getting the missing gene names from uniprot
743e6bd
raw
history blame
3.21 kB
import requests
def render_html(pdb_id, chain):
    """Build an ``<iframe>`` snippet that embeds a 3Dmol.js viewer.

    The iframe's ``srcdoc`` holds a small HTML document that loads the
    3Dmol.js scripts and declares one ``viewer_3Dmoljs`` element. ``pdb_id``
    is passed to the viewer as ``data-pdb`` and ``chain`` is used in the
    ``data-select1`` / ``data-zoomto`` attributes.

    Args:
        pdb_id: PDB entry identifier to display, or ``None``.
        chain: Chain identifier to select and zoom to, or ``None``.

    Returns:
        The iframe markup as a string, or ``""`` when either argument is
        ``None``.
    """
    if pdb_id is None or chain is None:
        return ""

    # Imported locally so the block does not depend on a new module-level import.
    from html import escape

    # The values are parsed twice by the browser: once as part of the outer
    # single-quoted ``srcdoc`` attribute and once as a double-quoted attribute
    # of the inner document. Escaping twice keeps quotes, ``&`` and ``<`` from
    # terminating either attribute; plain alphanumeric IDs are left unchanged.
    safe_pdb_id = escape(escape(str(pdb_id)))
    safe_chain = escape(escape(str(chain)))

    # NOTE: the document must not contain a raw single quote, because it is
    # embedded below inside srcdoc='...'.
    document = f"""
<html>
<head>
<script src="https://3Dmol.org/build/3Dmol-min.js"></script>
<script src="https://3Dmol.org/build/3Dmol.ui-min.js"></script>
</head>
<body>
<div style="height: 400px; position: relative;" class="viewer_3Dmoljs"
data-pdb="{safe_pdb_id}"
data-backgroundalpha="0.0"
data-style="cartoon:color=white"
data-select1="chain:{safe_chain}"
data-zoomto="chain:{safe_chain}"
data-style1="cartoon:color=spectrum"
data-spin="axis:y;speed:0.2"
></div>
</body>
</html>
"""
    return f"""
<iframe style="width: 100%; height: 480px" name="protein-vis"
frameborder="0" srcdoc='{document}'></iframe>
"""
def get_gene_name(pdb_id, chain_id):
    """Look up the gene name and species for one chain of a PDB entry.

    The chain is first mapped to a polymer entity ID with
    ``get_polymer_entity_id``; that ID is then handed to
    ``get_gene_name_from_polymer_entity``.

    Args:
        pdb_id: PDB entry identifier.
        chain_id: Chain identifier within that entry.

    Returns:
        A ``(gene_name, species)`` tuple as produced by
        ``get_gene_name_from_polymer_entity``.
    """
    polymer_entity = get_polymer_entity_id(chain_id, pdb_id)
    name, organism = get_gene_name_from_polymer_entity(
        pdb_id=pdb_id,
        entity_id=polymer_entity,
    )
    return name, organism
def get_polymer_entity_id(chain_id, pdb_id):
    """Fetch the polymer entity ID for a chain from the RCSB data API.

    Queries the ``polymer_entity_instance`` endpoint for ``pdb_id`` /
    ``chain_id`` with a one-second timeout.

    Args:
        chain_id: Chain identifier within the entry.
        pdb_id: PDB entry identifier.

    Returns:
        The entity ID as an ``int``, or ``None`` when the response status is
        not OK. Exceptions from ``requests.get`` are not caught here.
    """
    endpoint = (
        "https://data.rcsb.org/rest/v1/core/"
        f"polymer_entity_instance/{pdb_id}/{chain_id}"
    )
    reply = requests.get(endpoint, timeout=1)
    if not reply.ok:
        return None

    payload = reply.json()
    identifiers = payload["rcsb_polymer_entity_instance_container_identifiers"]
    return int(identifiers["entity_id"])
def get_gene_name_from_polymer_entity(pdb_id, entity_id):
    """Fetch the gene name and species of a polymer entity from RCSB.

    Queries the ``polymer_entity`` endpoint with a one-second timeout and
    reads the first source organism in the response. When that record has no
    gene name but the entity lists a UniProt ID, the gene name is looked up
    through ``get_gene_name_from_uniprot`` instead.

    Args:
        pdb_id: PDB entry identifier.
        entity_id: Polymer entity ID; a falsy value (e.g. ``None``) skips the
            lookup entirely.

    Returns:
        A ``(gene_name, species)`` tuple. Either element is ``None`` when it
        could not be determined; both are ``None`` when ``entity_id`` is
        falsy or the response status is not OK. Exceptions from
        ``requests.get`` are not caught here.
    """
    gene_name, species = None, None
    if not entity_id:
        return gene_name, species

    url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    response = requests.get(url, timeout=1)
    if not response.ok:
        return gene_name, species

    res_data = response.json()
    uniprot_id = _extract_uniprot_id(res_data)

    # ``.get(key, [{}])[0]`` only protects against a missing key; a key that
    # is present with an empty list or null would raise. ``or`` covers all
    # three cases.
    organisms = res_data.get("rcsb_entity_source_organism") or [{}]
    source_organism = organisms[0] or {}
    gene_names = source_organism.get("rcsb_gene_name") or [{}]
    gene_name = (gene_names[0] or {}).get("value")
    species = source_organism.get("scientific_name")

    # Fall back to UniProt only when RCSB gave no gene name.
    if gene_name is None and uniprot_id is not None:
        gene_name = get_gene_name_from_uniprot(uniprot_id)
    return gene_name, species
def get_gene_name_from_uniprot(uniprot_id):
    """Fetch the gene name of a UniProtKB entry from the UniProt REST API.

    Requests the entry with a one-second timeout and reads the
    ``geneName.value`` field of the first item in the ``genes`` list.

    Args:
        uniprot_id: UniProt accession to look up.

    Returns:
        The gene name, or ``None`` when the response status is not OK or the
        entry carries no gene name. Exceptions from ``requests.get`` are not
        caught here.
    """
    gene_name = None
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}"
    response = requests.get(url, timeout=1.0)
    if response.ok:
        uniprot_data = response.json()
        # A default passed to ``.get`` is ignored when the key exists with an
        # empty list or null, which would make ``[0]`` / ``.get`` raise.
        genes = uniprot_data.get("genes") or [{}]
        primary_name = (genes[0] or {}).get("geneName") or {}
        gene_name = primary_name.get("value")
    return gene_name
def _extract_uniprot_id(res_data):
    """Return the first UniProt ID listed in a polymer-entity response.

    Args:
        res_data: Decoded JSON mapping; the ID is read from
            ``rcsb_polymer_entity_container_identifiers.uniprot_ids``.

    Returns:
        The first entry of ``uniprot_ids``, or ``None`` when the identifiers
        section or the list is missing, null, or empty.
    """
    # ``or`` rather than a ``.get`` default: the default is not used when the
    # key exists with an empty list or null, and indexing would then raise.
    ids = res_data.get("rcsb_polymer_entity_container_identifiers") or {}
    uniprot_ids = ids.get("uniprot_ids") or [None]
    return uniprot_ids[0]
def get_protein_name(pdb_id: str):
    """Fetch the title of a PDB entry from the RCSB data API.

    Queries the ``entry`` endpoint with a one-second timeout and reads
    ``struct.title`` from the JSON response.

    Args:
        pdb_id: PDB entry identifier.

    Returns:
        The entry title, or the string ``"Unknown"`` when the response status
        is not OK. Exceptions from ``requests.get`` are not caught here.
    """
    reply = requests.get(
        f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}",
        timeout=1,
    )
    if not reply.ok:
        return "Unknown"

    entry = reply.json()
    return entry["struct"]["title"]