# ZackBradshaw's picture
# Upload folder using huggingface_hub
# e67043b verified
# raw
# history blame
# 4.96 kB
import json
import random
from typing import List, Optional, Union
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel

from ...tool import Tool
class ChemicalPropAPI:
    """Small client for the PubChem PUG REST ``compound`` endpoint.

    HTTP error statuses are not raised by the lookup methods: a response
    that contains none of the expected tags simply yields an empty list.
    """

    # Property names requested by ``get_prop_by_cid``, in request order.
    _PROPERTIES = (
        "MolecularFormula",
        "MolecularWeight",
        "CanonicalSMILES",
        "IsomericSMILES",
        "IUPACName",
        "XLogP",
        "ExactMass",
        "MonoisotopicMass",
        "TPSA",
        "Complexity",
        "Charge",
        "HBondDonorCount",
        "HBondAcceptorCount",
        "RotatableBondCount",
        "HeavyAtomCount",
        "CovalentUnitCount",
    )

    def __init__(self, timeout: float = 30.0) -> None:
        """Create the client.

        Args:
            timeout: Seconds to wait for each HTTP request before
                ``requests`` raises a timeout error. Without a timeout a
                stalled connection would block the caller indefinitely.
        """
        self._endpoint = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/"
        self._timeout = timeout

    def _url(self, namespace: str, identifier: str, operation: str) -> str:
        """Return ``<endpoint><namespace>/<identifier>/<operation>``.

        The identifier is percent-encoded so that characters with a meaning
        in URL syntax are sent as data. For example the ``#`` of a triple
        bond in a SMILES string would otherwise start a URL fragment and
        truncate the request path. Commas are left as-is.
        """
        encoded = quote(str(identifier), safe=",")
        return f"{self._endpoint}{namespace}/{encoded}/{operation}"

    def _fetch_tag_texts(
        self, url: str, tag: str, params: Optional[dict] = None
    ) -> List[str]:
        """GET ``url`` and return the text of every ``tag`` element in the body."""
        response = requests.get(url, params=params, timeout=self._timeout)
        # ``response.text`` is already decoded, so no ``from_encoding`` is
        # passed: BeautifulSoup ignores it for str input and emits a warning.
        # "html.parser" lower-cases tag names, hence the lower-case lookups.
        soup = BeautifulSoup(response.text, "html.parser")
        # ``find_all`` always returns a (possibly empty) list, never None.
        return [node.text for node in soup.find_all(tag)]

    def get_name_by_cid(self, cid: str, top_k: Optional[int] = None) -> List[str]:
        """Return synonyms of the compound ``cid``.

        Args:
            cid: PubChem compound ID.
            top_k: Keep only the first ``top_k`` synonyms; ``None`` keeps all.
        """
        names = self._fetch_tag_texts(self._url("cid", cid, "synonyms/XML"), "synonym")
        # Slicing with ``None`` keeps the whole list.
        return names[:top_k]

    def get_cid_by_struct(self, smiles: str) -> List[str]:
        """Return the compound IDs found for the SMILES string ``smiles``."""
        return self._fetch_tag_texts(self._url("smiles", smiles, "cids/XML"), "cid")

    def get_cid_by_name(self, name: str, name_type: Optional[str] = None) -> List[str]:
        """Return the compound IDs found for ``name``.

        Args:
            name: Compound name to look up.
            name_type: Optional value for the ``name_type`` query argument
                (the tool endpoints in this file pass ``"word"``).
        """
        params = None if name_type is None else {"name_type": name_type}
        return self._fetch_tag_texts(
            self._url("name", name, "cids/XML"), "cid", params=params
        )

    def get_prop_by_cid(self, cid: str) -> dict:
        """Return the first property record reported for the compound ``cid``.

        Raises:
            KeyError: If the JSON response has no ``PropertyTable`` entry.
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        operation = "property/" + ",".join(self._PROPERTIES) + "/json"
        response = requests.get(
            self._url("cid", cid, operation), timeout=self._timeout
        )
        return json.loads(response.text)["PropertyTable"]["Properties"][0]
# Mirrors the payload shape returned by the `/get_name` and `/get_allname`
# endpoints in `build_tool`: a dict with a single `names` key.
# NOTE(review): not referenced elsewhere in the visible code.
class GetNameResponse(BaseModel):
    """name list"""

    # Synonym strings for the queried compound.
    names: List[str]
# A status value plus optional string content.
# NOTE(review): not referenced elsewhere in the visible code. The shape
# resembles what `/get_id_by_struct` returns, but that endpoint emits string
# states ("no result" / "matched") while `state` is declared `int` here --
# confirm which is intended before using this as a response model.
class GetStructureResponse(BaseModel):
    """structure list"""

    state: int
    # Defaults to None, so it may be left out.
    content: Optional[str] = None
# A status value plus either a single string or a list of strings.
# NOTE(review): not referenced elsewhere in the visible code. The shape
# resembles what `/get_id` returns, but that endpoint emits string states
# ("precise" / "not precise") and, in the "not precise" case, a list of
# {"names": [...]} dicts -- neither matches the types declared here; confirm.
class GetIDResponse(BaseModel):
    state: int
    content: Union[str, List[str]]
def build_tool(config) -> Tool:
    """Build the "Chemical Property Plugin" tool and register its endpoints.

    Args:
        config: Mapping of build options. When it contains a truthy
            ``"debug"`` entry, the object stored under
            ``config["chemical_prop_api"]`` is used as the backend instead of
            a fresh ``ChemicalPropAPI`` (e.g. to inject a stub).

    Returns:
        The ``Tool`` with ``/get_name``, ``/get_allname``,
        ``/get_id_by_struct``, ``/get_id`` and ``/get_prop`` registered.

    NOTE(review): the endpoint docstrings below are presumably surfaced as
    endpoint/tool descriptions, so they are kept verbatim -- confirm before
    rewording them.
    """
    # NOTE(review): logo_url, contact_email and legal_info_url look like
    # placeholder values (legal_info_url is not a URL) -- confirm real ones.
    tool = Tool(
        "Chemical Property Plugin",
        description="looking up a chemical's property",
        name_for_model="Chemical Property",
        description_for_model="Plugin for looking up a chemical's property using a chemical knowledge base. All input should be a json like {'input': 'some input'}. Please use the provided questions and search step by step.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="[email protected]",
        legal_info_url="[email protected]",
    )

    if "debug" in config and config["debug"]:
        chemical_prop_api = config["chemical_prop_api"]
    else:
        chemical_prop_api = ChemicalPropAPI()

    def _top_names(cid: str) -> List[str]:
        # Same lookup that /get_name performs, returned as a plain list.
        return chemical_prop_api.get_name_by_cid(cid, top_k=3)

    @tool.get("/get_name")
    def get_name(cid: str):
        """prints the possible 3 synonyms of the queried compound ID"""
        return {"names": _top_names(cid)}

    @tool.get("/get_allname")
    def get_allname(cid: str):
        """prints all the possible synonyms (might be too many, use this function carefully)."""
        return {"names": chemical_prop_api.get_name_by_cid(cid)}

    @tool.get("/get_id_by_struct")
    def get_id_by_struct(smiles: str):
        """prints the ID of the queried compound SMILES. This should only be used if smiles is provided or retrieved in the previous step. The input should not be a string, but a SMILES formula."""
        cids = chemical_prop_api.get_cid_by_struct(smiles)
        if not cids:
            return {"state": "no result"}
        return {"state": "matched", "content": cids[0]}

    @tool.get("/get_id")
    def get_id(name: str):
        """prints the ID of the queried compound name, and prints the possible 5 names if the queried name can not been precisely matched,"""
        # First try an exact name lookup.
        cids = chemical_prop_api.get_cid_by_name(name)
        if cids:
            return {"state": "precise", "content": cids[0]}

        # Fall back to a word-level search. The first hit counts as precise
        # only if the queried name is one of its top synonyms. The membership
        # test must run against the synonym list itself: testing against the
        # {"names": [...]} payload would compare `name` with the dict's keys.
        cids = chemical_prop_api.get_cid_by_name(name, name_type="word")
        if cids and name in _top_names(cids[0]):
            return {"state": "precise", "content": cids[0]}

        # No precise match: report the synonyms of up to 5 randomly chosen
        # candidates, each in the same {"names": [...]} shape as /get_name.
        random.shuffle(cids)
        candidates = [{"names": _top_names(cid)} for cid in cids[:5]]
        return {"state": "not precise", "content": candidates}

    @tool.get("/get_prop")
    def get_prop(cid: str):
        """prints the properties of the queried compound ID"""
        return chemical_prop_api.get_prop_by_cid(cid)

    return tool