protein_binding_search / get_index.py
roni
filtering on indexes with data
53aa3a7
raw
history blame
937 Bytes
import os.path
import sys
from glob import glob
from pathlib import Path
from huggingface_hub import snapshot_download
from credentials import get_token
def get_engines(index_repo: str, model_repo: str, min_index_size: int = 10000) -> dict:
    """Download an index dataset and its model code, then load the sub-index engines.

    Both repositories are fetched with ``snapshot_download`` using the token
    returned by ``get_token()``. The model snapshot directory is added to
    ``sys.path`` so that the ``protein_index`` module it ships can be imported.
    Every sub-directory directly under the index snapshot is opened as a
    ``ProteinSearchEngine``; engines that are too small are dropped.

    Args:
        index_repo: Hub repository id of the index, fetched with
            ``repo_type="dataset"``.
        model_repo: Hub repository id of the model code, fetched with
            ``repo_type="model"``. Its snapshot must provide ``protein_index``.
        min_index_size: An engine is kept only when ``len(engine)`` is strictly
            greater than this value. Defaults to 10000.

    Returns:
        A dict mapping each sub-index directory name to its loaded
        ``ProteinSearchEngine``, in sorted path order.
    """
    token = get_token()
    index_path = Path(
        snapshot_download(index_repo, use_auth_token=token, repo_type="dataset")
    )
    local_arch_path = Path(
        snapshot_download(model_repo, use_auth_token=token, repo_type="model")
    )

    # Make the downloaded model code importable; skip the append when the path
    # is already registered so repeated calls do not pile up duplicates.
    arch_dir = str(local_arch_path)
    if arch_dir not in sys.path:
        sys.path.append(arch_dir)
    from protein_index import (  # pylint: disable=import-error,import-outside-toplevel
        ProteinSearchEngine,
    )

    # pathlib drops a trailing "/" from a pattern such as "*/", so the glob
    # alone would also return plain files (README.md, ...). Filter to
    # directories explicitly; sorting makes the resulting key order stable.
    subindex_paths = sorted(
        path for path in glob(str(index_path / "*")) if os.path.isdir(path)
    )

    engines = {}
    for subindex_path in subindex_paths:
        subindex_name = os.path.basename(subindex_path)
        engine = ProteinSearchEngine(data_path=Path(subindex_path))
        # Keep only sub-indexes holding more than ``min_index_size`` entries.
        if len(engine) > min_index_size:
            engines[subindex_name] = engine
    return engines