import os.path
import sys
from glob import glob
from pathlib import Path

from huggingface_hub import snapshot_download

from credentials import get_token


def get_engines(
    index_repo: str, model_repo: str, *, min_entries: int = 10000
) -> dict:
    """Build one search engine per sub-index directory of a downloaded index.

    Both repositories are fetched with ``snapshot_download`` using the token
    returned by ``get_token()``. The local snapshot of ``model_repo`` is put
    on ``sys.path`` so that ``protein_index`` can be imported from it.

    Args:
        index_repo: Hub id of the dataset repository holding the sub-indexes.
        model_repo: Hub id of the model repository; presumably the one that
            ships the ``protein_index`` module (confirm against the repo).
        min_entries: An engine is kept only when ``len(engine)`` is strictly
            greater than this value. Defaults to the previous hard-coded
            threshold of 10000.

    Returns:
        A dict mapping each sub-index directory name to its
        ``ProteinSearchEngine``, limited to engines above ``min_entries``.
    """
    # NOTE(review): recent huggingface_hub releases prefer ``token=`` over
    # ``use_auth_token=``; confirm against the pinned version before changing.
    index_path = Path(
        snapshot_download(index_repo, use_auth_token=get_token(), repo_type="dataset")
    )
    local_arch_path = Path(
        snapshot_download(model_repo, use_auth_token=get_token(), repo_type="model")
    )

    # Avoid growing sys.path with a duplicate entry on every call.
    arch_dir = str(local_arch_path)
    if arch_dir not in sys.path:
        sys.path.append(arch_dir)
    from protein_index import (  # pylint: disable=import-error,import-outside-toplevel
        ProteinSearchEngine,
    )

    # pathlib drops a trailing slash, so a "*/" pattern built through Path
    # would also match regular files in the snapshot root. Filter for
    # directories explicitly; sorting makes the result order deterministic.
    subindex_paths = sorted(
        path for path in glob(str(index_path / "*")) if os.path.isdir(path)
    )

    engines = {}
    for subindex_path in subindex_paths:
        engine = ProteinSearchEngine(data_path=Path(subindex_path))
        if len(engine) > min_entries:
            engines[os.path.basename(subindex_path)] = engine
    return engines