File size: 937 Bytes
6509a73
27e2770
6509a73
27e2770
 
 
 
 
 
 
6509a73
27e2770
b2a3d53
27e2770
6509a73
27e2770
b2a3d53
27e2770
 
b2a3d53
 
 
db68005
6509a73
 
 
6441ca8
53aa3a7
 
 
6509a73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os.path
import sys
from glob import glob
from pathlib import Path

from huggingface_hub import snapshot_download

from credentials import get_token


def get_engines(index_repo: str, model_repo: str, *, min_size: int = 10000) -> dict:
    """Download an index dataset and its model code, then load the sub-index engines.

    Both repositories are fetched with ``snapshot_download``. The model
    snapshot directory is added to ``sys.path`` so that the ``protein_index``
    module it ships can be imported. Every sub-directory in the root of the
    index snapshot is then opened as a ``ProteinSearchEngine``.

    Args:
        index_repo: Hub repository id of the index (fetched as a dataset repo).
        model_repo: Hub repository id of the model (fetched as a model repo);
            its snapshot must contain an importable ``protein_index`` module.
        min_size: Only engines whose ``len()`` is strictly greater than this
            value are kept. Defaults to 10000.

    Returns:
        A dict mapping each sub-index directory name to its loaded engine,
        in sorted directory-name order.
    """
    # NOTE(review): `use_auth_token` is deprecated in recent huggingface_hub
    # releases in favour of `token` -- confirm the pinned version before
    # switching the keyword.
    token = get_token()
    index_path = Path(
        snapshot_download(index_repo, use_auth_token=token, repo_type="dataset")
    )
    local_arch_path = Path(
        snapshot_download(model_repo, use_auth_token=token, repo_type="model")
    )

    # Make the downloaded model code importable, without piling up duplicate
    # sys.path entries when this function is called more than once.
    arch_dir = str(local_arch_path)
    if arch_dir not in sys.path:
        sys.path.append(arch_dir)
    from protein_index import (  # pylint: disable=import-error,import-outside-toplevel
        ProteinSearchEngine,
    )

    engines = {}
    # A trailing "/" in the pattern would be dropped by pathlib, so the glob
    # also yields plain files; filter to directories explicitly. Sorting makes
    # the resulting dict order deterministic.
    for subindex_path in sorted(glob(str(index_path / "*"))):
        if not os.path.isdir(subindex_path):
            continue
        engine = ProteinSearchEngine(data_path=Path(subindex_path))
        if len(engine) > min_size:
            engines[os.path.basename(subindex_path)] = engine
    return engines