File size: 3,414 Bytes
050a9de 34e8fb9 a5c4771 34e8fb9 050a9de a5c4771 34e8fb9 7022444 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 34e8fb9 050a9de 7022444 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
from constants import EVAL_REQUESTS_PATH
from pathlib import Path
from huggingface_hub import HfApi
from dotenv import load_dotenv
import git
import os
# Let python-dotenv populate the environment before the settings below are read.
load_dotenv()

# Token used to authenticate against the Hugging Face Hub (dataset repo access).
TOKEN_HUB = os.getenv("TOKEN_HUB_V2")
# Dataset repo holding the evaluation queue, written as user/repo_name.
QUEUE_REPO = os.getenv("QUEUE_REPO")
# Local path the dataset repo is cloned to.
QUEUE_PATH = os.getenv("QUEUE_PATH")

# Hub API client shared by the helpers in this module.
hf_api = HfApi(endpoint="https://huggingface.co", token=TOKEN_HUB)
def load_all_info_from_dataset_hub():
    """Clone the queue dataset repo and collect the requests and results it holds.

    Returns:
        A 3-tuple ``(eval_queue_repo, requested_models, csv_results)``.

        * ``eval_queue_repo`` is always ``None``; it is kept so callers can
          keep unpacking three values.
        * ``requested_models`` is the list of stems of the ``*.txt`` files in
          the eval-requests folder of the clone.
        * ``csv_results`` is the path of the single ``*latest.csv`` file at
          the root of the clone, or ``None`` when there is not exactly one.

        All three are ``None`` when the token, ``QUEUE_REPO`` or
        ``QUEUE_PATH`` is not configured.
    """
    eval_queue_repo = None
    requested_models = None
    csv_results = None

    if TOKEN_HUB is None:
        print(
            "No HuggingFace token provided. Skipping evaluation requests and results."
        )
        return eval_queue_repo, requested_models, csv_results

    # Without the repo name or the clone target there is nothing to pull;
    # skip the same way as for a missing token instead of failing on None.
    if not QUEUE_REPO or not QUEUE_PATH:
        print(
            "QUEUE_REPO or QUEUE_PATH is not set. "
            "Skipping evaluation requests and results."
        )
        return eval_queue_repo, requested_models, csv_results

    print("Pulling evaluation requests and results.")

    # Pull the dataset repo.
    # NOTE(review): the token is embedded in the clone URL, so it may end up in
    # the clone's git config and in git error messages -- confirm acceptable.
    user_name = QUEUE_REPO.split("/")[0]
    repo_url = (
        f"https://{user_name}:{TOKEN_HUB}@huggingface.co/datasets/{QUEUE_REPO}"
    )
    git.Repo.clone_from(repo_url, QUEUE_PATH)

    # QUEUE_PATH comes from the environment as a plain string, so it has to be
    # turned into a Path before it can be joined with the requests folder.
    queue_path = Path(QUEUE_PATH)

    # Local directory where dataset repo is cloned + folder with eval requests
    request_files = get_all_requested_models(queue_path / EVAL_REQUESTS_PATH)
    requested_models = [p.stem for p in request_files]

    # Local directory where dataset repo is cloned
    csv_results = get_csv_with_results(queue_path)

    return eval_queue_repo, requested_models, csv_results
def upload_file(requested_model_name, path_or_fileobj):
    """Upload an evaluation request file to the queue dataset repo.

    The file is stored in the eval-requests folder of ``QUEUE_REPO`` under its
    own base name.

    Args:
        requested_model_name: Model name, used only in the commit message.
        path_or_fileobj: Object exposing a ``.name`` attribute (for example a
            ``pathlib.Path`` or an open file); it is passed unchanged to
            ``hf_api.upload_file``.
    """
    # An open file reports its full path in ``.name``; keep only the last
    # component so an absolute path cannot replace the requests folder.
    file_name = Path(path_or_fileobj.name).name
    # Paths inside a repo use forward slashes regardless of the local OS.
    dest_repo_file = (Path(EVAL_REQUESTS_PATH) / file_name).as_posix()
    hf_api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=dest_repo_file,
        repo_id=QUEUE_REPO,
        token=TOKEN_HUB,
        repo_type="dataset",
        commit_message=f"Add {requested_model_name} to eval queue",
    )
def get_all_requested_models(directory):
    """Return the ``*.txt`` files located directly in *directory*.

    Args:
        directory: Folder to scan, as a string or path object.

    Returns:
        A list of ``pathlib.Path`` objects, one per matching file.
    """
    return [*Path(directory).glob("*.txt")]
def get_csv_with_results(directory):
    """Find the results CSV whose file stem ends with ``latest``.

    Args:
        directory: Folder to scan, as a string or path object.

    Returns:
        The matching ``pathlib.Path`` when exactly one ``*.csv`` file in
        *directory* has a stem ending in ``latest``; otherwise ``None``.
    """
    candidates = [
        csv_file
        for csv_file in Path(directory).glob("*.csv")
        if csv_file.stem.endswith("latest")
    ]
    # Zero or several candidates are both treated as "no usable result".
    return candidates[0] if len(candidates) == 1 else None
def is_model_on_hub(model_name, revision="main") -> "tuple[bool, str | None]":
    """Check whether ``author/model_name`` is listed on the Hugging Face Hub.

    Args:
        model_name: Model identifier in the form ``author/model_name``. Spaces
            are removed before the check.
        revision: Accepted for interface compatibility; the lookup below does
            not use it.

    Returns:
        ``(True, None)`` when exactly one listed model has this identifier,
        otherwise ``(False, message)`` where ``message`` is a sentence
        fragment describing the problem.
    """
    invalid_name = (
        False,
        "is not a valid model name. Please use the format `author/model_name`.",
    )

    try:
        model_name = model_name.replace(" ", "")
    except (AttributeError, TypeError):
        # Not a string (e.g. None): report it as an invalid name.
        return invalid_name

    author, _, model_id = model_name.partition("/")
    # Require exactly one non-empty author and one non-empty model id; an
    # identifier with extra slashes can never match a listed model.
    if not author or not model_id or "/" in model_id:
        return invalid_name

    try:
        hits = sum(
            1
            for m in hf_api.list_models(author=author, search=model_id)
            if m.modelId == model_name
        )
    except Exception as e:
        # Best-effort lookup: any failure while querying the Hub is reported
        # to the caller as "not found" rather than raised.
        print(f"Could not get the model from the hub.: {e}")
        return False, "was not found on hub!"

    if hits != 1:
        return False, "was not found on the hub!"
    return True, None
|