File size: 1,524 Bytes
21f0ce7 dffb46d 21f0ce7 dffb46d 21f0ce7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
from glob import glob
import pandas as pd
from huggingface_hub import snapshot_download
import json
from tqdm.auto import tqdm
import os
import traceback
from functions import pr_already_exists, commit
# Hub dataset repository that run_pr_worker() snapshots before scanning.
QUEUE_REPO = "eduagarcia-temp/llm_pt_leaderboard_requests"

# Local directory the snapshot is written to and searched for *.json files.
EVAL_REQUESTS_PATH = "./eval-queue/"

# Case-sensitive substrings: a model whose name contains any of these is
# skipped by run_pr_worker().
blacklist = [
    "PORTULAN",
    "Weni",
    "22h",
    "t5",
]
def _is_pr_candidate(model_data, blocked_terms, min_average=0.25):
    """Return True when a request record passes every filter for a PR.

    A record qualifies only if it is a JSON object whose ``status`` is
    ``'FINISHED'``, whose ``main_language`` is ``"Portuguese"``, whose
    ``result_metrics_average`` is a number not below ``min_average``, and
    whose ``model`` name is a string containing none of ``blocked_terms``.

    Missing or malformed fields make the record ineligible instead of
    raising, so one bad file cannot stop the whole scan.
    """
    if not isinstance(model_data, dict):
        return False
    if model_data.get('status') != 'FINISHED':
        return False
    # A missing 'main_language' key yields None, which is also rejected here.
    if model_data.get('main_language') != "Portuguese":
        return False
    average = model_data.get('result_metrics_average')
    if not isinstance(average, (int, float)) or average < min_average:
        return False
    model_name = model_data.get('model')
    if not isinstance(model_name, str):
        return False
    # Case-sensitive substring match against every blocked term.
    return not any(term in model_name for term in blocked_terms)


def run_pr_worker():
    """Scan the evaluation request queue and open PRs for eligible models.

    Downloads a snapshot of the ``QUEUE_REPO`` dataset into
    ``EVAL_REQUESTS_PATH``, then walks every ``*.json`` file found there
    (recursively). For each record that passes ``_is_pr_candidate`` and for
    which ``pr_already_exists`` reports no existing PR, ``commit`` is called
    for the model.

    Unreadable or invalid request files are reported and skipped. Errors
    raised while checking for or creating a PR are printed with their
    traceback and do not interrupt processing of the remaining files.
    """
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
    )

    pattern = os.path.join(EVAL_REQUESTS_PATH, '**/*.json')
    for filepath in glob(pattern, recursive=True):
        try:
            # Explicit encoding: do not depend on the platform default.
            with open(filepath, 'r', encoding='utf-8') as f:
                model_data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"Skipping unreadable request file {filepath}: {e}")
            continue

        if not _is_pr_candidate(model_data, blacklist):
            continue

        model_name = model_data['model']
        try:
            if not pr_already_exists(model_name):
                print(f"Opening PR for {model_name}")
                # NOTE(review): check_if_pr_exists=True presumably makes
                # commit() re-check for an existing PR itself - confirm
                # against functions.commit.
                commit(model_name, check_if_pr_exists=True)
        except Exception as e:
            # Per-model boundary: log and keep going with the next file.
            traceback.print_exc()
            print(f"Error on {model_name}: {str(e)}")
# Script entry point: run one pass of the PR worker when executed directly
# (not when this module is imported).
if __name__ == "__main__":
    run_pr_worker()
|