|
from glob import glob |
|
import pandas as pd |
|
from huggingface_hub import snapshot_download |
|
import json |
|
from tqdm.auto import tqdm |
|
import os |
|
import traceback |
|
from functions import pr_already_exists, commit |
|
|
|
|
|
# Hub dataset repository whose snapshot is downloaded by the worker.
QUEUE_REPO = "eduagarcia-temp/llm_pt_leaderboard_requests"

# Local directory the snapshot is written to and scanned for request files.
EVAL_REQUESTS_PATH = "./eval-queue/"

# Substrings of model names; a model whose name contains any of them is skipped.
blacklist = [
    'PORTULAN',
    'Weni',
    '22h',
    't5',
]
|
|
|
def run_pr_worker():
    """Scan the evaluation request queue and open PRs for eligible models.

    Downloads a snapshot of the ``QUEUE_REPO`` dataset into
    ``EVAL_REQUESTS_PATH`` and then walks every ``*.json`` file found there
    (recursively). A request is acted on only when all of these hold:

    * ``status`` is ``'FINISHED'``;
    * ``main_language`` is ``"Portuguese"``;
    * ``result_metrics_average`` is a number that is not below ``0.25``;
    * ``model`` is a string containing none of the ``blacklist`` substrings.

    For each such request, ``commit`` is called for the model unless
    ``pr_already_exists`` reports that a PR is already there. (Judging by
    the log message, ``commit`` opens the PR -- confirm in ``functions``.)

    Files that cannot be read or parsed are reported and skipped, and
    records that lack a required field are skipped, so a single bad request
    file does not abort the whole run. Any exception raised while checking
    for or opening a PR is printed with its traceback and processing moves
    on to the next request.

    Returns:
        None.
    """
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
    )

    pattern = os.path.join(EVAL_REQUESTS_PATH, '**/*.json')
    for filepath in glob(pattern, recursive=True):
        try:
            # Explicit UTF-8 so decoding does not depend on the platform locale.
            with open(filepath, 'r', encoding='utf-8') as f:
                model_data = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError; report the bad file and keep going.
            print(f"Skipping unreadable request file {filepath}: {e}")
            continue

        # The glob matches every JSON file in the snapshot, so tolerate
        # payloads that are not request records at all.
        if not isinstance(model_data, dict):
            continue

        if model_data.get('status') != 'FINISHED':
            continue

        # A missing 'main_language' yields None, which is skipped here too.
        if model_data.get('main_language') != "Portuguese":
            continue

        average = model_data.get('result_metrics_average')
        if not isinstance(average, (int, float)) or average < 0.25:
            continue

        model_name = model_data.get('model')
        if not isinstance(model_name, str):
            continue

        if any(term in model_name for term in blacklist):
            continue

        # Per-request boundary: one failing model must not stop the others.
        try:
            if not pr_already_exists(model_name):
                print(f"Opening PR for {model_name}")
                commit(model_name, check_if_pr_exists=True)
        except Exception as e:
            traceback.print_exc()
            print(f"Error on {model_name}: {str(e)}")
|
|
|
|
|
# Script entry point: run a single pass of the PR worker when this file is
# executed directly (nothing happens on import).
if __name__ == "__main__":
    run_pr_worker()
|
|
|
|
|
|