import json
import os
import time
from datetime import datetime, timezone

from huggingface_hub import ModelCard, snapshot_download

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import (
    API,
    EVAL_REQUESTS_PATH,
    DYNAMIC_INFO_PATH,
    DYNAMIC_INFO_FILE_PATH,
    DYNAMIC_INFO_REPO,
    H4_TOKEN,
    QUEUE_REPO,
    RATE_LIMIT_PERIOD,
    RATE_LIMIT_QUOTA,
    REPO,
    GIT_REQUESTS_PATH,
    GIT_STATUS_PATH,
    GLOBAL_COND,
)
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
    get_quantized_model_parameters_memory,
    is_model_on_hub,
    is_gguf_on_hub,
    user_submission_permission,
    get_model_tags,
)

# Both caches are filled on the first call to add_new_eval() (and again on any
# later call that finds REQUESTED_MODELS empty).
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None


def add_new_eval(
    model: str,
    revision: str,
    private: bool,
    compute_dtype: str = "float16",
    precision: str = "4bit",
    weight_dtype: str = "int4",
    gguf_ftype: str = "*Q4_0.gguf",
):
    """Validate a quantized-model submission and queue it for evaluation.

    The request is checked (rate limit, deny-list, presence on the Hub,
    recognised quantization backend, duplicate submission), then written as a
    JSON file that is uploaded to ``QUEUE_REPO`` and committed/pushed through
    ``REPO`` under both ``GIT_REQUESTS_PATH`` and ``GIT_STATUS_PATH``.

    Args:
        model: Hub repo id. When it contains ``/`` the first segment is used
            as the user name and the second as the file-name stem.
        revision: Revision to evaluate; an empty string means ``"main"``.
        private: Stored in the request and embedded in the request file name.
        compute_dtype: Stored verbatim in the request.
        precision: Accepted for interface compatibility. The stored precision
            is derived from the model's quantization config (``"4bit"`` when
            nothing more specific is found).
        weight_dtype: Fallback weight dtype, used when the derived precision
            is not one of ``"4bit"``, ``"3bit"`` or ``"2bit"``.
        gguf_ftype: GGUF file pattern passed to ``is_gguf_on_hub``; replaced
            by the pattern that helper returns when it returns one.

    Returns:
        The result of ``styled_error`` / ``styled_warning`` when the request
        is rejected, otherwise the result of ``styled_message``.
    """
    global REQUESTED_MODELS
    global USERS_TO_SUBMISSION_DATES
    if not REQUESTED_MODELS:
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(GIT_STATUS_PATH)

    quant_type = None
    user_name = ""
    model_path = model
    if "/" in model:
        name_parts = model.split("/")
        user_name, model_path = name_parts[0], name_parts[1]

    # NOTE(review): this value is overwritten with "4bit" further down before
    # it is ever read; the statement is kept so the argument is still parsed.
    precision = precision.split(" ")[0]
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Is the user rate limited?
    if user_name != "":
        user_can_submit, error_msg = user_submission_permission(
            user_name, USERS_TO_SUBMISSION_DATES, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
        )
        if not user_can_submit:
            return styled_error(error_msg)

    # Did the model authors forbid its submission to the leaderboard?
    if model in DO_NOT_SUBMIT_MODELS:
        return styled_warning("Model authors have requested that their model be not submitted on the leaderboard.")

    # Does the model actually exist?
    if revision == "":
        revision = "main"

    architecture = "?"
    downloads = 0
    created_at = ""
    quantization_config = None

    gguf_on_hub, error, gguf_files, new_gguf_ftype = is_gguf_on_hub(repo_id=model, filename=gguf_ftype)
    if new_gguf_ftype is not None:
        gguf_ftype = new_gguf_ftype

    # `error` is deliberately rebound: the message shown to the user is the
    # one produced by the regular model check.
    model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)

    # Reject only when neither a regular checkpoint nor GGUF files were found.
    if (not model_on_hub or model_config is None) and (not gguf_on_hub or gguf_files is None):
        return styled_error(f'Model "{model}" {error}')

    if model_config is not None:
        architectures = getattr(model_config, "architectures", None)
        if architectures:
            architecture = ";".join(architectures)
        downloads = getattr(model_config, "downloads", 0)
        created_at = getattr(model_config, "created_at", "")
        quantization_config = getattr(model_config, "quantization_config", None)

    if gguf_files is not None:
        # GGUF submissions discard the config-derived metadata gathered above.
        # NOTE(review): the original also assigned an unused `architectures = ""`
        # here (probably meant `architecture`); `architecture` is left untouched
        # so the stored value does not change.
        downloads = 0
        created_at = ""
        quantization_config = None
        quant_type = "llama.cpp"

    # Is the model info correctly filled?
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        return styled_error("Could not get your model information. Please fill it up properly.")

    # Were the model card and license filled?
    try:
        if model_info.cardData is None:
            model_license = "unknown"
        else:
            model_license = model_info.cardData.get("license", "unknown")
    except Exception:
        return styled_error("Please select a license for your model")

    # The model card is only used to extract tags; a missing or invalid card
    # is intentionally not a reason to reject the submission.
    _, _, model_card = check_model_card(model)
    tags = get_model_tags(model_card, model)

    # Seems good, creating the eval
    print("Adding new eval")
    script = "ITREX"
    precision = "4bit"
    if quantization_config is not None:
        quant_method = quantization_config.get("quant_method", None)
        if "bnb_4bit_quant_type" in quantization_config:
            quant_type = "bitsandbytes"
            # Only an explicit load_in_8bit flag switches to 8bit; a missing
            # key must not turn a 4-bit config into an 8-bit one.
            precision = "8bit" if quantization_config.get("load_in_8bit", False) else "4bit"
        elif quant_method == "gptq":
            quant_type = "GPTQ"
            # Numeric default: a "4bit" default would render as "4bitbit".
            precision = f"{quantization_config.get('bits', 4)}bit"
        elif quant_method == "awq":
            quant_type = "AWQ"
            precision = f"{quantization_config.get('bits', 4)}bit"
        elif quant_method == "aqlm":
            quant_type = "AQLM"
            nbits_per_codebook = quantization_config.get("nbits_per_codebook")
            num_codebooks = quantization_config.get("num_codebooks")
            in_group_size = quantization_config.get("in_group_size")
            # Missing or zero fields would otherwise raise TypeError /
            # ZeroDivisionError in the arithmetic below.
            if not (nbits_per_codebook and num_codebooks and in_group_size):
                return styled_error(
                    "AQLM quantization config is missing or has an invalid "
                    '"nbits_per_codebook", "num_codebooks" or "in_group_size".'
                )
            bits = int(nbits_per_codebook * num_codebooks / in_group_size)
            precision = f"{bits}bit"

    # NOTE(review): applied to every submission (including GGUF); confirm this
    # mapping was not meant to run only when a quantization config exists.
    weight_dtype = {"4bit": "int4", "3bit": "int3", "2bit": "int2"}.get(precision, weight_dtype)

    if not quant_type:
        return styled_error("Please select a quantization model like GPTQ, AWQ etc.")

    model_params, model_size = get_quantized_model_parameters_memory(
        model_info, quant_method=quant_type.lower(), bits=precision
    )

    # NOTE(review): the original set a per-backend hardware value (e.g. GPTQ ->
    # "cpu") that was always overwritten here; only this final choice is kept.
    if quant_type == "llama.cpp":
        hardware = "cpu"
        script = "llama_cpp"
        tags = "llama.cpp"
    else:
        hardware = "gpu"

    eval_entry = {
        "model": model,
        "revision": revision,
        "private": private,
        "params": model_size,
        "architectures": architecture,
        "quant_type": quant_type,
        "precision": precision,
        "model_params": model_params,
        "model_size": model_size,
        "weight_dtype": weight_dtype,
        "compute_dtype": compute_dtype,
        "gguf_ftype": gguf_ftype,
        "hardware": hardware,
        "status": "Pending",
        "submitted_time": current_time,
        "model_type": "quantization",
        "job_id": -1,
        "job_start_time": None,
        "scripts": script,
    }

    # NOTE(review): built but not persisted anywhere in this function.
    supplementary_info = {
        "likes": model_info.likes,
        "license": model_license,
        "still_on_hub": True,
        "tags": tags,
        "downloads": downloads,
        "created_at": created_at,
    }
    print(eval_entry)

    # ToDo: need open
    # Check for duplicate submission
    if f"{model}_{revision}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}" in REQUESTED_MODELS:
        return styled_warning("This model has been already submitted.")

    # The same file name is used in the queue, requests and status trees.
    request_filename = (
        f"{model_path}_eval_request_{private}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}.json"
    )

    print("Creating huggingface/dataset eval file")
    queue_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
    os.makedirs(queue_dir, exist_ok=True)
    out_path = f"{queue_dir}/{request_filename}"
    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))

    print("Uploading eval file")
    API.upload_file(
        path_or_fileobj=out_path,
        path_in_repo=out_path.split("eval-queue/")[1],
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        commit_message=f"Add {model} to eval queue",
    )

    print("Creating git eval file")
    requests_dir = f"{GIT_REQUESTS_PATH}/{user_name}"
    os.makedirs(requests_dir, exist_ok=True)
    req_out_path = f"{requests_dir}/{request_filename}"
    # Drop the first path component to get the path handed to REPO.index.add.
    req_git_path = "/".join(req_out_path.split("/")[1:])

    print("Creating status file")
    status_dir = f"{GIT_STATUS_PATH}/{user_name}"
    os.makedirs(status_dir, exist_ok=True)
    sta_out_path = f"{status_dir}/{request_filename}"
    sta_git_path = "/".join(sta_out_path.split("/")[1:])

    print("Uploading eval file")
    print("git-push get lock..............")
    GLOBAL_COND.acquire()
    try:
        branch = REPO.active_branch.name
        REPO.remotes.origin.pull(branch)
        REPO.index.remove("requests", False, r=True)
        with open(req_out_path, "w") as f:
            f.write(json.dumps(eval_entry, indent=4))
        with open(sta_out_path, "w") as f:
            f.write(json.dumps(eval_entry, indent=4))
        REPO.index.add([req_git_path, sta_git_path])
        REPO.index.commit(f"Add {model} to eval requests/status.")
        REPO.remotes.origin.push(branch)
        # NOTE(review): pause while still holding the lock; reason not evident
        # from this file.
        time.sleep(10)
    except Exception as e:
        # Best effort: a failed git sync is logged, not surfaced to the user.
        print(str(e))
        print("git-push error........")
    finally:
        # Always release, whatever ended the block above.
        print("git-push release lock..............")
        GLOBAL_COND.release()

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to 3 hours for the model to show in the PENDING list."
    )