leaderboard / benchmark /llm_text_generation /code /scripts /aggregate_leaderboard_models.py
Jae-Won Chung
New leaderboard prototype
b10121d
raw
history blame
1.28 kB
import json
from glob import glob
from pathlib import Path
import tyro
def infer_params_from_name(model_name: str) -> str:
"""Try to guess the model parameters from the model name.
It basically parses the model name to look for a string that looks like "%d[bB]".
Examples:
- "meta-llama/Meta-Llama-3.1-8B-Instruct" -> 8B
- "facebook/chameleon-30b" -> 30B
"""
for token in model_name.lower().split("-"):
if token.endswith("b") and token[:-1].isdigit():
return token.upper()
return "NA"
def main(results_dir: Path, output_file: Path) -> None:
output_file.parent.mkdir(parents=True, exist_ok=True)
print(f"{results_dir} -> {output_file}")
models = {}
for model_dir in sorted(glob(f"{results_dir}/*/*")):
model_name = "/".join(model_dir.split("/")[-2:])
print(f" {model_name}")
model_info = dict(
url=f"https://huggingface.co/{model_name}",
nickname=model_name.split("/")[-1].replace("-", " ").title(),
params=infer_params_from_name(model_name),
)
assert model_name not in models
models[model_name] = model_info
json.dump(models, open(output_file, "w"), indent=2)
if __name__ == "__main__":
tyro.cli(main)