import pandas as pd
from pathlib import Path
from datasets import load_dataset
import numpy as np
import os
import re
# Model names whose leaderboard label gets a trailing " *" in model_hyperlink.
UNVERIFIED_MODELS = [
]
# Model names whose leaderboard label gets a trailing " ⚠️" in model_hyperlink.
CONTAMINATED_MODELS = [
]
# From Open LLM Leaderboard
def model_hyperlink(link, model_name):
    """Return the leaderboard label for ``model_name``.

    A name longer than 50 characters is shortened to its first 47 characters
    plus "...". The label gets " *" appended when the (possibly shortened)
    name is listed in UNVERIFIED_MODELS, and " ⚠️" appended when it is listed
    in CONTAMINATED_MODELS.

    NOTE(review): ``link`` is not read anywhere in this body, and the last
    assignment to the label runs for every input, so the special-case
    branches have no visible effect on the result -- confirm this is intended.
    """
    # Keep labels to at most 50 characters: 47 of the name plus an ellipsis.
    if len(model_name) > 50:
        model_name = model_name[:47] + "..."

    if model_name == "random":
        label = "random"
    elif model_name == "Cohere March 2024":
        label = f'{model_name}'
    else:
        # Everything before the first "/" in the name.
        org = model_name.split("/")[0]
        if org in ("openai", "Anthropic", "google"):
            label = f'{model_name}'
        elif org == "PoLL":
            label = model_name

    # Runs for every input and replaces whatever was chosen above.
    label = f'{model_name}'

    # Membership is tested against the shortened name, not the full one.
    markers = []
    if model_name in UNVERIFIED_MODELS:
        markers.append(" *")
    if model_name in CONTAMINATED_MODELS:
        markers.append(" ⚠️")
    return label + "".join(markers)
def undo_hyperlink(html_string):
    """Extract the first piece of text enclosed by ``>`` and ``<``.

    The search looks for a ">" followed by one or more characters that are
    not "<", followed by "<", and returns what lies between the two
    delimiters. When ``html_string`` contains no such span the literal string
    "No text found" is returned.
    """
    found = re.search(r'>[^<]+<', html_string)
    if found is None:
        return "No text found"
    # The match includes both delimiters; trim one character from each end.
    return found.group(0)[1:-1]
# Define a function to fetch and process data
def load_all_data(data_repo, subdir:str, subsubsets=False):
    """Load every ``*.json`` file in ``data_repo/subdir`` into one DataFrame.

    Each file is read on its own with ``load_dataset("json", ...)`` so that
    files with differing fields can still be combined, gets a "model" column
    holding the file name without its ".json" extension, and is converted to
    a DataFrame. The per-file frames are then concatenated.

    Args:
        data_repo: Local directory that contains ``subdir``.
        subdir: Directory under ``data_repo`` to scan (not recursive).
        subsubsets: Not used by this function; kept so existing calls that
            pass it keep working.

    Returns:
        The concatenated DataFrame, with files listed later by
        ``os.listdir`` appearing first and each file's own row index kept.
        An empty DataFrame when the directory holds no ``.json`` files.
    """
    data_dir = Path(data_repo) / subdir
    extension = ".json"

    frames = []
    for file_name in os.listdir(data_dir):
        file_path = os.path.join(data_dir, file_name)
        if not (file_name.endswith(extension) and os.path.isfile(file_path)):
            continue
        # Strip only the trailing extension so a name that contains ".json"
        # somewhere in the middle is not cut short.
        model_name = file_name[: -len(extension)]
        model_data = load_dataset("json", data_files=file_path, split="train")
        # NOTE(review): a one-element column presumably means each file holds
        # a single record -- verify against the result files.
        model_data = model_data.add_column("model", [model_name])
        frames.append(pd.DataFrame(model_data))

    if not frames:
        return pd.DataFrame()
    # One concat instead of one per file; reversed so later files come first,
    # as they did when each new frame was prepended to the running result.
    return pd.concat(frames[::-1])
def prep_df(df):
    """Turn the raw results frame into the table shown on the leaderboard.

    Expects "model", "path" and "average" columns. Steps, in order:
    columns are arranged with "model" first and the rest alphabetically;
    "model" is replaced by the output of model_hyperlink (called with a
    huggingface.co URL built from "path"); "path" is dropped; every column
    other than "model" whose name contains neither "rank" nor "confi" is
    multiplied by 100; "average" is moved to the second position; and the
    known columns are renamed to their display titles.

    No rounding or string formatting is applied to the values.
    """
    # "model" leads, everything else follows alphabetically.
    ordered = sorted(df.columns)
    ordered.remove('model')
    df = df.loc[:, ['model', *ordered]]

    def _label(row):
        return model_hyperlink(f"https://huggingface.co/{row['path']}", row['model'])

    df["model"] = df.apply(_label, axis=1)
    df = df.drop(columns=["path"])

    # Scale the score columns; rank and confidence-interval columns are left alone.
    score_cols = [
        name
        for name in df.columns.tolist()
        if name != "model" and "rank" not in name and "confi" not in name
    ]
    df[score_cols] = df[score_cols] * 100

    # Put the average right after the model column.
    layout = df.columns.tolist()
    layout.remove('average')
    layout.insert(1, 'average')
    df = df.loc[:, layout]

    display_names = {
        "model": "Model",
        "average": "Average",
        "brainstorm": "Brainstorm",
        "open_qa": "Open QA",
        "closed_qa": "Closed QA",
        "extract": "Extract",
        "generation": "Generation",
        "rewrite": "Rewrite",
        "summarize": "Summarize",
        "classify": "Classify",
        "reasoning_over_numerical_data": "Reasoning Over Numerical Data",
        "multi-document_synthesis": "Multi-Document Synthesis",
        "fact_checking_or_attributed_qa": "Fact Checking or Attributed QA",
    }
    return df.rename(columns=display_names)
def sort_by_category(df, category):
    """Return a copy of ``df`` ranked and laid out for one category.

    The helper column names are derived from ``category`` by lower-casing it,
    replacing spaces with underscores and appending "_rank" / "_confi".
    Rows are sorted by that rank column ascending, ties broken by the
    ``category`` column descending. The rank column is moved to the front
    and renamed "Rank", ``category`` is moved to the third position, and the
    confidence column is moved to the fourth and renamed "95% CI". All other
    columns whose names end in "rank" or "confi" are removed. ``df`` itself
    is not modified.
    """
    slug = category.lower().replace(" ", "_")
    rank_col = slug + "_rank"
    ci_col = slug + "_confi"

    ranked = df.sort_values(by=[rank_col, category], ascending=[True, False])

    # Apply the three moves one after another; each one shifts the columns
    # in between, so the order of the moves matters.
    layout = list(ranked.columns)
    for slot, name in ((0, rank_col), (2, category), (3, ci_col)):
        layout.insert(slot, layout.pop(layout.index(name)))
    ranked = ranked.loc[:, layout].rename(columns={rank_col: "Rank", ci_col: "95% CI"})

    # The renamed columns no longer match these lower-case suffixes, so only
    # the other categories' helper columns are dropped.
    leftovers = [
        name
        for name in ranked.columns
        if name.endswith("rank") or name.endswith("confi")
    ]
    return ranked.drop(columns=leftovers)