File size: 6,012 Bytes
9ceb843
 
 
 
 
0b8c16d
9ceb843
91c5b22
df04a09
b64c62d
 
 
df04a09
91c5b22
 
9ceb843
 
5e9e451
 
 
90eea3b
91c5b22
1d33a30
91c5b22
1d33a30
91c5b22
1d33a30
91c5b22
b146dec
91c5b22
5e9e451
91c5b22
 
 
 
b64c62d
 
 
 
9ceb843
0b8c16d
 
 
 
 
 
 
 
 
 
 
9ceb843
ab74236
9ceb843
ab74236
df04a09
 
 
 
9ceb843
 
 
 
df04a09
 
 
9ceb843
 
 
df04a09
 
9ceb843
 
df04a09
63c5ebf
35e2ca1
 
df04a09
9ceb843
 
 
 
df04a09
 
 
 
9ceb843
df04a09
9ceb843
 
63c5ebf
7eaa6d2
9ceb843
 
 
 
 
56fcfaf
df04a09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f4ce43
9ceb843
df04a09
 
 
 
 
63c5ebf
df04a09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63c5ebf
 
 
 
78b8fe9
63c5ebf
df04a09
63c5ebf
df04a09
63c5ebf
df04a09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import pandas as pd
from pathlib import Path
from datasets import load_dataset
import numpy as np
import os
import re

# Model names listed here get a trailing " *" appended by model_hyperlink.
UNVERIFIED_MODELS = [

]

# Model names listed here get a trailing " ⚠️" appended by model_hyperlink.
CONTAMINATED_MODELS = [

]

# From Open LLM Leaderboard
def model_hyperlink(link, model_name):
    """Build the HTML (or plain-text) label shown for a model.

    Names longer than 50 characters are shortened to their first 47
    characters followed by "...". The label is then chosen as follows:

    * ``"random"`` and names whose first ``/``-separated component is
      ``"PoLL"`` are returned as plain text, without a link;
    * ``"Cohere March 2024"`` links to the Cohere organisation page;
    * names whose first component is ``openai``, ``Anthropic`` or ``google``
      link to that organisation's page on huggingface.co;
    * every other name links to ``link``.

    Previously the generic ``link`` anchor was assigned unconditionally after
    the special cases, which made all of them dead code.

    Args:
        link: URL used for models that have no special-case handling.
        model_name: Name of the model as it should be displayed.

    Returns:
        The label string, with " *" appended when the model is listed in
        ``UNVERIFIED_MODELS`` and " ⚠️" appended when it is listed in
        ``CONTAMINATED_MODELS``.
    """
    # Keep the untruncated name: the marker lists hold real model names, and
    # a shortened name could never match them.
    full_name = model_name
    if len(model_name) > 50:
        model_name = model_name[:47] + "..."

    org = model_name.split("/")[0]
    if model_name == "random" or org == "PoLL":
        # These entries have no page to point at.
        output = model_name
    else:
        if model_name == "Cohere March 2024":
            href = "https://huggingface.co/Cohere"
        elif org in ("openai", "Anthropic", "google"):
            href = f"https://huggingface.co/{org}"
        else:
            href = link
        output = f'<a target="_blank" href="{href}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'

    # The truncated form is still accepted so existing list entries keep working.
    if full_name in UNVERIFIED_MODELS or model_name in UNVERIFIED_MODELS:
        output += " *"
    if full_name in CONTAMINATED_MODELS or model_name in CONTAMINATED_MODELS:
        output += " ⚠️"
    return output

def undo_hyperlink(html_string):
    """Return the first piece of text enclosed between a '>' and a '<'.

    Args:
        html_string: String to search, e.g. an ``<a ...>text</a>`` fragment.

    Returns:
        The first non-empty run of characters that sits between a '>' and the
        following '<', or the literal string "No text found" when the input
        contains no such run.
    """
    found = re.search(r'>[^<]+<', html_string)
    if found is None:
        return "No text found"
    # The match includes both delimiters; slice just inside them.
    return html_string[found.start() + 1:found.end() - 1]


# Define a function to fetch and process data
def load_all_data(data_repo, subdir:str, subsubsets=False):
    """Load every ``*.json`` file in ``data_repo/subdir`` into one DataFrame.

    Each file is read on its own with ``load_dataset("json", ...)`` so that
    files with differing fields can still be combined; a ``model`` column
    holding the file name without its ``.json`` suffix is added to each one.

    Args:
        data_repo: Path of the local directory that contains ``subdir``.
        subdir: Name of the sub-directory holding the per-model JSON files.
        subsubsets: Unused; kept so existing callers keep working.

    Returns:
        A DataFrame with the rows of all files. Files are stacked in reverse
        listing order (the last file listed comes first) and the per-file row
        indices are kept as they are. An empty DataFrame is returned when the
        directory holds no ``.json`` files.
    """
    data_dir = Path(data_repo) / subdir
    suffix = ".json"

    # Strip only the trailing suffix: splitting on ".json" would cut a name
    # such as "a.json.v2.json" down to "a" and point at the wrong file.
    model_names = [
        entry[:-len(suffix)]
        for entry in os.listdir(data_dir)
        if entry.endswith(suffix) and os.path.isfile(os.path.join(data_dir, entry))
    ]

    frames = []
    for model_name in model_names:
        model_data = load_dataset("json", data_files=os.path.join(data_dir, model_name + suffix), split="train")
        # One label per row, so files with more than one record also load.
        model_data = model_data.add_column("model", [model_name] * len(model_data))
        frames.append(pd.DataFrame(model_data))

    if not frames:
        return pd.DataFrame()

    # A single concat replaces the repeated prepend-in-a-loop; reversing
    # keeps the row order that prepending produced.
    return pd.concat(frames[::-1])


def prep_df(df):
    """Reshape the raw results frame into the frame shown on the leaderboard.

    Steps, in order:

    1. columns are sorted alphabetically, with "model" moved to the front;
    2. "model" is replaced by the output of ``model_hyperlink`` called with
       ``https://huggingface.co/<path>`` and the model name, after which the
       "path" column is dropped;
    3. every column other than "model" whose name contains neither "rank"
       nor "confi" is multiplied by 100;
    4. "average" is moved to the second position;
    5. the known columns are renamed to their display titles.

    No rounding or string formatting is applied to the values.

    Args:
        df: Frame containing at least "model", "path" and "average" columns.

    Returns:
        A new frame; ``df`` itself is not modified.
    """
    def _linked_name(row):
        return model_hyperlink(f"https://huggingface.co/{row['path']}", row['model'])

    display_titles = {
        "model": "Model",
        "average": "Average",
        "brainstorm": "Brainstorm",
        "open_qa": "Open QA",
        "closed_qa": "Closed QA",
        "extract": "Extract",
        "generation": "Generation",
        "rewrite": "Rewrite",
        "summarize": "Summarize",
        "classify": "Classify",
        "reasoning_over_numerical_data": "Reasoning Over Numerical Data",
        "multi-document_synthesis": "Multi-Document Synthesis",
        "fact_checking_or_attributed_qa": "Fact Checking or Attributed QA",
    }

    # Alphabetical layout with "model" pinned to the first slot.
    layout = sorted(df.columns)
    layout.remove("model")
    table = df.reindex(["model", *layout], axis=1)

    # The link needs "path", so build it before that column goes away.
    table["model"] = table.apply(_linked_name, axis=1)
    table = table.drop(columns=["path"])

    # Rank and confidence-interval columns are left unscaled.
    score_cols = [
        name
        for name in table.columns
        if name != "model" and "rank" not in name and "confi" not in name
    ]
    table[score_cols] = table[score_cols] * 100

    # "average" goes right after the model column.
    layout = table.columns.tolist()
    layout.remove("average")
    layout.insert(1, "average")
    table = table.loc[:, layout]

    return table.rename(columns=display_titles)


def sort_by_category(df, category):
    """Return a copy of ``df`` ranked and laid out for a single category.

    The helper column names are derived from ``category`` by lower-casing it,
    replacing spaces with underscores and appending "_rank" / "_confi".

    Rows are ordered by the category's rank column (ascending), with ties
    broken by the category's own value (descending). In the result the rank
    column comes first and is called "Rank", the category column sits at
    index 2 and its confidence column at index 3 under the name "95% CI".
    All remaining columns whose names end in "rank" or "confi" are removed.

    Args:
        df: Frame holding ``category`` plus its rank and confi columns.
        category: Display name of the column to sort by.

    Returns:
        A new frame; ``df`` is left untouched.
    """
    slug = category.lower().replace(" ", "_")
    rank_key = slug + "_rank"
    ci_key = slug + "_confi"

    ranked = df.copy().sort_values(by=[rank_key, category], ascending=[True, False])

    # Apply the three column moves one after another on a plain list, then
    # reorder the frame once.
    layout = list(ranked.columns)
    for slot, label in ((0, rank_key), (2, category), (3, ci_key)):
        layout.insert(slot, layout.pop(layout.index(label)))
    ranked = ranked.loc[:, layout].rename(columns={rank_key: "Rank", ci_key: "95% CI"})

    # The renamed columns no longer carry the suffixes, so only the helper
    # columns of the other categories are dropped here.
    leftovers = [name for name in ranked.columns if name.endswith(("rank", "confi"))]
    return ranked.drop(columns=leftovers)