File size: 6,715 Bytes
4df8d2a
 
167137b
4df8d2a
 
 
167137b
 
6fbf558
167137b
 
6fbf558
167137b
6fbf558
 
167137b
 
 
 
 
 
 
6fbf558
167137b
 
 
6fbf558
167137b
6fbf558
 
167137b
 
 
 
 
 
 
6fbf558
167137b
 
557f1e5
167137b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ae93a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4df8d2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import json
from datetime import datetime

from typing import Literal, List

import pandas as pd
from huggingface_hub import HfFileSystem, hf_hub_download

# Maps short category keys to human-readable category names.
# NOTE(review): presumably the keys match the category identifiers found in the
# downloaded leaderboard data — verify against the caller.
# from: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/monitor/monitor.py#L389
KEY_TO_CATEGORY_NAME = {
    "full": "Overall",
    "dedup": "De-duplicate Top Redundant Queries (soon to be default)",
    "coding": "Coding",
    "hard_6": "Hard Prompts (Overall)",
    "hard_english_6": "Hard Prompts (English)",
    "long_user": "Longer Query",
    "english": "English",
    "chinese": "Chinese",
    "french": "French",
    "no_tie": "Exclude Ties",
    "no_short": "Exclude Short Query (< 5 tokens)",
    "no_refusal": "Exclude Refusal",
    # This key has no friendlier display name; it maps to itself.
    "overall_limit_5_user_vote": "overall_limit_5_user_vote",
}
# Maps each category display name (the values of KEY_TO_CATEGORY_NAME) to a
# longer explanation string. Some explanations embed Markdown links.
CAT_NAME_TO_EXPLANATION = {
    "Overall": "Overall Questions",
    "De-duplicate Top Redundant Queries (soon to be default)": "De-duplicate top redundant queries (top 0.1%). See details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/#note-enhancing-quality-through-de-duplication).",
    "Coding": "Coding: whether conversation contains code snippets",
    "Hard Prompts (Overall)": "Hard Prompts (Overall): details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
    "Hard Prompts (English)": "Hard Prompts (English), note: the delta is to English Category. details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
    "Longer Query": "Longer Query (>= 500 tokens)",
    "English": "English Prompts",
    "Chinese": "Chinese Prompts",
    "French": "French Prompts",
    "Exclude Ties": "Exclude Ties and Bothbad",
    "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
    "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
    # No dedicated explanation exists for this category; the name is reused.
    "overall_limit_5_user_vote": "overall_limit_5_user_vote",
}

# License labels that format_data() classifies as "Proprietary LLM"; any other
# label becomes "Open LLM".
# NOTE(review): "Proprietory" looks like a misspelling that is matched on
# purpose (presumably it occurs in the source data) — confirm before removing.
PROPRIETARY_LICENSES = ["Proprietary", "Proprietory"]


def download_latest_data_from_space(
    repo_id: str, file_type: Literal["pkl", "csv"]
) -> str:
    """
    Downloads the latest data file of the specified file type from the given repository space.

    The top-level files of the space matching ``*.{file_type}`` are listed, and
    the one whose file name ends (before the extension) in the greatest
    ``_``-separated token is downloaded. Tokens are compared as plain strings.

    Args:
        repo_id (str): The ID of the repository space.
        file_type (Literal["pkl", "csv"]): The type of the data file to download. Must be either "pkl" or "csv".

    Returns:
        str: The local file path of the downloaded data file.

    Raises:
        IndexError: If the space contains no file with the requested extension.
            The exception type matches what an empty listing raised before, so
            existing handlers keep working; only the message is more specific.
    """

    def extract_date(filename):
        # Illustration: "spaces/org/app/results_20240519.pkl" -> "20240519".
        # Only the part before the first "." of the base name is considered.
        return filename.split("/")[-1].split(".")[0].split("_")[-1]

    fs = HfFileSystem()
    files = fs.glob(f"spaces/{repo_id}/*.{file_type}")
    if not files:
        raise IndexError(
            f"No '*.{file_type}' files found in space '{repo_id}'"
        )

    # String comparison picks the newest file only if the trailing token sorts
    # chronologically (e.g. zero-padded YYYYMMDD) — TODO confirm naming scheme.
    latest_file = max(files, key=extract_date)

    # The glob yields full "spaces/<repo_id>/<name>" paths; hf_hub_download
    # expects the path relative to the repository root, i.e. the base name.
    return hf_hub_download(
        repo_id=repo_id,
        filename=latest_file.split("/")[-1],
        repo_type="space",
    )


def get_constants(dfs):
    """
    Calculate and return the minimum and maximum Elo scores, as well as the maximum number of models per month.

    Parameters:
    - dfs (dict): A dictionary containing DataFrames for different categories.

    Returns:
    - min_elo_score (float): The minimum Elo score across all DataFrames.
    - max_elo_score (float): The maximum Elo score across all DataFrames.
    - upper_models_per_month (int): The maximum number of models per month per license across all DataFrames.
    """
    filter_ranges = {}
    for k, df in dfs.items():
        filter_ranges[k] = {
            "min_elo_score": df["rating"].min().round(),
            "max_elo_score": df["rating"].max().round(),
            "upper_models_per_month": int(
                df.groupby(["Month-Year", "License"])["rating"]
                .apply(lambda x: x.count())
                .max()
            ),
        }

    min_elo_score = float("inf")
    max_elo_score = float("-inf")
    upper_models_per_month = 0

    for _, value in filter_ranges.items():
        min_elo_score = min(min_elo_score, value["min_elo_score"])
        max_elo_score = max(max_elo_score, value["max_elo_score"])
        upper_models_per_month = max(
            upper_models_per_month, value["upper_models_per_month"]
        )
    return min_elo_score, max_elo_score, upper_models_per_month


def update_release_date_mapping(
    new_model_keys_to_add: List[str],
    leaderboard_df: pd.DataFrame,
    release_date_mapping: pd.DataFrame,
) -> pd.DataFrame:
    """
    Update the release date mapping with new model keys.

    For every new key an entry ``{"key", "Model", "Release Date"}`` is appended
    to ``release_date_mapping.json`` in the current working directory, using
    today's date as the release date. The file is read once and written once,
    and only after all entries have been built, so a failed model lookup leaves
    the file untouched.

    Args:
        new_model_keys_to_add (List[str]): A list of new model keys to add to the release date mapping.
        leaderboard_df (pd.DataFrame): The leaderboard DataFrame containing the model information.
            Must provide the columns "key" and "Model".
        release_date_mapping (pd.DataFrame): The current release date mapping DataFrame.

    Returns:
        pd.DataFrame: The updated release date mapping DataFrame, reloaded from
        the JSON file. When there is nothing to add, the `release_date_mapping`
        argument is returned unchanged and no file is touched.

    Raises:
        IndexError: If a key is not present in `leaderboard_df["key"]`.
    """
    # Nothing to add: skip all file I/O and hand back the mapping as-is.
    if not new_model_keys_to_add:
        return release_date_mapping

    mapping_path = "release_date_mapping.json"
    today = datetime.today().strftime("%Y-%m-%d")

    # Build every entry up front so that an unknown key fails before the
    # mapping file is modified.
    new_entries = [
        {
            "key": key,
            "Model": leaderboard_df[leaderboard_df["key"] == key]["Model"].values[0],
            "Release Date": today,
        }
        for key in new_model_keys_to_add
    ]

    with open(mapping_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    data.extend(new_entries)

    with open(mapping_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)

    for entry in new_entries:
        print(f"Added {entry['key']} to release_date_mapping.json")

    # reload the release date mapping
    return pd.read_json(mapping_path, orient="records")


def format_data(df):
    """
    Normalise a leaderboard DataFrame for downstream use.

    The following columns are rewritten on the DataFrame that is passed in
    (the caller's object is modified in place):
    - 'License' becomes 'Proprietary LLM' when the value is listed in
      PROPRIETARY_LICENSES, otherwise 'Open LLM'.
    - 'Release Date' is parsed to datetime.
    - 'Month-Year' is added as the monthly period of 'Release Date'.
    - 'rating' is rounded to the nearest integer value.

    Args:
        df (pandas.DataFrame): The DataFrame to be formatted. Must provide the
            columns 'License', 'Release Date' and 'rating'.

    Returns:
        pandas.DataFrame: The formatted data with a fresh 0..n-1 index (the
        old index is dropped).
    """

    def _license_group(raw_license):
        # Collapse the raw license label into one of two display buckets.
        if raw_license in PROPRIETARY_LICENSES:
            return "Proprietary LLM"
        return "Open LLM"

    df["License"] = df["License"].apply(_license_group)

    release_dates = pd.to_datetime(df["Release Date"])
    df["Release Date"] = release_dates
    df["Month-Year"] = release_dates.dt.to_period("M")

    rounded_rating = df["rating"].round()
    df["rating"] = rounded_rating

    formatted = df.reset_index(drop=True)
    return formatted