andrewrreed HF staff committed on
Commit
2f78375
·
1 Parent(s): 6bff0b5

add data split tab + refactor

Browse files
Files changed (2) hide show
  1. app.py +87 -37
  2. dev.ipynb +0 -0
app.py CHANGED
@@ -7,6 +7,7 @@ import plotly.express as px
7
  from utils import (
8
  KEY_TO_CATEGORY_NAME,
9
  PROPRIETARY_LICENSES,
 
10
  download_latest_data_from_space,
11
  )
12
 
@@ -55,30 +56,66 @@ for k, v in merged_dfs.items():
55
  merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
56
  )
57
 
58
- df = merged_dfs["Overall"]
59
- df["License"] = df["License"].apply(
60
- lambda x: "Proprietary LLM" if x in PROPRIETARY_LICENSES else "Open LLM"
61
- )
62
- df["Release Date"] = pd.to_datetime(df["Release Date"])
63
- df["Month-Year"] = df["Release Date"].dt.to_period("M")
64
- df["rating"] = df["rating"].round()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  ###################
68
  ### Plot Data
69
  ###################
70
 
71
- date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
72
- min_elo_score = df["rating"].min().round()
73
- max_elo_score = df["rating"].max().round()
74
- upper_models_per_month = int(
75
- df.groupby(["Month-Year", "License"])["rating"].apply(lambda x: x.count()).max()
76
- )
77
 
 
78
 
79
- def build_plot(min_score, max_models_per_month, toggle_annotations):
80
 
 
81
  filtered_df = df[(df["rating"] >= min_score)]
 
82
  filtered_df = (
83
  filtered_df.groupby(["Month-Year", "License"])
84
  .apply(lambda x: x.nlargest(max_models_per_month, "rating"))
@@ -91,11 +128,11 @@ def build_plot(min_score, max_models_per_month, toggle_annotations):
91
  y="rating",
92
  color="License",
93
  hover_name="Model",
94
- hover_data=["Organization", "License"],
95
  trendline="ols",
96
  title=f"Proprietary vs Open LLMs (LMSYS Arena ELO as of {date_updated})",
97
  labels={"rating": "Arena ELO", "Release Date": "Release Date"},
98
- height=700,
99
  template="seaborn",
100
  )
101
 
@@ -143,45 +180,58 @@ with gr.Blocks(
143
  </div>
144
  """
145
  )
 
146
  with gr.Row():
147
- min_score = gr.Slider(
148
- minimum=min_elo_score,
149
- maximum=max_elo_score,
150
- value=800,
151
- step=50,
152
- label="Minimum ELO Score",
153
- )
154
- max_models_per_month = gr.Slider(
155
- value=upper_models_per_month,
156
- minimum=1,
157
- maximum=upper_models_per_month,
158
- step=1,
159
- label="Max Models per Month (per License)",
160
- )
161
- toggle_annotations = gr.Radio(
162
- choices=[True, False], label="Overlay Best Model Name", value=False
163
- )
 
 
 
 
 
 
 
164
 
165
  # Show plot
166
  plot = gr.Plot()
167
  demo.load(
168
  fn=build_plot,
169
- inputs=[min_score, max_models_per_month, toggle_annotations],
170
  outputs=plot,
171
  )
172
  min_score.change(
173
  fn=build_plot,
174
- inputs=[min_score, max_models_per_month, toggle_annotations],
175
  outputs=plot,
176
  )
177
  max_models_per_month.change(
178
  fn=build_plot,
179
- inputs=[min_score, max_models_per_month, toggle_annotations],
180
  outputs=plot,
181
  )
182
  toggle_annotations.change(
183
  fn=build_plot,
184
- inputs=[min_score, max_models_per_month, toggle_annotations],
 
 
 
 
 
185
  outputs=plot,
186
  )
187
 
 
7
  from utils import (
8
  KEY_TO_CATEGORY_NAME,
9
  PROPRIETARY_LICENSES,
10
+ CAT_NAME_TO_EXPLANATION,
11
  download_latest_data_from_space,
12
  )
13
 
 
56
  merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
57
  )
58
 
59
+
60
+ # format dataframes
61
+ def format_data(df):
62
+ df["License"] = df["License"].apply(
63
+ lambda x: "Proprietary LLM" if x in PROPRIETARY_LICENSES else "Open LLM"
64
+ )
65
+ df["Release Date"] = pd.to_datetime(df["Release Date"])
66
+ df["Month-Year"] = df["Release Date"].dt.to_period("M")
67
+ df["rating"] = df["rating"].round()
68
+ return df.reset_index(drop=True)
69
+
70
+
71
+ merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
72
+
73
+
74
+ # get constants
75
+ filter_ranges = {}
76
+ for k, df in merged_dfs.items():
77
+ filter_ranges[k] = {
78
+ "min_elo_score": df["rating"].min().round(),
79
+ "max_elo_score": df["rating"].max().round(),
80
+ "upper_models_per_month": int(
81
+ df.groupby(["Month-Year", "License"])["rating"]
82
+ .apply(lambda x: x.count())
83
+ .max()
84
+ ),
85
+ }
86
+
87
+ min_elo_score = float("inf")
88
+ max_elo_score = float("-inf")
89
+ upper_models_per_month = 0
90
+
91
+ for key, value in filter_ranges.items():
92
+ min_elo_score = min(min_elo_score, value["min_elo_score"])
93
+ max_elo_score = max(max_elo_score, value["max_elo_score"])
94
+ upper_models_per_month = max(
95
+ upper_models_per_month, value["upper_models_per_month"]
96
+ )
97
+
98
+
99
+ date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
100
+
101
+
102
+ def get_data_split(dfs, set_name):
103
+ df = dfs[set_name].copy(deep=True)
104
+ return df.reset_index(drop=True)
105
 
106
 
107
  ###################
108
  ### Plot Data
109
  ###################
110
 
 
 
 
 
 
 
111
 
112
+ def build_plot(min_score, max_models_per_month, toggle_annotations, set_selector):
113
 
114
+ df = get_data_split(merged_dfs, set_name=set_selector)
115
 
116
+ # filter data
117
  filtered_df = df[(df["rating"] >= min_score)]
118
+
119
  filtered_df = (
120
  filtered_df.groupby(["Month-Year", "License"])
121
  .apply(lambda x: x.nlargest(max_models_per_month, "rating"))
 
128
  y="rating",
129
  color="License",
130
  hover_name="Model",
131
+ hover_data=["Organization", "License", "Link"],
132
  trendline="ols",
133
  title=f"Proprietary vs Open LLMs (LMSYS Arena ELO as of {date_updated})",
134
  labels={"rating": "Arena ELO", "Release Date": "Release Date"},
135
+ height=800,
136
  template="seaborn",
137
  )
138
 
 
180
  </div>
181
  """
182
  )
183
+
184
  with gr.Row():
185
+ with gr.Column():
186
+ toggle_annotations = gr.Radio(
187
+ choices=[True, False], label="Overlay Best Model Name", value=True
188
+ )
189
+ set_selector = gr.Dropdown(
190
+ choices=list(CAT_NAME_TO_EXPLANATION.keys()),
191
+ label="Select Dataset",
192
+ value="Overall",
193
+ )
194
+ with gr.Column():
195
+ min_score = gr.Slider(
196
+ minimum=min_elo_score,
197
+ maximum=max_elo_score,
198
+ value=(max_elo_score - min_elo_score) * 0.3 + min_elo_score,
199
+ step=50,
200
+ label="Minimum ELO Score",
201
+ )
202
+ max_models_per_month = gr.Slider(
203
+ value=upper_models_per_month - 2,
204
+ minimum=1,
205
+ maximum=upper_models_per_month,
206
+ step=1,
207
+ label="Max Models per Month (per License)",
208
+ )
209
 
210
  # Show plot
211
  plot = gr.Plot()
212
  demo.load(
213
  fn=build_plot,
214
+ inputs=[min_score, max_models_per_month, toggle_annotations, set_selector],
215
  outputs=plot,
216
  )
217
  min_score.change(
218
  fn=build_plot,
219
+ inputs=[min_score, max_models_per_month, toggle_annotations, set_selector],
220
  outputs=plot,
221
  )
222
  max_models_per_month.change(
223
  fn=build_plot,
224
+ inputs=[min_score, max_models_per_month, toggle_annotations, set_selector],
225
  outputs=plot,
226
  )
227
  toggle_annotations.change(
228
  fn=build_plot,
229
+ inputs=[min_score, max_models_per_month, toggle_annotations, set_selector],
230
+ outputs=plot,
231
+ )
232
+ set_selector.change(
233
+ fn=build_plot,
234
+ inputs=[min_score, max_models_per_month, toggle_annotations, set_selector],
235
  outputs=plot,
236
  )
237
 
dev.ipynb CHANGED
The diff for this file is too large to render. See raw diff