Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
86c3dd5
1
Parent(s):
50c352c
apply code style and quality checks to app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import os
|
2 |
-
import pandas as pd
|
3 |
import logging
|
4 |
import time
|
5 |
import gradio as gr
|
@@ -23,8 +22,6 @@ from src.display.utils import (
|
|
23 |
COLS,
|
24 |
EVAL_COLS,
|
25 |
EVAL_TYPES,
|
26 |
-
NUMERIC_INTERVALS,
|
27 |
-
TYPES,
|
28 |
AutoEvalColumn,
|
29 |
ModelType,
|
30 |
Precision,
|
@@ -51,11 +48,12 @@ from src.tools.collections import update_collections
|
|
51 |
from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
|
52 |
|
53 |
# Configure logging
|
54 |
-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
55 |
|
56 |
# Start ephemeral Spaces on PRs (see config in README.md)
|
57 |
enable_space_ci()
|
58 |
|
|
|
59 |
def restart_space():
|
60 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
61 |
|
@@ -68,6 +66,7 @@ def time_diff_wrapper(func):
|
|
68 |
diff = end_time - start_time
|
69 |
logging.info(f"Time taken for {func.__name__}: {diff} seconds")
|
70 |
return result
|
|
|
71 |
return wrapper
|
72 |
|
73 |
|
@@ -89,12 +88,13 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
|
|
89 |
logging.info("Download successful")
|
90 |
return
|
91 |
except Exception as e:
|
92 |
-
wait_time = backoff_factor ** attempt
|
93 |
logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
|
94 |
time.sleep(wait_time)
|
95 |
attempt += 1
|
96 |
raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
|
97 |
|
|
|
98 |
def init_space(full_init: bool = True):
|
99 |
"""Initializes the application space, loading only necessary data."""
|
100 |
if full_init:
|
@@ -120,12 +120,13 @@ def init_space(full_init: bool = True):
|
|
120 |
update_collections(original_df)
|
121 |
|
122 |
leaderboard_df = original_df.copy()
|
123 |
-
|
124 |
# Evaluation queue DataFrame retrieval is independent of initialization detail level
|
125 |
eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
126 |
|
127 |
return leaderboard_df, raw_data, original_df, eval_queue_dfs
|
128 |
|
|
|
129 |
# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
|
130 |
# This controls whether a full initialization should be performed.
|
131 |
do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
|
@@ -153,36 +154,34 @@ with demo:
|
|
153 |
value=leaderboard_df,
|
154 |
datatype=[c.type for c in fields(AutoEvalColumn)],
|
155 |
select_columns=SelectColumns(
|
156 |
-
default_selection=[
|
157 |
-
c.name
|
158 |
-
for c in fields(AutoEvalColumn)
|
159 |
-
if c.displayed_by_default
|
160 |
-
],
|
161 |
cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
|
162 |
label="Select Columns to Display:",
|
163 |
),
|
164 |
-
search_columns=[
|
165 |
-
AutoEvalColumn.model.name,
|
166 |
-
AutoEvalColumn.fullname.name,
|
167 |
-
AutoEvalColumn.license.name
|
168 |
-
],
|
169 |
-
hide_columns=[
|
170 |
-
c.name
|
171 |
-
for c in fields(AutoEvalColumn)
|
172 |
-
if c.hidden
|
173 |
-
],
|
174 |
filter_columns=[
|
175 |
ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
|
176 |
ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
|
177 |
-
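ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=150, label="Select the number of parameters (B)"),
|
178 |
-
ColumnFilter(AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True),
|
179 |
-
ColumnFilter(AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True),
|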
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
|
181 |
ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
|
182 |
],
|
183 |
-
bool_checkboxgroup_label="Hide models"
|
184 |
)
|
185 |
-
|
186 |
with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
|
187 |
with gr.Row():
|
188 |
with gr.Column():
|
@@ -313,4 +312,4 @@ scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h
|
|
313 |
scheduler.add_job(update_dynamic_files, "interval", hours=2) # launched every 2 hour
|
314 |
scheduler.start()
|
315 |
|
316 |
-
demo.queue(default_concurrency_limit=40).launch()
|
|
|
1 |
import os
|
|
|
2 |
import logging
|
3 |
import time
|
4 |
import gradio as gr
|
|
|
22 |
COLS,
|
23 |
EVAL_COLS,
|
24 |
EVAL_TYPES,
|
|
|
|
|
25 |
AutoEvalColumn,
|
26 |
ModelType,
|
27 |
Precision,
|
|
|
48 |
from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
|
49 |
|
50 |
# Configure logging
|
51 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
52 |
|
53 |
# Start ephemeral Spaces on PRs (see config in README.md)
|
54 |
enable_space_ci()
|
55 |
|
56 |
+
|
57 |
def restart_space():
|
58 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
59 |
|
|
|
66 |
diff = end_time - start_time
|
67 |
logging.info(f"Time taken for {func.__name__}: {diff} seconds")
|
68 |
return result
|
69 |
+
|
70 |
return wrapper
|
71 |
|
72 |
|
|
|
88 |
logging.info("Download successful")
|
89 |
return
|
90 |
except Exception as e:
|
91 |
+
wait_time = backoff_factor**attempt
|
92 |
logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
|
93 |
time.sleep(wait_time)
|
94 |
attempt += 1
|
95 |
raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
|
96 |
|
97 |
+
|
98 |
def init_space(full_init: bool = True):
|
99 |
"""Initializes the application space, loading only necessary data."""
|
100 |
if full_init:
|
|
|
120 |
update_collections(original_df)
|
121 |
|
122 |
leaderboard_df = original_df.copy()
|
123 |
+
|
124 |
# Evaluation queue DataFrame retrieval is independent of initialization detail level
|
125 |
eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
126 |
|
127 |
return leaderboard_df, raw_data, original_df, eval_queue_dfs
|
128 |
|
129 |
+
|
130 |
# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
|
131 |
# This controls whether a full initialization should be performed.
|
132 |
do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
|
|
|
154 |
value=leaderboard_df,
|
155 |
datatype=[c.type for c in fields(AutoEvalColumn)],
|
156 |
select_columns=SelectColumns(
|
157 |
+
default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
|
|
|
|
|
|
|
|
|
158 |
cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
|
159 |
label="Select Columns to Display:",
|
160 |
),
|
161 |
+
search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.fullname.name, AutoEvalColumn.license.name],
|
162 |
+
hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
filter_columns=[
|
164 |
ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
|
165 |
ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
|
166 |
+
ColumnFilter(
|
167 |
+
AutoEvalColumn.params.name,
|
168 |
+
type="slider",
|
169 |
+
min=0,
|
170 |
+
max=150,
|
171 |
+
label="Select the number of parameters (B)",
|
172 |
+
),
|
173 |
+
ColumnFilter(
|
174 |
+
AutoEvalColumn.still_on_hub.name, type="boolean", label="Private or deleted", default=True
|
175 |
+
),
|
176 |
+
ColumnFilter(
|
177 |
+
AutoEvalColumn.merged.name, type="boolean", label="Contains a merge/moerge", default=True
|
178 |
+
),
|
179 |
ColumnFilter(AutoEvalColumn.moe.name, type="boolean", label="MoE", default=False),
|
180 |
ColumnFilter(AutoEvalColumn.not_flagged.name, type="boolean", label="Flagged", default=True),
|
181 |
],
|
182 |
+
bool_checkboxgroup_label="Hide models",
|
183 |
)
|
184 |
+
|
185 |
with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
|
186 |
with gr.Row():
|
187 |
with gr.Column():
|
|
|
312 |
scheduler.add_job(update_dynamic_files, "interval", hours=2) # launched every 2 hour
|
313 |
scheduler.start()
|
314 |
|
315 |
+
demo.queue(default_concurrency_limit=40).launch()
|