migrate to fsspec

app.py CHANGED
@@ -9,12 +9,8 @@ import numpy as np
 from datetime import datetime
 
 import gradio as gr
-import huggingface_hub
 import pandas as pd
-
-from huggingface_hub.file_download import repo_folder_name
-from huggingface_hub.hf_api import RepoFile
-from huggingface_hub.utils import EntryNotFoundError
+from datatrove.io import DataFolder
 
 FALLBACK_TOKEN_NAME = "HF_TOKEN"
 
@@ -41,20 +37,20 @@ def get_run_name_seed(run_name):
     run_name, seed = run_name.split("-seed-")
     return run_name, int(seed)
 
-def fetch_repo_structure(…
+def fetch_repo_structure(results_uri, oauth_token: gr.OAuthToken | None = None):
     token = os.environ.get(FALLBACK_TOKEN_NAME)
     if oauth_token:
         token = oauth_token.token
 
-    …
-    …
-    …
+    data_folder = DataFolder(results_uri, token=token)
+    runs = [f.removeprefix("details/") for f in data_folder.list_files("details", recursive=False, include_directories=True) if f != "details"]
+
     if not runs:
         return {}, gr.update(choices=[], value=None)
 
     def process_run(run):
-        run_files = …
-        return run, …
+        run_files = [f.removeprefix(f"details/{run}/") for f in data_folder.list_files(f"details/{run}", recursive=False, include_directories=True) if f != f"details/{run}"]
+        return run, run_files
 
     with ThreadPoolExecutor() as executor:
         results = list(executor.map(process_run, runs))
@@ -86,14 +82,16 @@ def select_runs_by_language(runs, current_selected, language):
         return select_runs_by_regex(runs, current_selected, f".*-{language}-.*")
     return current_selected
 
-def fetch_available_tasks(repo_name, runs_to_fetch, checkpoint) -> dict[str, dict[str, str]]:
+def fetch_available_tasks(results_uri, runs_to_fetch, checkpoint) -> dict[str, dict[str, str]]:
     token = os.environ.get(FALLBACK_TOKEN_NAME)
 
+    data_folder = DataFolder(results_uri, token=token)
     all_tasks = defaultdict(lambda: defaultdict(dict))
+
     for run in runs_to_fetch:
         try:
-            files = …
-            parquet_files = [f.…
+            files = data_folder.list_files(f"details/{run}/{checkpoint}", recursive=False)
+            parquet_files = [f.split("/")[-1] for f in files if f.endswith('.parquet')]
 
             for full_filename in parquet_files:
                 task_name, date_str = full_filename.replace('.parquet', '').rsplit('_', 1)
@@ -101,8 +99,10 @@ def fetch_available_tasks(repo_name, runs_to_fetch, checkpoint) -> dict[str, dic
 
             if run not in all_tasks[task_name] or date > all_tasks[task_name][run]['date']:
                 all_tasks[task_name][run] = {'filename': full_filename, 'date': date}
-        except EntryNotFoundError:
+        except FileNotFoundError:
             print(f"Checkpoint not found for run: {run}")
+
+    print(all_tasks)
 
     available_tasks = {
         task: {run: info['filename'] for run, info in runs.items()}
@@ -112,17 +112,17 @@ def fetch_available_tasks(repo_name, runs_to_fetch, checkpoint) -> dict[str, dic
 
     return available_tasks
 
-def fetch_run_results(…
+def fetch_run_results(results_uri, runs_to_fetch, checkpoint,
                       oauth_token: gr.OAuthToken | None = None, progress=gr.Progress()):
 
-    task_runs_dict = fetch_available_tasks(…
+    task_runs_dict = fetch_available_tasks(results_uri, runs_to_fetch, checkpoint)
     task_names = list(task_runs_dict.keys())
     return gr.update(choices=task_names, value=task_names[0] if task_names else None), task_runs_dict
 
 
 def render_table(df, selected_runs, metric_names):
     if df is None or not selected_runs or not metric_names:
-        return None
+        return None, "0"
     kept_metrics = [f"metric_{metric_name}_{run_name}" for run_name in selected_runs for metric_name in metric_names]
     other_metrics = [col for col in df.columns if col.startswith(f"metric_") and col not in kept_metrics]
     df = df.drop(columns=other_metrics)
@@ -130,8 +130,9 @@ def render_table(df, selected_runs, metric_names):
     df = shorten_column_names(df, selected_runs, metric_names)
 
     # Sample 100
+    n_samples = len(df)
     df = df.sample(n=min(100, len(df)), random_state=42)
-    return df
+    return df, n_samples
 
 def get_column_widths(df):
     column_widths = []
@@ -170,19 +171,25 @@ def shorten_column_names(df, run_names: list[str], metric_names: list[str]):
     return df
 
 
-def load_task_data(repo_name, runs_to_fetch, checkpoint, task_name, tasks_files, progress=gr.Progress()):
+def load_task_data(results_uri, runs_to_fetch, checkpoint, task_name, tasks_files, progress=gr.Progress()):
     token = os.environ.get(FALLBACK_TOKEN_NAME)
     if not runs_to_fetch or not task_name:
         return None, None, None
 
+
+    print(runs_to_fetch)
+
+    data_folder = DataFolder(f"filecache::{results_uri}", token=token, cache_storage="./results-cache")
+    print(tasks_files)
+
     def fetch_run_file(run_to_fetch):
         file_path = f"details/{run_to_fetch}/{checkpoint}/{tasks_files[task_name][run_to_fetch]}"
         try:
-            …
-            …
+            with data_folder.open(file_path, "rb") as f:
+                df = pd.read_parquet(f)
             return df, run_to_fetch
-        except EntryNotFoundError:
-            print(f"File not found: {…
+        except FileNotFoundError:
+            print(f"File not found: {tasks_files[task_name][run_to_fetch]}")
            return None, run_to_fetch
 
     with ThreadPoolExecutor() as pool:
@@ -245,7 +252,7 @@ def load_task_data(repo_name, runs_to_fetch, checkpoint, task_name, tasks_files,
     # Join all prepared DataFrames
     for df, run_name in zip(dfs, run_names):
         prepared_df = prepare_df(df, run_name, task_type)
-        combined_df = combined_df.join(prepared_df, how='outer'…
+        combined_df = combined_df.join(prepared_df, how='outer')
 
 
     available_metrics = list(set("_".join(col.split('_')[1:-1]) for col in combined_df.columns if col.startswith("metric_")))
@@ -259,7 +266,7 @@ with gr.Blocks() as demo:
     results_df_full = gr.State(None)
     tasks_files = gr.State({})
     login_button = gr.LoginButton(visible=False)
-    …
+    results_uri = gr.Textbox(label="Results URI", value="s3://fineweb-multilingual-v1/evals/test/", visible=True)
     with gr.Column():
         gr.Markdown("# FineWeb experiments results explorer")
         with gr.Row():
@@ -277,11 +284,14 @@ with gr.Blocks() as demo:
                 task_name = gr.Dropdown(choices=[], interactive=True, label="Task name")
                 metric_names = gr.Dropdown(choices=[], interactive=True, multiselect=True, label="Metric")
         results_df = gr.Dataframe(interactive=False, wrap=True)
+        with gr.Row():
+            with gr.Column():
+                num_samples = gr.Text(interactive=False, label="# Samples")
 
     # Run selection
     gr.on(
-        triggers=[…
-        fn=fetch_repo_structure, inputs=[…
+        triggers=[results_uri.change],
+        fn=fetch_repo_structure, inputs=[results_uri], outputs=[runs_checkpoints, selected_runs],
     )
     gr.on(
         triggers=[select_by_regex_button.click],
@@ -306,37 +316,37 @@ with gr.Blocks() as demo:
     gr.on(
         triggers=[fetch_res.click],
         fn=fetch_run_results,
-        inputs=[…
+        inputs=[results_uri, selected_runs, checkpoint],
         outputs=[task_name, tasks_files]
     ).then(
         fn=load_task_data,
-        inputs=[…
+        inputs=[results_uri, selected_runs, checkpoint, task_name, tasks_files],
         outputs=[results_df_full, metric_names]
     ).then(
         fn=render_table,
         inputs=[results_df_full, selected_runs, metric_names],
-        outputs=[results_df]
+        outputs=[results_df, num_samples]
    )
 
     # Update results when task name or metric changes
     gr.on(
         triggers=[task_name.input],
         fn=load_task_data,
-        inputs=[…
+        inputs=[results_uri, selected_runs, checkpoint, task_name, tasks_files],
         outputs=[results_df_full, metric_names]
     ).then(
         fn=render_table,
         inputs=[results_df_full, selected_runs, metric_names],
-        outputs=[results_df]
+        outputs=[results_df, num_samples]
     )
 
     gr.on(
         triggers=[metric_names.input],
         fn=render_table,
         inputs=[results_df_full, selected_runs, metric_names],
-        outputs=[results_df]
+        outputs=[results_df, num_samples]
     )
 
-    demo.load(fn=fetch_repo_structure, inputs=[…
+    demo.load(fn=fetch_repo_structure, inputs=[results_uri], outputs=[runs_checkpoints, selected_runs])
 
 demo.launch()
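The run discovery above goes through datatrove's fsspec-backed DataFolder. Below is a minimal sketch of the enumeration pattern used in fetch_repo_structure, assuming the details/<run>/<checkpoint>/*.parquet layout from the diff; the bucket URI and run names are illustrative:

from datatrove.io import DataFolder

# Hypothetical results URI; in the app it comes from the results_uri textbox.
data_folder = DataFolder("s3://my-bucket/evals")

# A non-recursive listing with include_directories=True yields the immediate
# children of "details" (the run directories) plus the "details" entry itself,
# hence the filter and the prefix strip.
runs = [
    f.removeprefix("details/")
    for f in data_folder.list_files("details", recursive=False, include_directories=True)
    if f != "details"
]
print(runs)  # e.g. ["exp1-en-seed-1", "exp1-fr-seed-2"]

Note that fsspec filesystems raise the builtin FileNotFoundError on missing paths, which is presumably why the except clauses move from huggingface_hub's EntryNotFoundError to FileNotFoundError.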
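In load_task_data, the URI is wrapped as f"filecache::{results_uri}" with cache_storage="./results-cache", which chains fsspec's local file cache in front of the remote filesystem: each parquet file is downloaded once and re-read from disk afterwards. A minimal sketch of the same chaining with plain fsspec, assuming fsspec and s3fs are installed; the bucket and path are illustrative:

import fsspec

# "filecache::" chains a caching layer in front of the target protocol; the
# first open() downloads the object into cache_storage, later opens read the
# local copy instead of hitting S3 again.
with fsspec.open(
    "filecache::s3://my-bucket/details/run-a/1000/task_2024-01-01.parquet",
    mode="rb",
    filecache={"cache_storage": "./results-cache"},
) as f:
    header = f.read(4)  # parquet files start with the magic bytes b"PAR1"

print(header)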