hynky HF staff committed on
Commit
e2f5761
·
1 Parent(s): b355b72

support new format of lighteval output

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -90,8 +90,9 @@ def fetch_available_tasks(results_uri, runs_to_fetch, checkpoint) -> dict[str, d
90
 
91
  for run in runs_to_fetch:
92
  try:
93
- files = data_folder.list_files(f"details/{run}/{checkpoint}", recursive=False)
94
- parquet_files = [f.split("/")[-1] for f in files if f.endswith('.parquet')]
 
95
 
96
  for full_filename in parquet_files:
97
  task_name, date_str = full_filename.replace('.parquet', '').rsplit('_', 1)
@@ -102,7 +103,6 @@ def fetch_available_tasks(results_uri, runs_to_fetch, checkpoint) -> dict[str, d
102
  except FileNotFoundError:
103
  print(f"Checkpoint not found for run: {run}")
104
 
105
- print(all_tasks)
106
 
107
  available_tasks = {
108
  task: {run: info['filename'] for run, info in runs.items()}
@@ -177,10 +177,8 @@ def load_task_data(results_uri, runs_to_fetch, checkpoint, task_name, tasks_file
177
  return None, None, None
178
 
179
 
180
- print(runs_to_fetch)
181
 
182
  data_folder = DataFolder(f"filecache::{results_uri}", token=token, cache_storage="./results-cache")
183
- print(tasks_files)
184
 
185
  def fetch_run_file(run_to_fetch):
186
  file_path = f"details/{run_to_fetch}/{checkpoint}/{tasks_files[task_name][run_to_fetch]}"
@@ -233,8 +231,12 @@ def load_task_data(results_uri, runs_to_fetch, checkpoint, task_name, tasks_file
233
  # For some reason some metrics are stored as strings
234
  metrics = df['metrics']
235
  # Assume all metrics are the same
236
- for metric_key in metrics[0].keys():
237
- prepared_df[f'metric_{metric_key}_{run_name}'] = [metric[metric_key] for metric in metrics]
 
 
 
 
238
  return prepared_df.set_index('full_prompt')
239
 
240
  def get_gold_label(df, task_type):
 
90
 
91
  for run in runs_to_fetch:
92
  try:
93
+ details_folder = f"details/{run}/{checkpoint}"
94
+ files = data_folder.list_files(details_folder, recursive=True)
95
+ parquet_files = [f.removeprefix(details_folder + "/") for f in files if f.endswith('.parquet')]
96
 
97
  for full_filename in parquet_files:
98
  task_name, date_str = full_filename.replace('.parquet', '').rsplit('_', 1)
 
103
  except FileNotFoundError:
104
  print(f"Checkpoint not found for run: {run}")
105
 
 
106
 
107
  available_tasks = {
108
  task: {run: info['filename'] for run, info in runs.items()}
 
177
  return None, None, None
178
 
179
 
 
180
 
181
  data_folder = DataFolder(f"filecache::{results_uri}", token=token, cache_storage="./results-cache")
 
182
 
183
  def fetch_run_file(run_to_fetch):
184
  file_path = f"details/{run_to_fetch}/{checkpoint}/{tasks_files[task_name][run_to_fetch]}"
 
231
  # For some reason some metrics are stored as strings
232
  metrics = df['metrics']
233
  # Assume all metrics are the same
234
+ available_metrics = set(metric for row_metrics in metrics for metric in row_metrics)
235
+ for metric_key in available_metrics:
236
+ prepared_df[f'metric_{metric_key}_{run_name}'] = [metric.get(metric_key, None) for metric in metrics]
237
+
238
+ # Merge rows with the same full_prompt
239
+ prepared_df = prepared_df.groupby('full_prompt').agg(lambda x: next((item for item in x if item is not None), None)).reset_index()
240
  return prepared_df.set_index('full_prompt')
241
 
242
  def get_gold_label(df, task_type):