Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
zdwls
committed on
Commit
·
b12b1dc
1
Parent(s):
a812c3b
init branch
Browse files
- app.py +16 -2
- config.yaml +1 -0
app.py
CHANGED
@@ -116,11 +116,20 @@ for model in pbar:
|
|
116 |
ds = ds.map(add_task)
|
117 |
base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))}
|
118 |
# For now only one metric per task - Could add more metrics lateron
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
for task, metric in TASK_TO_METRIC.items():
|
120 |
-
ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
|
121 |
ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
|
122 |
EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
|
123 |
|
|
|
124 |
# Save & cache EXTERNAL_MODEL_RESULTS
|
125 |
with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
|
126 |
json.dump(EXTERNAL_MODEL_RESULTS, f)
|
@@ -457,6 +466,7 @@ for board, board_config in BOARDS_CONFIG.items():
|
|
457 |
"data": boards_data[board]["data_tasks"][task_category],
|
458 |
"refresh": get_refresh_function(task_category, task_category_list),
|
459 |
"credits": credits,
|
|
|
460 |
})
|
461 |
|
462 |
dataframes = []
|
@@ -612,11 +622,15 @@ with gr.Blocks(css=css) as block:
|
|
612 |
# For updating the 'language' in the URL
|
613 |
item_tab.select(update_url_language, [current_task_language, language_per_task], [current_task_language, language_per_task], trigger_mode="always_last").then(None, [current_task_language], [], js=set_window_url_params)
|
614 |
|
|
|
|
|
|
|
|
|
615 |
with gr.Row():
|
616 |
gr.Markdown(f"""
|
617 |
{item['description']}
|
618 |
|
619 |
-
- **Metric:** {metric}
|
620 |
- **Languages:** {item['language_long'] if 'language_long' in item else item['language']}
|
621 |
{"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''}
|
622 |
""")
|
|
|
116 |
ds = ds.map(add_task)
|
117 |
base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))}
|
118 |
# For now only one metric per task - Could add more metrics lateron
|
119 |
+
|
120 |
+
def filter_function(x, task, metric):
|
121 |
+
# This is a hack for the passkey and needle retrieval test, which reports ndcg_at_1 (i.e. accuracy), rather than the ndcg_at_10 that is commonly used for retrieval tasks.
|
122 |
+
if x['mteb_dataset_name'] in ['LEMBNeedleRetrieval', 'LEMBPasskeyRetrieval']:
|
123 |
+
return x["mteb_task"] == task and x['metric'] == 'ndcg_at_1'
|
124 |
+
else:
|
125 |
+
return x["mteb_task"] == task and x["metric"] == metric
|
126 |
+
|
127 |
for task, metric in TASK_TO_METRIC.items():
|
128 |
+
ds_dict = ds.filter(lambda x: filter_function(x, task, metric))["test"].to_dict()
|
129 |
ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
|
130 |
EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
|
131 |
|
132 |
+
print("********************hello********************")
|
133 |
# Save & cache EXTERNAL_MODEL_RESULTS
|
134 |
with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
|
135 |
json.dump(EXTERNAL_MODEL_RESULTS, f)
|
|
|
466 |
"data": boards_data[board]["data_tasks"][task_category],
|
467 |
"refresh": get_refresh_function(task_category, task_category_list),
|
468 |
"credits": credits,
|
469 |
+
"metric": board_config.get("metric", None),
|
470 |
})
|
471 |
|
472 |
dataframes = []
|
|
|
622 |
# For updating the 'language' in the URL
|
623 |
item_tab.select(update_url_language, [current_task_language, language_per_task], [current_task_language, language_per_task], trigger_mode="always_last").then(None, [current_task_language], [], js=set_window_url_params)
|
624 |
|
625 |
+
specific_metric = metric
|
626 |
+
if item.get("metric", None) is not None:
|
627 |
+
specific_metric = item['metric']
|
628 |
+
|
629 |
with gr.Row():
|
630 |
gr.Markdown(f"""
|
631 |
{item['description']}
|
632 |
|
633 |
+
- **Metric:** {specific_metric}
|
634 |
- **Languages:** {item['language_long'] if 'language_long' in item else item['language']}
|
635 |
{"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''}
|
636 |
""")
|
config.yaml
CHANGED
@@ -301,6 +301,7 @@ boards:
|
|
301 |
icon: "📚"
|
302 |
special_icons: null
|
303 |
credits: "[LongEmbed](https://arxiv.org/abs/2404.12096v2)"
|
|
|
304 |
tasks:
|
305 |
Retrieval:
|
306 |
- LEMBNarrativeQARetrieval
|
|
|
301 |
icon: "📚"
|
302 |
special_icons: null
|
303 |
credits: "[LongEmbed](https://arxiv.org/abs/2404.12096v2)"
|
304 |
+
metric: nDCG@10 (for NarrativeQA, QMSum, SummScreenFD, WikimQA) & nDCG@1 (for passkey and needle)
|
305 |
tasks:
|
306 |
Retrieval:
|
307 |
- LEMBNarrativeQARetrieval
|