Muennighoff committed on
Commit
1380fc9
·
verified ·
1 Parent(s): 2fcf697

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -15
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from functools import partial
2
  import json
 
3
 
4
  from datasets import load_dataset
5
  import gradio as gr
@@ -805,7 +806,12 @@ MODELS_TO_SKIP = {
805
  "Severian/nomic", # Copy
806
  }
807
 
808
- EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 
 
 
 
 
809
 
810
  def add_lang(examples):
811
  if not(examples["eval_language"]):
@@ -837,20 +843,25 @@ def add_task(examples):
837
  examples["mteb_task"] = "Unknown"
838
  return examples
839
 
840
- pbar = tqdm(EXTERNAL_MODELS, desc="Fetching external model results")
841
- for model in pbar:
842
- pbar.set_description(f"Fetching external model results for {model!r}")
843
- ds = load_dataset("mteb/results", model, trust_remote_code=True)
844
- # For local debugging:
845
- #, download_mode='force_redownload', verification_mode="no_checks")
846
- ds = ds.map(add_lang)
847
- ds = ds.map(add_task)
848
- base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
849
- # For now only one metric per task - Could add more metrics later on
850
- for task, metric in TASK_TO_METRIC.items():
851
- ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
852
- ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
853
- EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
 
 
 
 
 
854
 
855
  def get_dim_seq_size(model):
856
  filenames = [sib.rfilename for sib in model.siblings]
 
1
  from functools import partial
2
  import json
3
+ import os
4
 
5
  from datasets import load_dataset
6
  import gradio as gr
 
806
  "Severian/nomic", # Copy
807
  }
808
 
809
+
810
+ if os.path.exists("EXTERNAL_MODEL_RESULTS.json"):
811
+ with open("EXTERNAL_MODEL_RESULTS.json") as f:
812
+ EXTERNAL_MODEL_RESULTS = json.load(f)
813
+ else:
814
+ EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
815
 
816
  def add_lang(examples):
817
  if not(examples["eval_language"]):
 
843
  examples["mteb_task"] = "Unknown"
844
  return examples
845
 
846
+ if not(os.path.exists("EXTERNAL_MODEL_RESULTS.json")):
847
+ pbar = tqdm(EXTERNAL_MODELS, desc="Fetching external model results")
848
+ for model in pbar:
849
+ pbar.set_description(f"Fetching external model results for {model!r}")
850
+ ds = load_dataset("mteb/results", model, trust_remote_code=True)
851
+ # For local debugging:
852
+ #, download_mode='force_redownload', verification_mode="no_checks")
853
+ ds = ds.map(add_lang)
854
+ ds = ds.map(add_task)
855
+ base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
856
+ # For now only one metric per task - Could add more metrics later on
857
+ for task, metric in TASK_TO_METRIC.items():
858
+ ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
859
+ ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
860
+ EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
861
+
862
+ # Save & cache EXTERNAL_MODEL_RESULTS
863
+ with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
864
+ json.dump(EXTERNAL_MODEL_RESULTS, f)
865
 
866
  def get_dim_seq_size(model):
867
  filenames = [sib.rfilename for sib in model.siblings]