orionweller committed on
Commit
29be9e3
·
1 Parent(s): 04622fa

cache the model downloading

Browse files
Files changed (1) hide show
  1. refresh.py +16 -8
refresh.py CHANGED
@@ -14,7 +14,7 @@ from utils.model_size import get_model_parameters_memory
14
  from envs import LEADERBOARD_CONFIG, MODEL_META, REPO_ID, RESULTS_REPO, API
15
 
16
 
17
-
18
  TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
19
  BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
20
 
@@ -187,6 +187,20 @@ def get_external_model_results():
187
  return EXTERNAL_MODEL_RESULTS
188
 
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
191
  global MODEL_INFOS
192
 
@@ -230,16 +244,10 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
230
  for model in pbar:
231
  if model.modelId in MODELS_TO_SKIP: continue
232
  pbar.set_description(f"Fetching {model.modelId!r} metadata")
233
- try:
234
- readme_path = hf_hub_download(model.modelId, filename="README.md", etag_timeout=30)
235
- except Exception:
236
- print(f"ERROR: Could not fetch metadata for {model.modelId}, trying again")
237
- readme_path = hf_hub_download(model.modelId, filename="README.md", etag_timeout=30)
238
- meta = metadata_load(readme_path)
239
  MODEL_INFOS[model.modelId] = {
240
  "metadata": meta
241
  }
242
- meta = MODEL_INFOS[model.modelId]["metadata"]
243
  if "model-index" not in meta:
244
  continue
245
  # meta['model-index'][0]["results"] is list of elements like:
 
14
  from envs import LEADERBOARD_CONFIG, MODEL_META, REPO_ID, RESULTS_REPO, API
15
 
16
 
17
+ MODEL_CACHE = {}
18
  TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
19
  BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
20
 
 
187
  return EXTERNAL_MODEL_RESULTS
188
 
189
 
190
def download_or_use_cache(modelId):
    """Return the parsed README metadata for ``modelId``, memoised in ``MODEL_CACHE``.

    On a cache miss the model's ``README.md`` is fetched with
    ``hf_hub_download`` (``etag_timeout=30``) and parsed with
    ``metadata_load``; the parsed result is stored in ``MODEL_CACHE`` under
    ``modelId`` so later calls for the same id skip the download.

    The download is attempted at most twice: if the first attempt raises any
    ``Exception`` a message is printed and the call is repeated once. An
    exception raised by the second attempt (or by ``metadata_load``)
    propagates to the caller, and nothing is cached in that case.

    Args:
        modelId: Hub repository id whose ``README.md`` should be read.

    Returns:
        Whatever ``metadata_load`` produced for the README (the caller tests
        it for a ``"model-index"`` key). NOTE(review): the cached object
        itself is returned, not a copy -- confirm callers do not mutate it.
    """
    # Only item assignment is performed on the module-level dict, so no
    # ``global`` declaration is required here.
    if modelId not in MODEL_CACHE:

        def fetch_readme():
            # Single place that defines how the README is downloaded, so the
            # first attempt and the retry cannot drift apart.
            return hf_hub_download(modelId, filename="README.md", etag_timeout=30)

        try:
            local_readme = fetch_readme()
        except Exception:
            # Best-effort single retry; a second failure is allowed to raise.
            print(f"ERROR: Could not fetch metadata for {modelId}, trying again")
            local_readme = fetch_readme()

        MODEL_CACHE[modelId] = metadata_load(local_readme)

    return MODEL_CACHE[modelId]
202
+
203
+
204
  def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
205
  global MODEL_INFOS
206
 
 
244
  for model in pbar:
245
  if model.modelId in MODELS_TO_SKIP: continue
246
  pbar.set_description(f"Fetching {model.modelId!r} metadata")
247
+ meta = download_or_use_cache(model.modelId)
 
 
 
 
 
248
  MODEL_INFOS[model.modelId] = {
249
  "metadata": meta
250
  }
 
251
  if "model-index" not in meta:
252
  continue
253
  # meta['model-index'][0]["results"] is list of elements like: