lewtun HF staff committed on
Commit
f29dc9f
·
unverified ·
2 Parent(s): 1161178 1edd506

Merge pull request #33 from huggingface/add-metric-hash

Browse files
Files changed (2) hide show
  1. app.py +1 -0
  2. evaluation.py +11 -2
app.py CHANGED
@@ -433,6 +433,7 @@ with st.form(key="form"):
433
  selected_dataset,
434
  selected_config,
435
  selected_split,
 
436
  )
437
  print("INFO -- Selected models after filter:", selected_models)
438
 
 
433
  selected_dataset,
434
  selected_config,
435
  selected_split,
436
+ selected_metrics,
437
  )
438
  print("INFO -- Selected models after filter:", selected_models)
439
 
evaluation.py CHANGED
@@ -12,12 +12,17 @@ class EvaluationInfo:
12
  dataset_name: str
13
  dataset_config: str
14
  dataset_split: str
 
15
 
16
 
17
  def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
18
  if dataset_info.cardData is not None:
19
  metadata = dataset_info.cardData["eval_info"]
20
  metadata.pop("col_mapping", None)
 
 
 
 
21
  evaluation_info = EvaluationInfo(**metadata)
22
  return hash(evaluation_info)
23
  else:
@@ -30,7 +35,7 @@ def get_evaluation_ids():
30
  return [compute_evaluation_id(dset) for dset in evaluation_datasets]
31
 
32
 
33
- def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
34
  evaluation_ids = get_evaluation_ids()
35
 
36
  for idx, model in enumerate(models):
@@ -40,10 +45,14 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_
40
  dataset_name=dataset_name,
41
  dataset_config=dataset_config,
42
  dataset_split=dataset_split,
 
43
  )
44
  candidate_id = hash(evaluation_info)
45
  if candidate_id in evaluation_ids:
46
- st.info(f"Model `{model}` has already been evaluated on this configuration. Skipping evaluation...")
 
 
 
47
  models.pop(idx)
48
 
49
  return models
 
12
  dataset_name: str
13
  dataset_config: str
14
  dataset_split: str
15
+ metrics: set
16
 
17
 
18
  def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
19
  if dataset_info.cardData is not None:
20
  metadata = dataset_info.cardData["eval_info"]
21
  metadata.pop("col_mapping", None)
22
+ # TODO(lewtun): populate dataset cards with metric info
23
+ if "metrics" not in metadata:
24
+ metadata["metrics"] = frozenset()
25
+ metadata["metrics"] = frozenset(metadata["metrics"])
26
  evaluation_info = EvaluationInfo(**metadata)
27
  return hash(evaluation_info)
28
  else:
 
35
  return [compute_evaluation_id(dset) for dset in evaluation_datasets]
36
 
37
 
38
+ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
39
  evaluation_ids = get_evaluation_ids()
40
 
41
  for idx, model in enumerate(models):
 
45
  dataset_name=dataset_name,
46
  dataset_config=dataset_config,
47
  dataset_split=dataset_split,
48
+ metrics=frozenset(metrics),
49
  )
50
  candidate_id = hash(evaluation_info)
51
  if candidate_id in evaluation_ids:
52
+ st.info(
53
+ f"Model `{model}` has already been evaluated on this configuration. \
54
+ This model will be excluded from the evaluation job..."
55
+ )
56
  models.pop(idx)
57
 
58
  return models