Spaces:
Runtime error
Runtime error
Merge pull request #33 from huggingface/add-metric-hash
Browse files
- app.py +1 -0
- evaluation.py +11 -2
app.py
CHANGED
@@ -433,6 +433,7 @@ with st.form(key="form"):
|
|
433 |
selected_dataset,
|
434 |
selected_config,
|
435 |
selected_split,
|
|
|
436 |
)
|
437 |
print("INFO -- Selected models after filter:", selected_models)
|
438 |
|
|
|
433 |
selected_dataset,
|
434 |
selected_config,
|
435 |
selected_split,
|
436 |
+
selected_metrics,
|
437 |
)
|
438 |
print("INFO -- Selected models after filter:", selected_models)
|
439 |
|
evaluation.py
CHANGED
@@ -12,12 +12,17 @@ class EvaluationInfo:
|
|
12 |
dataset_name: str
|
13 |
dataset_config: str
|
14 |
dataset_split: str
|
|
|
15 |
|
16 |
|
17 |
def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
|
18 |
if dataset_info.cardData is not None:
|
19 |
metadata = dataset_info.cardData["eval_info"]
|
20 |
metadata.pop("col_mapping", None)
|
|
|
|
|
|
|
|
|
21 |
evaluation_info = EvaluationInfo(**metadata)
|
22 |
return hash(evaluation_info)
|
23 |
else:
|
@@ -30,7 +35,7 @@ def get_evaluation_ids():
|
|
30 |
return [compute_evaluation_id(dset) for dset in evaluation_datasets]
|
31 |
|
32 |
|
33 |
-
def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
|
34 |
evaluation_ids = get_evaluation_ids()
|
35 |
|
36 |
for idx, model in enumerate(models):
|
@@ -40,10 +45,14 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_
|
|
40 |
dataset_name=dataset_name,
|
41 |
dataset_config=dataset_config,
|
42 |
dataset_split=dataset_split,
|
|
|
43 |
)
|
44 |
candidate_id = hash(evaluation_info)
|
45 |
if candidate_id in evaluation_ids:
|
46 |
-
st.info(
|
|
|
|
|
|
|
47 |
models.pop(idx)
|
48 |
|
49 |
return models
|
|
|
12 |
dataset_name: str
|
13 |
dataset_config: str
|
14 |
dataset_split: str
|
15 |
+
metrics: set
|
16 |
|
17 |
|
18 |
def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
|
19 |
if dataset_info.cardData is not None:
|
20 |
metadata = dataset_info.cardData["eval_info"]
|
21 |
metadata.pop("col_mapping", None)
|
22 |
+
# TODO(lewtun): populate dataset cards with metric info
|
23 |
+
if "metrics" not in metadata:
|
24 |
+
metadata["metrics"] = frozenset()
|
25 |
+
metadata["metrics"] = frozenset(metadata["metrics"])
|
26 |
evaluation_info = EvaluationInfo(**metadata)
|
27 |
return hash(evaluation_info)
|
28 |
else:
|
|
|
35 |
return [compute_evaluation_id(dset) for dset in evaluation_datasets]
|
36 |
|
37 |
|
38 |
+
def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
|
39 |
evaluation_ids = get_evaluation_ids()
|
40 |
|
41 |
for idx, model in enumerate(models):
|
|
|
45 |
dataset_name=dataset_name,
|
46 |
dataset_config=dataset_config,
|
47 |
dataset_split=dataset_split,
|
48 |
+
metrics=frozenset(metrics),
|
49 |
)
|
50 |
candidate_id = hash(evaluation_info)
|
51 |
if candidate_id in evaluation_ids:
|
52 |
+
st.info(
|
53 |
+
f"Model `{model}` has already been evaluated on this configuration. \
|
54 |
+
This model will be excluded from the evaluation job..."
|
55 |
+
)
|
56 |
models.pop(idx)
|
57 |
|
58 |
return models
|