{
"results": {
"anli_r1": {
"acc": 0.351,
"acc_stderr": 0.015100563798316407
},
"anli_r2": {
"acc": 0.357,
"acc_stderr": 0.015158521721486774
},
"anli_r3": {
"acc": 0.3433333333333333,
"acc_stderr": 0.01371263383046586
},
"cb": {
"acc": 0.42857142857142855,
"acc_stderr": 0.06672848092813058,
"f1": 0.27903469079939663
},
"copa": {
"acc": 0.66,
"acc_stderr": 0.04760952285695237
},
"hellaswag": {
"acc": 0.2913762198765186,
"acc_stderr": 0.0045346777501027325,
"acc_norm": 0.3185620394343756,
"acc_norm_stderr": 0.004649665273890652
},
"rte": {
"acc": 0.47653429602888087,
"acc_stderr": 0.03006330041190266
},
"winogrande": {
"acc": 0.49171270718232046,
"acc_stderr": 0.014050555322824194
},
"storycloze_2016": {
"acc": 0.5702832709780866,
"acc_stderr": 0.011447631560863357
},
"boolq": {
"acc": 0.5119266055045871,
"acc_stderr": 0.008742566760633423
},
"arc_easy": {
"acc": 0.45707070707070707,
"acc_stderr": 0.010221897564256047,
"acc_norm": 0.42424242424242425,
"acc_norm_stderr": 0.010141333654958574
},
"arc_challenge": {
"acc": 0.1962457337883959,
"acc_stderr": 0.011606019881416288,
"acc_norm": 0.23208191126279865,
"acc_norm_stderr": 0.012336718284948854
},
"sciq": {
"acc": 0.785,
"acc_stderr": 0.01299784381903181,
"acc_norm": 0.769,
"acc_norm_stderr": 0.013334797216936433
},
"piqa": {
"acc": 0.6398258977149075,
"acc_stderr": 0.011200375176667474,
"acc_norm": 0.6294885745375408,
"acc_norm_stderr": 0.011267826475447662
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}