lm1-misc-pile
/
8b712b12b
/evaluation
/rankeval
/lm1-8b7-12b-results_lm-eval_global_step5494_2023-01-24-13-57-43_4shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.351, | |
"acc_stderr": 0.015100563798316407 | |
}, | |
"anli_r2": { | |
"acc": 0.357, | |
"acc_stderr": 0.015158521721486774 | |
}, | |
"anli_r3": { | |
"acc": 0.3433333333333333, | |
"acc_stderr": 0.01371263383046586 | |
}, | |
"cb": { | |
"acc": 0.42857142857142855, | |
"acc_stderr": 0.06672848092813058, | |
"f1": 0.27903469079939663 | |
}, | |
"copa": { | |
"acc": 0.66, | |
"acc_stderr": 0.04760952285695237 | |
}, | |
"hellaswag": { | |
"acc": 0.2913762198765186, | |
"acc_stderr": 0.0045346777501027325, | |
"acc_norm": 0.3185620394343756, | |
"acc_norm_stderr": 0.004649665273890652 | |
}, | |
"rte": { | |
"acc": 0.47653429602888087, | |
"acc_stderr": 0.03006330041190266 | |
}, | |
"winogrande": { | |
"acc": 0.49171270718232046, | |
"acc_stderr": 0.014050555322824194 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5702832709780866, | |
"acc_stderr": 0.011447631560863357 | |
}, | |
"boolq": { | |
"acc": 0.5119266055045871, | |
"acc_stderr": 0.008742566760633423 | |
}, | |
"arc_easy": { | |
"acc": 0.45707070707070707, | |
"acc_stderr": 0.010221897564256047, | |
"acc_norm": 0.42424242424242425, | |
"acc_norm_stderr": 0.010141333654958574 | |
}, | |
"arc_challenge": { | |
"acc": 0.1962457337883959, | |
"acc_stderr": 0.011606019881416288, | |
"acc_norm": 0.23208191126279865, | |
"acc_norm_stderr": 0.012336718284948854 | |
}, | |
"sciq": { | |
"acc": 0.785, | |
"acc_stderr": 0.01299784381903181, | |
"acc_norm": 0.769, | |
"acc_norm_stderr": 0.013334797216936433 | |
}, | |
"piqa": { | |
"acc": 0.6398258977149075, | |
"acc_stderr": 0.011200375176667474, | |
"acc_norm": 0.6294885745375408, | |
"acc_norm_stderr": 0.011267826475447662 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |