lm1-misc-pile
/
1b58b88b8
/1b58b88b8pile
/evaluation
/rankeval
/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-17-05-36_5shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.341, | |
"acc_stderr": 0.014998131348402697 | |
}, | |
"anli_r2": { | |
"acc": 0.349, | |
"acc_stderr": 0.0150806639915631 | |
}, | |
"anli_r3": { | |
"acc": 0.35333333333333333, | |
"acc_stderr": 0.01380457216231493 | |
}, | |
"cb": { | |
"acc": 0.5357142857142857, | |
"acc_stderr": 0.06724777654937658, | |
"f1": 0.3364632237871675 | |
}, | |
"copa": { | |
"acc": 0.64, | |
"acc_stderr": 0.04824181513244218 | |
}, | |
"hellaswag": { | |
"acc": 0.2863971320454093, | |
"acc_stderr": 0.0045115330394062255, | |
"acc_norm": 0.30940051782513445, | |
"acc_norm_stderr": 0.004613018101185303 | |
}, | |
"rte": { | |
"acc": 0.49458483754512633, | |
"acc_stderr": 0.030094698123239966 | |
}, | |
"winogrande": { | |
"acc": 0.5138121546961326, | |
"acc_stderr": 0.014047122916440419 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5686798503474078, | |
"acc_stderr": 0.011452834647978421 | |
}, | |
"boolq": { | |
"acc": 0.5669724770642202, | |
"acc_stderr": 0.008666251305518052 | |
}, | |
"arc_easy": { | |
"acc": 0.45580808080808083, | |
"acc_stderr": 0.01021963176343785, | |
"acc_norm": 0.43434343434343436, | |
"acc_norm_stderr": 0.010170943451269428 | |
}, | |
"arc_challenge": { | |
"acc": 0.2098976109215017, | |
"acc_stderr": 0.011900548748047442, | |
"acc_norm": 0.24658703071672355, | |
"acc_norm_stderr": 0.012595726268790136 | |
}, | |
"sciq": { | |
"acc": 0.806, | |
"acc_stderr": 0.012510816141264374, | |
"acc_norm": 0.783, | |
"acc_norm_stderr": 0.01304151375727071 | |
}, | |
"piqa": { | |
"acc": 0.6169749727965179, | |
"acc_stderr": 0.011342081709082855, | |
"acc_norm": 0.6104461371055495, | |
"acc_norm_stderr": 0.011377657455130238 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |