lm1-misc-pile
/
1b58b88b8
/1b58b88b8pile
/evaluation
/rankeval
/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-23-18-55-43_1shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.328, | |
"acc_stderr": 0.014853842487270334 | |
}, | |
"anli_r2": { | |
"acc": 0.328, | |
"acc_stderr": 0.014853842487270338 | |
}, | |
"anli_r3": { | |
"acc": 0.32666666666666666, | |
"acc_stderr": 0.013544340907003665 | |
}, | |
"cb": { | |
"acc": 0.48214285714285715, | |
"acc_stderr": 0.06737697508644648, | |
"f1": 0.3421052631578947 | |
}, | |
"copa": { | |
"acc": 0.64, | |
"acc_stderr": 0.04824181513244218 | |
}, | |
"hellaswag": { | |
"acc": 0.28818960366460866, | |
"acc_stderr": 0.0045199417165083435, | |
"acc_norm": 0.30701055566620195, | |
"acc_norm_stderr": 0.004603111343213066 | |
}, | |
"rte": { | |
"acc": 0.5306859205776173, | |
"acc_stderr": 0.03003973059219781 | |
}, | |
"winogrande": { | |
"acc": 0.5067087608524072, | |
"acc_stderr": 0.014051220692330352 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5660074826296099, | |
"acc_stderr": 0.011461234645182191 | |
}, | |
"boolq": { | |
"acc": 0.5363914373088685, | |
"acc_stderr": 0.008721861424877861 | |
}, | |
"arc_easy": { | |
"acc": 0.4494949494949495, | |
"acc_stderr": 0.010207308833916047, | |
"acc_norm": 0.4208754208754209, | |
"acc_norm_stderr": 0.010130502164066328 | |
}, | |
"arc_challenge": { | |
"acc": 0.197098976109215, | |
"acc_stderr": 0.011625047669880624, | |
"acc_norm": 0.24744027303754265, | |
"acc_norm_stderr": 0.012610352663292673 | |
}, | |
"sciq": { | |
"acc": 0.771, | |
"acc_stderr": 0.013294199326613583, | |
"acc_norm": 0.735, | |
"acc_norm_stderr": 0.013963164754809947 | |
}, | |
"piqa": { | |
"acc": 0.6153427638737758, | |
"acc_stderr": 0.011351177743205584, | |
"acc_norm": 0.6175190424374319, | |
"acc_norm_stderr": 0.011339019654272347 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |