lm1-misc-pile
/
221m60b60b
/evaluation
/lm1-221m-60b-results_lm-eval_global_step115203_2023-01-24-13-53-29_2shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.313, | |
"acc_stderr": 0.014671272822977885 | |
}, | |
"anli_r2": { | |
"acc": 0.334, | |
"acc_stderr": 0.014922019523732963 | |
}, | |
"anli_r3": { | |
"acc": 0.33, | |
"acc_stderr": 0.01357953127780092 | |
}, | |
"cb": { | |
"acc": 0.4107142857142857, | |
"acc_stderr": 0.06633634150359541, | |
"f1": 0.24493628437290407 | |
}, | |
"copa": { | |
"acc": 0.64, | |
"acc_stderr": 0.04824181513244218 | |
}, | |
"hellaswag": { | |
"acc": 0.2894841665006971, | |
"acc_stderr": 0.004525960965551705, | |
"acc_norm": 0.3194582752439753, | |
"acc_norm_stderr": 0.004653138360948109 | |
}, | |
"rte": { | |
"acc": 0.45126353790613716, | |
"acc_stderr": 0.029953149241808946 | |
}, | |
"winogrande": { | |
"acc": 0.510655090765588, | |
"acc_stderr": 0.014049294536290393 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5873864243719936, | |
"acc_stderr": 0.011384472322969045 | |
}, | |
"boolq": { | |
"acc": 0.5140672782874618, | |
"acc_stderr": 0.008741593202770605 | |
}, | |
"arc_easy": { | |
"acc": 0.45580808080808083, | |
"acc_stderr": 0.01021963176343785, | |
"acc_norm": 0.4377104377104377, | |
"acc_norm_stderr": 0.010179856486006916 | |
}, | |
"arc_challenge": { | |
"acc": 0.19880546075085323, | |
"acc_stderr": 0.011662850198175543, | |
"acc_norm": 0.22525597269624573, | |
"acc_norm_stderr": 0.01220783999540732 | |
}, | |
"sciq": { | |
"acc": 0.825, | |
"acc_stderr": 0.012021627157731972, | |
"acc_norm": 0.793, | |
"acc_norm_stderr": 0.012818553557843983 | |
}, | |
"piqa": { | |
"acc": 0.6360174102285092, | |
"acc_stderr": 0.011225875703487176, | |
"acc_norm": 0.6316648531011969, | |
"acc_norm_stderr": 0.011254089354334354 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |