lm1-misc-pile/1b12b12b/1b112b12bpile/evaluation/rankeval/lm1-1b1-12b-results_lm-eval_global_step23189_2023-01-24-13-57-09_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.334,0.01492201952373296,0
anli_r2,acc,0.364,0.015222868840522022,0
anli_r3,acc,0.33,0.013579531277800918,0
arc_challenge,acc,0.19368600682593856,0.01154842540997854,0
arc_challenge,acc_norm,0.24744027303754265,0.01261035266329267,0
arc_easy,acc,0.4739057239057239,0.010245801990240049,0
arc_easy,acc_norm,0.4457070707070707,0.010199118183322996,0
boolq,acc,0.4859327217125382,0.008741593202770596,1
cb,acc,0.4107142857142857,0.06633634150359541,1
cb,f1,0.2891156462585034,,1
copa,acc,0.67,0.04725815626252607,0
hellaswag,acc,0.29555865365465045,0.004553609405747219,0
hellaswag,acc_norm,0.3260306711810396,0.004678006403691723,0
piqa,acc,0.6501632208922742,0.011127288644632843,0
piqa,acc_norm,0.6501632208922742,0.011127288644632841,0
rte,acc,0.5234657039711191,0.03006330041190266,0
sciq,acc,0.822,0.012102167676183594,0
sciq,acc_norm,0.815,0.012285191326386691,0
storycloze_2016,acc,0.5857830037413148,0.011390991756297536,0
winogrande,acc,0.5201262825572218,0.014041096664344329,0
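
A minimal sketch of loading and pivoting these results with pandas (the filename matches the path above; pandas is an assumption, not part of this repo):

import pandas as pd

# Read the 3-shot rankeval results; the "err" column is the standard error
# reported by lm-eval (empty for cb's f1, so it parses as NaN).
df = pd.read_csv(
    "lm1-1b1-12b-results_lm-eval_global_step23189_2023-01-24-13-57-09_3shots.csv"
)

# One row per task, metrics (acc, acc_norm, f1) as columns.
print(df.pivot_table(index="task", columns="metric", values="value"))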