lm1-misc-pile/221m60b60b/evaluation/lm1-221m-60b-results_lm-eval_global_step115203_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.313,0.014671272822977885,0
anli_r2,acc,0.334,0.014922019523732963,0
anli_r3,acc,0.33,0.01357953127780092,0
arc_challenge,acc,0.19880546075085323,0.011662850198175543,0
arc_challenge,acc_norm,0.22525597269624573,0.01220783999540732,0
arc_easy,acc,0.45580808080808083,0.01021963176343785,0
arc_easy,acc_norm,0.4377104377104377,0.010179856486006916,0
boolq,acc,0.5140672782874618,0.008741593202770605,1
cb,acc,0.4107142857142857,0.06633634150359541,1
cb,f1,0.24493628437290407,,1
copa,acc,0.64,0.04824181513244218,0
hellaswag,acc,0.2894841665006971,0.004525960965551705,0
hellaswag,acc_norm,0.3194582752439753,0.004653138360948109,0
piqa,acc,0.6360174102285092,0.011225875703487176,0
piqa,acc_norm,0.6316648531011969,0.011254089354334354,0
rte,acc,0.45126353790613716,0.029953149241808946,0
sciq,acc,0.825,0.012021627157731972,0
sciq,acc_norm,0.793,0.012818553557843983,0
storycloze_2016,acc,0.5873864243719936,0.011384472322969045,0
winogrande,acc,0.510655090765588,0.014049294536290393,0
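
A minimal sketch for inspecting these results, assuming the CSV has been downloaded locally under its original filename (the path below is an assumption, adjust to your checkout). It reads the five columns defined in the header (task, metric, value, err, version) and prints one formatted line per row; note the `err` field is empty for `cb,f1`, which the sketch handles.

```python
import csv

# Assumed local path to the downloaded results file.
path = "lm1-221m-60b-results_lm-eval_global_step115203_2023-01-24-13-53-29_2shots.csv"

with open(path, newline="") as f:
    for row in csv.DictReader(f):
        # Some rows (e.g. cb/f1) have no stderr recorded, so err may be empty.
        err = f" ± {float(row['err']):.4f}" if row["err"] else ""
        print(f"{row['task']:<16} {row['metric']:<9} {float(row['value']):.4f}{err}")
```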