lm1-misc-pile/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.319,0.014746404865473472,0
anli_r2,acc,0.348,0.01507060460376841,0
anli_r3,acc,0.3416666666666667,0.013696658778002512,0
arc_challenge,acc,0.17406143344709898,0.011080177129482203,0
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406451,0
arc_easy,acc,0.382996632996633,0.00997492038453648,0
arc_easy,acc_norm,0.36363636363636365,0.009870849346011776,0
boolq,acc,0.6039755351681957,0.008553881336813417,1
cb,acc,0.35714285714285715,0.0646095738380922,1
cb,f1,0.23561507936507933,,1
copa,acc,0.61,0.04902071300001975,0
hellaswag,acc,0.2704640509858594,0.004432917403755055,0
hellaswag,acc_norm,0.27853017327225654,0.004473595650807676,0
piqa,acc,0.5957562568008705,0.011449891763007466,0
piqa,acc_norm,0.588683351468988,0.011480860577192815,0
rte,acc,0.5342960288808665,0.030025579819366426,0
sciq,acc,0.718,0.014236526215291334,0
sciq,acc_norm,0.694,0.014580006055436965,0
storycloze_2016,acc,0.5483698556921432,0.011508201145928354,0
winogrande,acc,0.5232833464877664,0.014037241309573642,0