lm1-misc-pile/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.319,0.014746404865473472,0
anli_r2,acc,0.348,0.01507060460376841,0
anli_r3,acc,0.3416666666666667,0.013696658778002512,0
arc_challenge,acc,0.17406143344709898,0.011080177129482203,0
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406451,0
arc_easy,acc,0.382996632996633,0.00997492038453648,0
arc_easy,acc_norm,0.36363636363636365,0.009870849346011776,0
boolq,acc,0.6039755351681957,0.008553881336813417,1
cb,acc,0.35714285714285715,0.0646095738380922,1
cb,f1,0.23561507936507933,,1
copa,acc,0.61,0.04902071300001975,0
hellaswag,acc,0.2704640509858594,0.004432917403755055,0
hellaswag,acc_norm,0.27853017327225654,0.004473595650807676,0
piqa,acc,0.5957562568008705,0.011449891763007466,0
piqa,acc_norm,0.588683351468988,0.011480860577192815,0
rte,acc,0.5342960288808665,0.030025579819366426,0
sciq,acc,0.718,0.014236526215291334,0
sciq,acc_norm,0.694,0.014580006055436965,0
storycloze_2016,acc,0.5483698556921432,0.011508201145928354,0
winogrande,acc,0.5232833464877664,0.014037241309573642,0