lm1-misc-pile/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.326,0.014830507204541037,0
anli_r2,acc,0.356,0.01514904265930662,0
anli_r3,acc,0.3475,0.013751753243291852,0
arc_challenge,acc,0.17406143344709898,0.0110801771294822,0
arc_challenge,acc_norm,0.2175767918088737,0.012057262020972504,0
arc_easy,acc,0.39436026936026936,0.010028176038392992,0
arc_easy,acc_norm,0.3573232323232323,0.009833205612463125,0
boolq,acc,0.608868501529052,0.008535239054221164,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.2134646962233169,,1
copa,acc,0.64,0.04824181513244218,0
hellaswag,acc,0.26598287193786097,0.004409521343140108,0
hellaswag,acc_norm,0.27633937462656843,0.00446272754305589,0
piqa,acc,0.5914036996735582,0.011469240387245143,0
piqa,acc_norm,0.5935799782372143,0.01145968265012358,0
rte,acc,0.5270758122743683,0.030052303463143706,0
sciq,acc,0.71,0.014356395999905689,0
sciq,acc_norm,0.62,0.01535694747779758,0
storycloze_2016,acc,0.5467664350614645,0.011511744771088352,0
winogrande,acc,0.5114443567482242,0.014048804199859329,0
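Each row records one metric for one task: the metric value, its standard error (err), and the lm-eval task version. A minimal sketch of loading and reshaping the file with pandas (an assumption, not part of this repository; the local filename below is taken from the path above and should be adjusted as needed):

import pandas as pd

# Assumed local copy of this results file.
path = "lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.csv"
df = pd.read_csv(path)

# Pivot to one row per task and one column per metric (acc, acc_norm, f1).
table = df.pivot_table(index="task", columns="metric", values="value")
print(table.round(4))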