lm1-misc-pile/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.326,0.014830507204541037,0
anli_r2,acc,0.356,0.01514904265930662,0
anli_r3,acc,0.3475,0.013751753243291852,0
arc_challenge,acc,0.17406143344709898,0.0110801771294822,0
arc_challenge,acc_norm,0.2175767918088737,0.012057262020972504,0
arc_easy,acc,0.39436026936026936,0.010028176038392992,0
arc_easy,acc_norm,0.3573232323232323,0.009833205612463125,0
boolq,acc,0.608868501529052,0.008535239054221164,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.2134646962233169,,1
copa,acc,0.64,0.04824181513244218,0
hellaswag,acc,0.26598287193786097,0.004409521343140108,0
hellaswag,acc_norm,0.27633937462656843,0.00446272754305589,0
piqa,acc,0.5914036996735582,0.011469240387245143,0
piqa,acc_norm,0.5935799782372143,0.01145968265012358,0
rte,acc,0.5270758122743683,0.030052303463143706,0
sciq,acc,0.71,0.014356395999905689,0
sciq,acc_norm,0.62,0.01535694747779758,0
storycloze_2016,acc,0.5467664350614645,0.011511744771088352,0
winogrande,acc,0.5114443567482242,0.014048804199859329,0