lm1-misc-pile/220m7b57b5/evaluation/lm1-220m-7b5-results_lm-eval_global_step14324_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.327,0.014842213153411249,0
anli_r2,acc,0.34,0.014987482264363937,0
anli_r3,acc,0.3408333333333333,0.013688600793296934,0
arc_challenge,acc,0.16552901023890784,0.010860860440277693,0
arc_challenge,acc_norm,0.2098976109215017,0.011900548748047446,0
arc_easy,acc,0.40404040404040403,0.010069061649549545,0
arc_easy,acc_norm,0.37542087542087543,0.009936218527114304,0
boolq,acc,0.5908256880733945,0.008599563442397349,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.29081632653061223,,1
copa,acc,0.62,0.048783173121456316,0
hellaswag,acc,0.2689703246365266,0.004425182676353203,0
hellaswag,acc_norm,0.27763393746265685,0.004469165728600334,0
piqa,acc,0.5908596300326442,0.011471593460443318,0
piqa,acc_norm,0.5859630032644179,0.01149211848141758,0
rte,acc,0.4981949458483754,0.030096267148976633,0
sciq,acc,0.712,0.01432694179723156,0
sciq,acc_norm,0.689,0.014645596385722697,0
storycloze_2016,acc,0.5398182789951897,0.011525709570367516,0
winogrande,acc,0.5201262825572218,0.014041096664344329,0