lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-57-05_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.327,0.014842213153411242,0
anli_r2,acc,0.353,0.01512017260548369,0
anli_r3,acc,0.3408333333333333,0.013688600793296939,0
arc_challenge,acc,0.23464163822525597,0.012383873560768676,0
arc_challenge,acc_norm,0.27474402730375425,0.013044617212771227,0
arc_easy,acc,0.5563973063973064,0.010194308914521135,0
arc_easy,acc_norm,0.5467171717171717,0.010214901516731604,0
boolq,acc,0.481039755351682,0.008738765179491934,1
cb,acc,0.5,0.06741998624632421,1
cb,f1,0.42649620505163616,,1
copa,acc,0.68,0.046882617226215034,0
hellaswag,acc,0.3430591515634336,0.004737608340163389,0
hellaswag,acc_norm,0.4219279028082055,0.004928578106026371,0
piqa,acc,0.6789989118607181,0.010892641574707899,0
piqa,acc_norm,0.6800870511425462,0.010882873582092062,0
rte,acc,0.5342960288808665,0.030025579819366426,0
sciq,acc,0.889,0.009938701010583726,0
sciq,acc_norm,0.883,0.010169287802713329,0
storycloze_2016,acc,0.6349545697487974,0.011133301783914869,0
winogrande,acc,0.5098658247829518,0.014049749833367592,0