lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-57-05_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.327,0.014842213153411242,0
anli_r2,acc,0.353,0.01512017260548369,0
anli_r3,acc,0.3408333333333333,0.013688600793296939,0
arc_challenge,acc,0.23464163822525597,0.012383873560768676,0
arc_challenge,acc_norm,0.27474402730375425,0.013044617212771227,0
arc_easy,acc,0.5563973063973064,0.010194308914521135,0
arc_easy,acc_norm,0.5467171717171717,0.010214901516731604,0
boolq,acc,0.481039755351682,0.008738765179491934,1
cb,acc,0.5,0.06741998624632421,1
cb,f1,0.42649620505163616,,1
copa,acc,0.68,0.046882617226215034,0
hellaswag,acc,0.3430591515634336,0.004737608340163389,0
hellaswag,acc_norm,0.4219279028082055,0.004928578106026371,0
piqa,acc,0.6789989118607181,0.010892641574707899,0
piqa,acc_norm,0.6800870511425462,0.010882873582092062,0
rte,acc,0.5342960288808665,0.030025579819366426,0
sciq,acc,0.889,0.009938701010583726,0
sciq,acc_norm,0.883,0.010169287802713329,0
storycloze_2016,acc,0.6349545697487974,0.011133301783914869,0
winogrande,acc,0.5098658247829518,0.014049749833367592,0
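
For quick inspection, a minimal sketch of how these rows could be loaded and pivoted into a per-task table with pandas; the pandas dependency and the local file path are assumptions, not part of the results file itself:

```python
import pandas as pd

# Assumed local copy of the results CSV shown above.
path = "lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-57-05_3shots.csv"

# Columns in the file: task, metric, value, err, version.
df = pd.read_csv(path)

# One row per task, one column per metric (acc, acc_norm, f1);
# missing combinations (e.g. f1 for most tasks) come out as NaN.
table = df.pivot_table(index="task", columns="metric", values="value")
print(table.round(4))
```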