task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411242,0 anli_r2,acc,0.353,0.01512017260548369,0 anli_r3,acc,0.3408333333333333,0.013688600793296939,0 arc_challenge,acc,0.23464163822525597,0.012383873560768676,0 arc_challenge,acc_norm,0.27474402730375425,0.013044617212771227,0 arc_easy,acc,0.5563973063973064,0.010194308914521135,0 arc_easy,acc_norm,0.5467171717171717,0.010214901516731604,0 boolq,acc,0.481039755351682,0.008738765179491934,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.42649620505163616,,1 copa,acc,0.68,0.046882617226215034,0 hellaswag,acc,0.3430591515634336,0.004737608340163389,0 hellaswag,acc_norm,0.4219279028082055,0.004928578106026371,0 piqa,acc,0.6789989118607181,0.010892641574707899,0 piqa,acc_norm,0.6800870511425462,0.010882873582092062,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.889,0.009938701010583726,0 sciq,acc_norm,0.883,0.010169287802713329,0 storycloze_2016,acc,0.6349545697487974,0.011133301783914869,0 winogrande,acc,0.5098658247829518,0.014049749833367592,0