task,metric,value,err,version anli_r1,acc,0.343,0.015019206922356951,0 anli_r2,acc,0.339,0.014976758771620349,0 anli_r3,acc,0.33416666666666667,0.013622434813136774,0 arc_challenge,acc,0.28924914675767915,0.013250012579393443,0 arc_challenge,acc_norm,0.310580204778157,0.013522292098053059,0 arc_easy,acc,0.6077441077441077,0.010018744689650043,0 arc_easy,acc_norm,0.6026936026936027,0.010041053078884286,0 boolq,acc,0.5529051987767584,0.008695963064172717,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.30617283950617286,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4419438358892651,0.004956030970911519,0 hellaswag,acc_norm,0.5717984465245967,0.004938068627349502,0 piqa,acc,0.7295973884657236,0.010363167031620784,0 piqa,acc_norm,0.735038084874864,0.010296557993316042,0 rte,acc,0.4404332129963899,0.029882123363118726,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.908,0.009144376393151108,0 storycloze_2016,acc,0.6862640299305185,0.01073017911931762,0 winogrande,acc,0.5382794001578532,0.014011242594964115,0