task,metric,value,err,version anli_r1,acc,0.363,0.015213890444671283,0 anli_r2,acc,0.362,0.0152048409129195,0 anli_r3,acc,0.3516666666666667,0.013789711695404806,0 arc_challenge,acc,0.27559726962457337,0.013057169655761838,0 arc_challenge,acc_norm,0.31313993174061433,0.013552671543623501,0 arc_easy,acc,0.6203703703703703,0.009958037725468565,0 arc_easy,acc_norm,0.6085858585858586,0.010014917532627824,0 boolq,acc,0.5162079510703363,0.008740459157499082,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.3340305010893247,,1 copa,acc,0.74,0.04408440022768078,0 hellaswag,acc,0.44064927305317664,0.004954503606471609,0 hellaswag,acc_norm,0.5764787890858395,0.004931065434173691,0 piqa,acc,0.7285092491838956,0.010376251176596135,0 piqa,acc_norm,0.7393906420021763,0.010241826155811632,0 rte,acc,0.44765342960288806,0.029931070362939526,0 sciq,acc,0.91,0.009054390204866444,0 sciq,acc_norm,0.914,0.008870325962594766,0 storycloze_2016,acc,0.6932121859967931,0.010664275190473634,0 winogrande,acc,0.5501183898973955,0.013981711904049732,0