task,metric,value,err,version anli_r1,acc,0.314,0.014683991951087976,0 anli_r2,acc,0.336,0.014944140233795023,0 anli_r3,acc,0.32166666666666666,0.013490095282989521,0 arc_challenge,acc,0.17747440273037543,0.01116513876964396,0 arc_challenge,acc_norm,0.20819112627986347,0.011864866118448069,0 arc_easy,acc,0.3265993265993266,0.009623047038267647,0 arc_easy,acc_norm,0.31734006734006737,0.009550648343947771,0 boolq,acc,0.6021406727828746,0.008560641169303364,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.286512928022362,,1 copa,acc,0.6,0.049236596391733084,0 hellaswag,acc,0.2657837084246166,0.004408468107262735,0 hellaswag,acc_norm,0.2660824536944832,0.00441004753083503,0 piqa,acc,0.5788900979325353,0.01151970105915149,0 piqa,acc_norm,0.5680087051142546,0.01155740721010026,0 rte,acc,0.4729241877256318,0.030052303463143706,0 sciq,acc,0.562,0.01569721001969469,0 sciq,acc_norm,0.552,0.015733516566347833,0 storycloze_2016,acc,0.5173703901656868,0.011555452669106639,0 winogrande,acc,0.5114443567482242,0.014048804199859332,0