task,metric,value,err,version anli_r1,acc,0.336,0.014944140233795021,0 anli_r2,acc,0.332,0.014899597242811475,0 anli_r3,acc,0.3383333333333333,0.013664144006618268,0 arc_challenge,acc,0.28071672354948807,0.013131238126975576,0 arc_challenge,acc_norm,0.3003412969283277,0.013395909309956995,0 arc_easy,acc,0.6085858585858586,0.01001491753262781,0 arc_easy,acc_norm,0.5909090909090909,0.010088775152615786,0 boolq,acc,0.5779816513761468,0.008638040428462952,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.30810810810810807,,1 copa,acc,0.8,0.04020151261036845,0 hellaswag,acc,0.43198566022704643,0.004943400892881053,0 hellaswag,acc_norm,0.5660227046405099,0.004946089230153026,0 piqa,acc,0.7187159956474428,0.010490509832327423,0 piqa,acc_norm,0.7165397170837867,0.010515057791152041,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.895,0.009698921026024961,0 sciq,acc_norm,0.878,0.010354864712936694,0 storycloze_2016,acc,0.7097808658471406,0.010495529690730063,0 winogrande,acc,0.5730071033938438,0.013901878072575055,0