task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928367,0 anli_r2,acc,0.318,0.014734079309311901,0 anli_r3,acc,0.34,0.013680495725767797,0 arc_challenge,acc,0.24744027303754265,0.01261035266329267,0 arc_challenge,acc_norm,0.27986348122866894,0.013119040897725923,0 arc_easy,acc,0.5597643097643098,0.010186228624515655,0 arc_easy,acc_norm,0.5446127946127947,0.010218861787618721,0 boolq,acc,0.4996941896024465,0.008745053340723166,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3421052631578947,,1 copa,acc,0.69,0.04648231987117316,0 hellaswag,acc,0.3619796853216491,0.004795908282584554,0 hellaswag,acc_norm,0.4466241784505079,0.004961268387512966,0 piqa,acc,0.6887921653971708,0.01080226387804584,0 piqa,acc_norm,0.6860718171926007,0.010827928134189646,0 rte,acc,0.5270758122743683,0.0300523034631437,0 sciq,acc,0.893,0.009779910359847167,0 sciq,acc_norm,0.89,0.009899393819724439,0 storycloze_2016,acc,0.6509887760555852,0.011022640519108541,0 winogrande,acc,0.5374901341752171,0.014012928183336574,0