task,metric,value,err,version anli_r1,acc,0.318,0.014734079309311901,0 anli_r2,acc,0.338,0.014965960710224473,0 anli_r3,acc,0.3275,0.01355321116725195,0 arc_challenge,acc,0.1621160409556314,0.01077026738079835,0 arc_challenge,acc_norm,0.2090443686006826,0.011882746987406446,0 arc_easy,acc,0.3303872053872054,0.009651430216428194,0 arc_easy,acc_norm,0.3253367003367003,0.00961342770899619,0 boolq,acc,0.5850152905198777,0.008617716361921567,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.33636151875145587,,1 copa,acc,0.55,0.04999999999999999,0 hellaswag,acc,0.26379207329217286,0.00439787247185492,0 hellaswag,acc_norm,0.2671778530173272,0.004415816696303073,0 piqa,acc,0.573993471164309,0.011537375448519445,0 piqa,acc_norm,0.5652883569096845,0.011565943814308855,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.547,0.015749255189977586,0 sciq,acc_norm,0.534,0.015782683329937614,0 storycloze_2016,acc,0.5189738107963656,0.011554104174019695,0 winogrande,acc,0.5146014206787688,0.014046492383275837,0