task,metric,value,err,version anli_r1,acc,0.308,0.014606483127342765,0 anli_r2,acc,0.346,0.015050266127564443,0 anli_r3,acc,0.3233333333333333,0.013508372867300224,0 arc_challenge,acc,0.24658703071672355,0.012595726268790125,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5526094276094277,0.010202832385415646,0 arc_easy,acc_norm,0.5484006734006734,0.010211600726405236,0 boolq,acc,0.4932721712538226,0.008744263273827433,1 cb,acc,0.32142857142857145,0.06297362289056341,1 cb,f1,0.23303167420814477,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.3419637522405895,0.004733980470799225,0 hellaswag,acc_norm,0.4202350129456284,0.004925877705771198,0 piqa,acc,0.676278563656148,0.010916765010708781,0 piqa,acc_norm,0.675734494015234,0.010921539041347985,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.891,0.009859828407037188,0 sciq,acc_norm,0.884,0.010131468138757005,0 storycloze_2016,acc,0.6435061464457509,0.011075964871050996,0 winogrande,acc,0.516179952644041,0.0140451261309786,0