task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.353,0.015120172605483685,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.23037542662116042,0.01230492841874761,0 arc_challenge,acc_norm,0.2593856655290102,0.012808273573927102,0 arc_easy,acc,0.5547138047138047,0.010198171137873871,0 arc_easy,acc_norm,0.5303030303030303,0.01024092360872654,0 boolq,acc,0.5146788990825688,0.00874128556866792,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.3383267705301604,,1 copa,acc,0.67,0.047258156262526066,0 hellaswag,acc,0.33499302927703645,0.004710234188047354,0 hellaswag,acc_norm,0.3973312089225254,0.0048834551889089675,0 piqa,acc,0.6692056583242655,0.010977520584714413,0 piqa,acc_norm,0.6637649619151251,0.011022346708970236,0 rte,acc,0.4548736462093863,0.029973636495415252,0 sciq,acc,0.888,0.009977753031397247,0 sciq,acc_norm,0.883,0.010169287802713329,0 storycloze_2016,acc,0.623730625334046,0.011202815067213618,0 winogrande,acc,0.5027624309392266,0.014052271211616429,0