task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996667,0 anli_r2,acc,0.335,0.014933117490932573,0 anli_r3,acc,0.3333333333333333,0.013613950010225606,0 arc_challenge,acc,0.1757679180887372,0.011122850863120485,0 arc_challenge,acc_norm,0.2158703071672355,0.012022975360030663,0 arc_easy,acc,0.380050505050505,0.009960175831493116,0 arc_easy,acc_norm,0.36363636363636365,0.009870849346011767,0 boolq,acc,0.5452599388379205,0.00870915345549762,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.2751322751322751,,1 copa,acc,0.62,0.048783173121456316,0 hellaswag,acc,0.26837283409679347,0.004422070927212535,0 hellaswag,acc_norm,0.27504481179047996,0.004456242601950632,0 piqa,acc,0.5984766050054406,0.01143732437339785,0 piqa,acc_norm,0.5903155603917302,0.011473932007187613,0 rte,acc,0.5415162454873647,0.02999253538537331,0 sciq,acc,0.707,0.014399942998441271,0 sciq,acc_norm,0.659,0.014998131348402697,0 storycloze_2016,acc,0.5451630144307856,0.011515167912227987,0 winogrande,acc,0.5240726124704025,0.014036189665395132,0