task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811488,0 anli_r2,acc,0.316,0.014709193056057118,0 anli_r3,acc,0.3308333333333333,0.013588208070708995,0 arc_challenge,acc,0.27303754266211605,0.013019332762635746,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539426,0 arc_easy,acc,0.6136363636363636,0.009991296778159617,0 arc_easy,acc_norm,0.5812289562289562,0.010123487160167813,0 boolq,acc,0.5703363914373089,0.00865809540849789,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.28154851684263454,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.4366660027882892,0.004949589567678892,0 hellaswag,acc_norm,0.5635331607249552,0.0049493353568818635,0 piqa,acc,0.7181719260065288,0.010496675231258166,0 piqa,acc_norm,0.7132752992383025,0.01055131450310808,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.88,0.010281328012747386,0 sciq,acc_norm,0.863,0.010878848714333318,0 storycloze_2016,acc,0.7151256012827365,0.01043751398661171,0 winogrande,acc,0.5808997632202052,0.013867325192210117,0