|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811488,0
|
|
anli_r2,acc,0.316,0.014709193056057118,0
|
|
anli_r3,acc,0.3308333333333333,0.013588208070708995,0
|
|
arc_challenge,acc,0.27303754266211605,0.013019332762635746,0
|
|
arc_challenge,acc_norm,0.29436860068259385,0.013318528460539426,0
|
|
arc_easy,acc,0.6136363636363636,0.009991296778159617,0
|
|
arc_easy,acc_norm,0.5812289562289562,0.010123487160167813,0
|
|
boolq,acc,0.5703363914373089,0.00865809540849789,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.28154851684263454,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.4366660027882892,0.004949589567678892,0
|
|
hellaswag,acc_norm,0.5635331607249552,0.0049493353568818635,0
|
|
piqa,acc,0.7181719260065288,0.010496675231258166,0
|
|
piqa,acc_norm,0.7132752992383025,0.01055131450310808,0
|
|
rte,acc,0.5342960288808665,0.030025579819366426,0
|
|
sciq,acc,0.88,0.010281328012747386,0
|
|
sciq,acc_norm,0.863,0.010878848714333318,0
|
|
storycloze_2016,acc,0.7151256012827365,0.01043751398661171,0
|
|
winogrande,acc,0.5808997632202052,0.013867325192210117,0
|
|
|