task,metric,value,err,version anli_r1,acc,0.344,0.015029633724408945,0 anli_r2,acc,0.329,0.014865395385928355,0 anli_r3,acc,0.35583333333333333,0.013826518748493322,0 arc_challenge,acc,0.16467576791808874,0.010838369209479231,0 arc_challenge,acc_norm,0.20733788395904437,0.011846905782971364,0 arc_easy,acc,0.3421717171717172,0.009735236771958743,0 arc_easy,acc_norm,0.32365319865319864,0.009600478182273768,0 boolq,acc,0.6128440366972477,0.008519429207594412,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3235431235431235,,1 copa,acc,0.62,0.048783173121456316,0 hellaswag,acc,0.2658832901812388,0.0044089948686501,0 hellaswag,acc_norm,0.26558454491137223,0.004407413723383407,0 piqa,acc,0.5669205658324266,0.01156086442315138,0 piqa,acc_norm,0.5571273122959739,0.011589430503509102,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.556,0.01571976816340209,0 sciq,acc_norm,0.541,0.015766025737882165,0 storycloze_2016,acc,0.5227151256012827,0.011550494192008948,0 winogrande,acc,0.5011838989739542,0.014052446290529015,0