lm1-misc-pile/3b92b62b6/evaluation/lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-24-13-53-41_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.322,0.014782913600996666,0
anli_r2,acc,0.332,0.014899597242811471,0
anli_r3,acc,0.31583333333333335,0.013424568830356457,0
arc_challenge,acc,0.23720136518771331,0.012430399829260846,0
arc_challenge,acc_norm,0.2551194539249147,0.012739038695202098,0
arc_easy,acc,0.5420875420875421,0.010223371342195902,0
arc_easy,acc_norm,0.5214646464646465,0.010250325159456652,0
boolq,acc,0.5204892966360857,0.008737709345935946,1
cb,acc,0.44642857142857145,0.06703189227942398,1
cb,f1,0.3234429296979417,,1
copa,acc,0.65,0.0479372485441102,0
hellaswag,acc,0.33339972117108146,0.004704645214506539,0
hellaswag,acc_norm,0.39583748257319257,0.004880303863138502,0
piqa,acc,0.6708378672470077,0.010963750414134703,0
piqa,acc_norm,0.6648531011969532,0.011013513128643931,0
rte,acc,0.516245487364621,0.030080573208738064,0
sciq,acc,0.882,0.010206869264381791,0
sciq,acc_norm,0.874,0.010499249222408033,0
storycloze_2016,acc,0.6215927311598076,0.011215325833205824,0
winogrande,acc,0.5067087608524072,0.014051220692330349,0