lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/rankeval/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-17-05-36_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.341,0.014998131348402697,0
anli_r2,acc,0.349,0.0150806639915631,0
anli_r3,acc,0.35333333333333333,0.01380457216231493,0
arc_challenge,acc,0.2098976109215017,0.011900548748047442,0
arc_challenge,acc_norm,0.24658703071672355,0.012595726268790136,0
arc_easy,acc,0.45580808080808083,0.01021963176343785,0
arc_easy,acc_norm,0.43434343434343436,0.010170943451269428,0
boolq,acc,0.5669724770642202,0.008666251305518052,1
cb,acc,0.5357142857142857,0.06724777654937658,1
cb,f1,0.3364632237871675,,1
copa,acc,0.64,0.04824181513244218,0
hellaswag,acc,0.2863971320454093,0.0045115330394062255,0
hellaswag,acc_norm,0.30940051782513445,0.004613018101185303,0
piqa,acc,0.6169749727965179,0.011342081709082855,0
piqa,acc_norm,0.6104461371055495,0.011377657455130238,0
rte,acc,0.49458483754512633,0.030094698123239966,0
sciq,acc,0.806,0.012510816141264374,0
sciq,acc_norm,0.783,0.01304151375727071,0
storycloze_2016,acc,0.5686798503474078,0.011452834647978421,0
winogrande,acc,0.5138121546961326,0.014047122916440419,0