lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/rankeval/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-57-09_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.321,0.014770821817934645,0
anli_r2,acc,0.35,0.015090650341444233,0
anli_r3,acc,0.3433333333333333,0.01371263383046586,0
arc_challenge,acc,0.20477815699658702,0.011792544338513409,0
arc_challenge,acc_norm,0.24914675767918087,0.012639407111926433,0
arc_easy,acc,0.45286195286195285,0.010214087372211392,0
arc_easy,acc_norm,0.4276094276094276,0.010151683397430679,0
boolq,acc,0.5629969418960244,0.008675365793227086,1
cb,acc,0.5714285714285714,0.06672848092813058,1
cb,f1,0.3720462543991956,,1
copa,acc,0.64,0.04824181513244218,0
hellaswag,acc,0.28560047799243177,0.004507768029590114,0
hellaswag,acc_norm,0.30860386377215693,0.004609731925736893,0
piqa,acc,0.6175190424374319,0.011339019654272345,0
piqa,acc_norm,0.6164309031556039,0.01134512873411628,0
rte,acc,0.5270758122743683,0.030052303463143706,0
sciq,acc,0.793,0.012818553557843981,0
sciq,acc_norm,0.76,0.013512312258920845,0
storycloze_2016,acc,0.5665419561731694,0.011459581799087402,0
winogrande,acc,0.5209155485398579,0.01404018549421294,0
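The rows follow the lm-eval harness CSV layout: one row per (task, metric) pair, with `value` the score, `err` the reported standard error (empty where none is computed, e.g. the `cb` F1 row), and `version` the task version. A minimal sketch for loading and inspecting the file with pandas; the filename is taken from the path above, and the 1.96 multiplier is an assumed normal approximation for a 95% confidence interval, not something stated in the file:

```python
# Minimal sketch: load the lm-eval results CSV and pivot it for reading.
import pandas as pd

df = pd.read_csv(
    "lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-57-09_3shots.csv"
)

# One row per (task, metric); pivot so each metric becomes a column.
table = df.pivot_table(index="task", columns="metric", values="value")
print(table.round(3))

# Assumed normal-approximation 95% CIs from the reported standard errors.
# Rows with an empty `err` field (read as NaN) yield NaN bounds.
df["ci95_low"] = df["value"] - 1.96 * df["err"]
df["ci95_high"] = df["value"] + 1.96 * df["err"]
print(df[["task", "metric", "value", "ci95_low", "ci95_high"]].to_string(index=False))
```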