lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/rankeval/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-57-09_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.321,0.014770821817934645,0
anli_r2,acc,0.35,0.015090650341444233,0
anli_r3,acc,0.3433333333333333,0.01371263383046586,0
arc_challenge,acc,0.20477815699658702,0.011792544338513409,0
arc_challenge,acc_norm,0.24914675767918087,0.012639407111926433,0
arc_easy,acc,0.45286195286195285,0.010214087372211392,0
arc_easy,acc_norm,0.4276094276094276,0.010151683397430679,0
boolq,acc,0.5629969418960244,0.008675365793227086,1
cb,acc,0.5714285714285714,0.06672848092813058,1
cb,f1,0.3720462543991956,,1
copa,acc,0.64,0.04824181513244218,0
hellaswag,acc,0.28560047799243177,0.004507768029590114,0
hellaswag,acc_norm,0.30860386377215693,0.004609731925736893,0
piqa,acc,0.6175190424374319,0.011339019654272345,0
piqa,acc_norm,0.6164309031556039,0.01134512873411628,0
rte,acc,0.5270758122743683,0.030052303463143706,0
sciq,acc,0.793,0.012818553557843981,0
sciq,acc_norm,0.76,0.013512312258920845,0
storycloze_2016,acc,0.5665419561731694,0.011459581799087402,0
winogrande,acc,0.5209155485398579,0.01404018549421294,0