lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/rankeval/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-57-09_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.358,0.015167928865407557,0
anli_r2,acc,0.352,0.015110404505648668,0
anli_r3,acc,0.33666666666666667,0.013647602942406394,0
arc_challenge,acc,0.20648464163822525,0.011828865619002316,0
arc_challenge,acc_norm,0.24658703071672355,0.012595726268790127,0
arc_easy,acc,0.45580808080808083,0.010219631763437851,0
arc_easy,acc_norm,0.4292929292929293,0.01015667807591109,0
boolq,acc,0.5663608562691131,0.008667690464344678,1
cb,acc,0.42857142857142855,0.06672848092813058,1
cb,f1,0.27354497354497354,,1
copa,acc,0.66,0.04760952285695237,0
hellaswag,acc,0.28450507866958774,0.004502563079349396,0
hellaswag,acc_norm,0.308105954989046,0.004607669909914983,0
piqa,acc,0.6158868335146899,0.011348160741479145,0
piqa,acc_norm,0.6039173014145811,0.011411089031912477,0
rte,acc,0.49097472924187724,0.030091559826331334,0
sciq,acc,0.797,0.012726073744598288,0
sciq,acc_norm,0.763,0.013454070462577962,0
storycloze_2016,acc,0.5708177445216461,0.011445869980247976,0
winogrande,acc,0.489344909234412,0.014049294536290403,0