lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/rankeval/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-57-09_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.358,0.015167928865407557,0
anli_r2,acc,0.352,0.015110404505648668,0
anli_r3,acc,0.33666666666666667,0.013647602942406394,0
arc_challenge,acc,0.20648464163822525,0.011828865619002316,0
arc_challenge,acc_norm,0.24658703071672355,0.012595726268790127,0
arc_easy,acc,0.45580808080808083,0.010219631763437851,0
arc_easy,acc_norm,0.4292929292929293,0.01015667807591109,0
boolq,acc,0.5663608562691131,0.008667690464344678,1
cb,acc,0.42857142857142855,0.06672848092813058,1
cb,f1,0.27354497354497354,,1
copa,acc,0.66,0.04760952285695237,0
hellaswag,acc,0.28450507866958774,0.004502563079349396,0
hellaswag,acc_norm,0.308105954989046,0.004607669909914983,0
piqa,acc,0.6158868335146899,0.011348160741479145,0
piqa,acc_norm,0.6039173014145811,0.011411089031912477,0
rte,acc,0.49097472924187724,0.030091559826331334,0
sciq,acc,0.797,0.012726073744598288,0
sciq,acc_norm,0.763,0.013454070462577962,0
storycloze_2016,acc,0.5708177445216461,0.011445869980247976,0
winogrande,acc,0.489344909234412,0.014049294536290403,0
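A minimal sketch for consuming this file, assuming pandas is available and the CSV is saved locally under the filename above (the path variable is illustrative). It loads the results and prints each task's plain accuracy with its standard error; note the err field can be empty, as for cb/f1.

import pandas as pd

# Hypothetical local path; substitute wherever this CSV is stored.
path = "lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-57-09_4shots.csv"

# Columns: task, metric, value, err, version (err is blank for some rows).
df = pd.read_csv(path)

# Keep the plain "acc" rows and print value +/- stderr per task.
acc = df[df["metric"] == "acc"]
for row in acc.itertuples(index=False):
    err = f" +/- {row.err:.3f}" if pd.notna(row.err) else ""
    print(f"{row.task:16s} {row.value:.3f}{err}")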