lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-22-13-12-25_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.326,0.014830507204541033,0
anli_r2,acc,0.337,0.014955087918653609,0
anli_r3,acc,0.345,0.01372842153945488,0
arc_challenge,acc,0.22610921501706485,0.012224202097063283,0
arc_challenge,acc_norm,0.26023890784982934,0.012821930225112564,0
arc_easy,acc,0.5357744107744108,0.010233488709726539,0
arc_easy,acc_norm,0.4692760942760943,0.01024039558481524,0
boolq,acc,0.536697247706422,0.008721469450159985,1
cb,acc,0.4107142857142857,0.06633634150359541,1
cb,f1,0.2824582824582824,,1
copa,acc,0.7,0.046056618647183814,0
hellaswag,acc,0.3451503684524995,0.004744456628455116,0
hellaswag,acc_norm,0.41983668591913964,0.004925233680511587,0
piqa,acc,0.6724700761697497,0.010949830482825483,0
piqa,acc_norm,0.6822633297062024,0.01086313324656929,0
rte,acc,0.5306859205776173,0.03003973059219781,0
sciq,acc,0.822,0.012102167676183582,0
sciq,acc_norm,0.75,0.013699915608779773,0
storycloze_2016,acc,0.638161411010155,0.011112247531047543,0
winogrande,acc,0.526440410418311,0.014032823874407224,0