lm1-misc-pile
/
1b566b66b
/1b566b66bpile
/evaluation
/lm1-1b5-66b-results_lm-eval_global_step125429_2023-01-24-13-53-33_2shots.csv
task,metric,value,err,version | |
anli_r1,acc,0.325,0.014818724459095524,0 | |
anli_r2,acc,0.345,0.015039986742055237,0 | |
anli_r3,acc,0.33416666666666667,0.013622434813136764,0 | |
arc_challenge,acc,0.25597269624573377,0.012753013241244521,0 | |
arc_challenge,acc_norm,0.28498293515358364,0.013191348179838793,0 | |
arc_easy,acc,0.5787037037037037,0.010131882498193134,0 | |
arc_easy,acc_norm,0.5854377104377104,0.010108889212447783,0 | |
boolq,acc,0.5831804281345566,0.008623192108843677,1 | |
cb,acc,0.375,0.06527912098338669,1 | |
cb,f1,0.26400759734093066,,1 | |
copa,acc,0.73,0.044619604333847394,0 | |
hellaswag,acc,0.3670583549093806,0.004810175357871108,0 | |
hellaswag,acc_norm,0.458972316271659,0.004972954732733358,0 | |
piqa,acc,0.6871599564744287,0.010817714425701112,0 | |
piqa,acc_norm,0.6953210010881393,0.010738889044325161,0 | |
rte,acc,0.5306859205776173,0.030039730592197816,0 | |
sciq,acc,0.914,0.008870325962594766,0 | |
sciq,acc_norm,0.912,0.008963053962592074,0 | |
storycloze_2016,acc,0.6584714056654195,0.010966327011405767,0 | |
winogrande,acc,0.55327545382794,0.0139724883716167,0 | |