task,metric,value,err,version
anli_r1,acc,0.325,0.014818724459095524,0
anli_r2,acc,0.345,0.015039986742055237,0
anli_r3,acc,0.33416666666666667,0.013622434813136764,0
arc_challenge,acc,0.25597269624573377,0.012753013241244521,0
arc_challenge,acc_norm,0.28498293515358364,0.013191348179838793,0
arc_easy,acc,0.5787037037037037,0.010131882498193134,0
arc_easy,acc_norm,0.5854377104377104,0.010108889212447783,0
boolq,acc,0.5831804281345566,0.008623192108843677,1
cb,acc,0.375,0.06527912098338669,1
cb,f1,0.26400759734093066,,1
copa,acc,0.73,0.044619604333847394,0
hellaswag,acc,0.3670583549093806,0.004810175357871108,0
hellaswag,acc_norm,0.458972316271659,0.004972954732733358,0
piqa,acc,0.6871599564744287,0.010817714425701112,0
piqa,acc_norm,0.6953210010881393,0.010738889044325161,0
rte,acc,0.5306859205776173,0.030039730592197816,0
sciq,acc,0.914,0.008870325962594766,0
sciq,acc_norm,0.912,0.008963053962592074,0
storycloze_2016,acc,0.6584714056654195,0.010966327011405767,0
winogrande,acc,0.55327545382794,0.0139724883716167,0