lm1-misc-pile/280m5b95b9/280m5b95b9pile/evaluation/rankeval/lm1-280m-5b9-results_lm-eval_global_step11269_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.321,0.014770821817934633,0
anli_r2,acc,0.335,0.014933117490932575,0
anli_r3,acc,0.3408333333333333,0.013688600793296936,0
arc_challenge,acc,0.17747440273037543,0.011165138769643973,0
arc_challenge,acc_norm,0.21416382252559726,0.01198838320596649,0
arc_easy,acc,0.37626262626262624,0.00994064622151377,0
arc_easy,acc_norm,0.3648989898989899,0.009878157021155647,0
boolq,acc,0.5568807339449541,0.008688282882073805,1
cb,acc,0.44642857142857145,0.06703189227942398,1
cb,f1,0.29572649572649573,,1
copa,acc,0.62,0.048783173121456316,0
hellaswag,acc,0.2700657239593706,0.004430861033616649,0
hellaswag,acc_norm,0.27394941246763593,0.004450718673552668,0
piqa,acc,0.5935799782372143,0.011459682650123575,0
piqa,acc_norm,0.5854189336235038,0.011494326682255165,0
rte,acc,0.5018050541516246,0.030096267148976626,0
sciq,acc,0.715,0.014282120955200475,0
sciq,acc_norm,0.667,0.014910846164229854,0
storycloze_2016,acc,0.5489043292357029,0.011506993144185261,0
winogrande,acc,0.5185477505919495,0.014042813708888378,0