lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-57-05_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.324,0.01480686473373886,0
anli_r2,acc,0.365,0.015231776226264914,0
anli_r3,acc,0.3258333333333333,0.013535422043417462,0
arc_challenge,acc,0.2551194539249147,0.012739038695202104,0
arc_challenge,acc_norm,0.2764505119453925,0.013069662474252425,0
arc_easy,acc,0.553030303030303,0.010201914927791676,0
arc_easy,acc_norm,0.5471380471380471,0.010214087372211392,0
boolq,acc,0.4761467889908257,0.008735097860690577,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.34593096497858405,,1
copa,acc,0.73,0.044619604333847394,0
hellaswag,acc,0.3436566421031667,0.004739575380508866,0
hellaswag,acc_norm,0.41635132443736306,0.004919457850104229,0
piqa,acc,0.6784548422198041,0.010897500107575647,0
piqa,acc_norm,0.6784548422198041,0.010897500107575652,0
rte,acc,0.4620938628158845,0.030009848912529117,0
sciq,acc,0.9,0.009491579957525052,0
sciq,acc_norm,0.893,0.009779910359847165,0
storycloze_2016,acc,0.638161411010155,0.011112247531047545,0
winogrande,acc,0.5217048145224941,0.014039239216484629,0
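
For quick inspection, the long-format rows above (task, metric, value, err, version) can be loaded and reshaped with pandas. A minimal sketch, assuming the CSV has been downloaded locally under the file name from the listing above; the `pivot_table` call is just one convenient way to view tasks against metrics:

```python
import pandas as pd

# File name taken from the path above; adjust to wherever the CSV lives locally.
path = "lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-57-05_4shots.csv"

# Columns: task, metric, value, err, version.
# 'err' is the reported standard error; it is blank (NaN) for cb's f1 row.
df = pd.read_csv(path)

# Reshape to one row per task, one column per metric (acc, acc_norm, f1).
table = df.pivot_table(index="task", columns="metric", values="value")
print(table.round(3))
```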