lm1-misc-pile / 3b92b62b6 /evaluation /rankeval /lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-22-13-12-25_0shots.csv
Muennighoff's picture
Add
0461a88
task,metric,value,err,version
anli_r1,acc,0.323,0.014794927843348633,0
anli_r2,acc,0.333,0.014910846164229863,0
anli_r3,acc,0.33166666666666667,0.013596836729485168,0
arc_challenge,acc,0.1962457337883959,0.011606019881416286,0
arc_challenge,acc_norm,0.257679180887372,0.01278077056276841,0
arc_easy,acc,0.515993265993266,0.010254533589288163,0
arc_easy,acc_norm,0.4659090909090909,0.010235908103438692,0
boolq,acc,0.5318042813455658,0.008727345583419182,1
cb,acc,0.2857142857142857,0.06091449038731724,1
cb,f1,0.15023474178403756,,1
copa,acc,0.7,0.046056618647183814,0
hellaswag,acc,0.3355905198167696,0.004712314511950957,0
hellaswag,acc_norm,0.39713204540928104,0.004883037758919961,0
piqa,acc,0.6605005440696409,0.011048455047173915,0
piqa,acc_norm,0.6621327529923831,0.011035474307853841,0
rte,acc,0.51985559566787,0.030072723167317184,0
sciq,acc,0.822,0.01210216767618359,0
sciq,acc_norm,0.731,0.014029819522568198,0
storycloze_2016,acc,0.6264029930518439,0.011186849693644694,0
winogrande,acc,0.4964483030781373,0.014052131146915864,0