lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-17-05-33_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.318,0.014734079309311901,0
anli_r2,acc,0.351,0.0151005637983164,0
anli_r3,acc,0.32916666666666666,0.013570806258433625,0
arc_challenge,acc,0.24232081911262798,0.012521593295800118,0
arc_challenge,acc_norm,0.27986348122866894,0.013119040897725923,0
arc_easy,acc,0.5593434343434344,0.010187264635711986,0
arc_easy,acc_norm,0.5467171717171717,0.010214901516731604,0
boolq,acc,0.4889908256880734,0.008742934884517644,1
cb,acc,0.4642857142857143,0.06724777654937658,1
cb,f1,0.3288355822088955,,1
copa,acc,0.68,0.04688261722621504,0
hellaswag,acc,0.34256124278032263,0.004735962781136079,0
hellaswag,acc_norm,0.4219279028082055,0.00492857810602637,0
piqa,acc,0.6751904243743199,0.01092629623829403,0
piqa,acc_norm,0.6789989118607181,0.010892641574707903,0
rte,acc,0.555956678700361,0.02990739633379599,0
sciq,acc,0.903,0.009363689373248128,0
sciq,acc_norm,0.906,0.009233052000787731,0
storycloze_2016,acc,0.6440406199893105,0.01107225418438284,0
winogrande,acc,0.5311760063141279,0.014025142640639513,0