lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-17-05-33_5shots.json
{
  "results": {
    "anli_r1": {
      "acc": 0.318,
      "acc_stderr": 0.014734079309311901
    },
    "anli_r2": {
      "acc": 0.351,
      "acc_stderr": 0.0151005637983164
    },
    "anli_r3": {
      "acc": 0.32916666666666666,
      "acc_stderr": 0.013570806258433625
    },
    "cb": {
      "acc": 0.4642857142857143,
      "acc_stderr": 0.06724777654937658,
      "f1": 0.3288355822088955
    },
    "copa": {
      "acc": 0.68,
      "acc_stderr": 0.04688261722621504
    },
    "hellaswag": {
      "acc": 0.34256124278032263,
      "acc_stderr": 0.004735962781136079,
      "acc_norm": 0.4219279028082055,
      "acc_norm_stderr": 0.00492857810602637
    },
    "rte": {
      "acc": 0.555956678700361,
      "acc_stderr": 0.02990739633379599
    },
    "winogrande": {
      "acc": 0.5311760063141279,
      "acc_stderr": 0.014025142640639513
    },
    "storycloze_2016": {
      "acc": 0.6440406199893105,
      "acc_stderr": 0.01107225418438284
    },
    "boolq": {
      "acc": 0.4889908256880734,
      "acc_stderr": 0.008742934884517644
    },
    "arc_easy": {
      "acc": 0.5593434343434344,
      "acc_stderr": 0.010187264635711986,
      "acc_norm": 0.5467171717171717,
      "acc_norm_stderr": 0.010214901516731604
    },
    "arc_challenge": {
      "acc": 0.24232081911262798,
      "acc_stderr": 0.012521593295800118,
      "acc_norm": 0.27986348122866894,
      "acc_norm_stderr": 0.013119040897725923
    },
    "sciq": {
      "acc": 0.903,
      "acc_stderr": 0.009363689373248128,
      "acc_norm": 0.906,
      "acc_norm_stderr": 0.009233052000787731
    },
    "piqa": {
      "acc": 0.6751904243743199,
      "acc_stderr": 0.01092629623829403,
      "acc_norm": 0.6789989118607181,
      "acc_norm_stderr": 0.010892641574707903
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}