lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-57-05_4shots.json
{
"results": {
"anli_r1": {
"acc": 0.324,
"acc_stderr": 0.01480686473373886
},
"anli_r2": {
"acc": 0.365,
"acc_stderr": 0.015231776226264914
},
"anli_r3": {
"acc": 0.3258333333333333,
"acc_stderr": 0.013535422043417462
},
"cb": {
"acc": 0.39285714285714285,
"acc_stderr": 0.0658538889806635,
"f1": 0.34593096497858405
},
"copa": {
"acc": 0.73,
"acc_stderr": 0.044619604333847394
},
"hellaswag": {
"acc": 0.3436566421031667,
"acc_stderr": 0.004739575380508866,
"acc_norm": 0.41635132443736306,
"acc_norm_stderr": 0.004919457850104229
},
"rte": {
"acc": 0.4620938628158845,
"acc_stderr": 0.030009848912529117
},
"winogrande": {
"acc": 0.5217048145224941,
"acc_stderr": 0.014039239216484629
},
"storycloze_2016": {
"acc": 0.638161411010155,
"acc_stderr": 0.011112247531047545
},
"boolq": {
"acc": 0.4761467889908257,
"acc_stderr": 0.008735097860690577
},
"arc_easy": {
"acc": 0.553030303030303,
"acc_stderr": 0.010201914927791676,
"acc_norm": 0.5471380471380471,
"acc_norm_stderr": 0.010214087372211392
},
"arc_challenge": {
"acc": 0.2551194539249147,
"acc_stderr": 0.012739038695202104,
"acc_norm": 0.2764505119453925,
"acc_norm_stderr": 0.013069662474252425
},
"sciq": {
"acc": 0.9,
"acc_stderr": 0.009491579957525052,
"acc_norm": 0.893,
"acc_norm_stderr": 0.009779910359847165
},
"piqa": {
"acc": 0.6784548422198041,
"acc_stderr": 0.010897500107575647,
"acc_norm": 0.6784548422198041,
"acc_norm_stderr": 0.010897500107575652
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}