lm1-misc-pile/3b92b62b6/evaluation/rankeval/lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-24-17-05-44_5shots.json
{
"results": {
"anli_r1": {
"acc": 0.33,
"acc_stderr": 0.014876872027456729
},
"anli_r2": {
"acc": 0.336,
"acc_stderr": 0.014944140233795023
},
"anli_r3": {
"acc": 0.33416666666666667,
"acc_stderr": 0.013622434813136781
},
"cb": {
"acc": 0.42857142857142855,
"acc_stderr": 0.06672848092813057,
"f1": 0.31018867924528304
},
"copa": {
"acc": 0.69,
"acc_stderr": 0.04648231987117316
},
"hellaswag": {
"acc": 0.3339972117108146,
"acc_stderr": 0.0047067481521253195,
"acc_norm": 0.3973312089225254,
"acc_norm_stderr": 0.0048834551889089675
},
"rte": {
"acc": 0.51985559566787,
"acc_stderr": 0.030072723167317177
},
"winogrande": {
"acc": 0.516179952644041,
"acc_stderr": 0.014045126130978601
},
"storycloze_2016": {
"acc": 0.6285408872260823,
"acc_stderr": 0.011173814890350135
},
"boolq": {
"acc": 0.5305810397553516,
"acc_stderr": 0.008728682900189723
},
"arc_easy": {
"acc": 0.547979797979798,
"acc_stderr": 0.010212436978834106,
"acc_norm": 0.5311447811447811,
"acc_norm_stderr": 0.010239860250021741
},
"arc_challenge": {
"acc": 0.2363481228668942,
"acc_stderr": 0.012414960524301829,
"acc_norm": 0.25341296928327645,
"acc_norm_stderr": 0.012710896778378606
},
"sciq": {
"acc": 0.901,
"acc_stderr": 0.009449248027662734,
"acc_norm": 0.899,
"acc_norm_stderr": 0.009533618929340987
},
"piqa": {
"acc": 0.6681175190424374,
"acc_stderr": 0.010986617776361597,
"acc_norm": 0.6621327529923831,
"acc_norm_stderr": 0.011035474307853843
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}
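
A minimal sketch for consuming this file, assuming it is saved locally under the filename shown above; the field names ("results", "acc", "acc_stderr") are taken directly from the JSON itself, and the script is illustrative rather than part of the original evaluation pipeline.

    import json

    # Load the lm-eval results dump (path is an assumption: the filename above).
    with open("lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-24-17-05-44_5shots.json") as f:
        data = json.load(f)

    # Print accuracy and its standard error for each task in the "results" block.
    for task, metrics in data["results"].items():
        acc = metrics["acc"]
        stderr = metrics.get("acc_stderr", float("nan"))
        print(f"{task:20s} acc={acc:.4f} +/- {stderr:.4f}")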