lm1-misc-pile
/
3b92b62b6
/evaluation
/rankeval
/lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-24-17-05-44_5shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.33, | |
"acc_stderr": 0.014876872027456729 | |
}, | |
"anli_r2": { | |
"acc": 0.336, | |
"acc_stderr": 0.014944140233795023 | |
}, | |
"anli_r3": { | |
"acc": 0.33416666666666667, | |
"acc_stderr": 0.013622434813136781 | |
}, | |
"cb": { | |
"acc": 0.42857142857142855, | |
"acc_stderr": 0.06672848092813057, | |
"f1": 0.31018867924528304 | |
}, | |
"copa": { | |
"acc": 0.69, | |
"acc_stderr": 0.04648231987117316 | |
}, | |
"hellaswag": { | |
"acc": 0.3339972117108146, | |
"acc_stderr": 0.0047067481521253195, | |
"acc_norm": 0.3973312089225254, | |
"acc_norm_stderr": 0.0048834551889089675 | |
}, | |
"rte": { | |
"acc": 0.51985559566787, | |
"acc_stderr": 0.030072723167317177 | |
}, | |
"winogrande": { | |
"acc": 0.516179952644041, | |
"acc_stderr": 0.014045126130978601 | |
}, | |
"storycloze_2016": { | |
"acc": 0.6285408872260823, | |
"acc_stderr": 0.011173814890350135 | |
}, | |
"boolq": { | |
"acc": 0.5305810397553516, | |
"acc_stderr": 0.008728682900189723 | |
}, | |
"arc_easy": { | |
"acc": 0.547979797979798, | |
"acc_stderr": 0.010212436978834106, | |
"acc_norm": 0.5311447811447811, | |
"acc_norm_stderr": 0.010239860250021741 | |
}, | |
"arc_challenge": { | |
"acc": 0.2363481228668942, | |
"acc_stderr": 0.012414960524301829, | |
"acc_norm": 0.25341296928327645, | |
"acc_norm_stderr": 0.012710896778378606 | |
}, | |
"sciq": { | |
"acc": 0.901, | |
"acc_stderr": 0.009449248027662734, | |
"acc_norm": 0.899, | |
"acc_norm_stderr": 0.009533618929340987 | |
}, | |
"piqa": { | |
"acc": 0.6681175190424374, | |
"acc_stderr": 0.010986617776361597, | |
"acc_norm": 0.6621327529923831, | |
"acc_norm_stderr": 0.011035474307853843 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |