lm1-misc-pile
/
1b11b51b5
/evaluation
/rankeval
/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-13-53-33_2shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.318, | |
"acc_stderr": 0.014734079309311901 | |
}, | |
"anli_r2": { | |
"acc": 0.338, | |
"acc_stderr": 0.014965960710224473 | |
}, | |
"anli_r3": { | |
"acc": 0.3275, | |
"acc_stderr": 0.01355321116725195 | |
}, | |
"cb": { | |
"acc": 0.48214285714285715, | |
"acc_stderr": 0.06737697508644648, | |
"f1": 0.33636151875145587 | |
}, | |
"copa": { | |
"acc": 0.55, | |
"acc_stderr": 0.04999999999999999 | |
}, | |
"hellaswag": { | |
"acc": 0.26379207329217286, | |
"acc_stderr": 0.00439787247185492, | |
"acc_norm": 0.2671778530173272, | |
"acc_norm_stderr": 0.004415816696303073 | |
}, | |
"rte": { | |
"acc": 0.49097472924187724, | |
"acc_stderr": 0.030091559826331334 | |
}, | |
"winogrande": { | |
"acc": 0.5146014206787688, | |
"acc_stderr": 0.014046492383275837 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5189738107963656, | |
"acc_stderr": 0.011554104174019695 | |
}, | |
"boolq": { | |
"acc": 0.5850152905198777, | |
"acc_stderr": 0.008617716361921567 | |
}, | |
"arc_easy": { | |
"acc": 0.3303872053872054, | |
"acc_stderr": 0.009651430216428194, | |
"acc_norm": 0.3253367003367003, | |
"acc_norm_stderr": 0.00961342770899619 | |
}, | |
"arc_challenge": { | |
"acc": 0.1621160409556314, | |
"acc_stderr": 0.01077026738079835, | |
"acc_norm": 0.2090443686006826, | |
"acc_norm_stderr": 0.011882746987406446 | |
}, | |
"sciq": { | |
"acc": 0.547, | |
"acc_stderr": 0.015749255189977586, | |
"acc_norm": 0.534, | |
"acc_norm_stderr": 0.015782683329937614 | |
}, | |
"piqa": { | |
"acc": 0.573993471164309, | |
"acc_stderr": 0.011537375448519445, | |
"acc_norm": 0.5652883569096845, | |
"acc_norm_stderr": 0.011565943814308855 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |