lm1-misc-pile
/
1b11b51b5
/evaluation
/rankeval
/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-23-19-58-55_1shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.323, | |
"acc_stderr": 0.014794927843348637 | |
}, | |
"anli_r2": { | |
"acc": 0.319, | |
"acc_stderr": 0.01474640486547348 | |
}, | |
"anli_r3": { | |
"acc": 0.3358333333333333, | |
"acc_stderr": 0.013639261190932884 | |
}, | |
"cb": { | |
"acc": 0.48214285714285715, | |
"acc_stderr": 0.06737697508644648, | |
"f1": 0.3421052631578947 | |
}, | |
"copa": { | |
"acc": 0.54, | |
"acc_stderr": 0.05009082659620332 | |
}, | |
"hellaswag": { | |
"acc": 0.2638916550487951, | |
"acc_stderr": 0.004398404992933851, | |
"acc_norm": 0.26419040031866164, | |
"acc_norm_stderr": 0.004400000822742047 | |
}, | |
"rte": { | |
"acc": 0.5306859205776173, | |
"acc_stderr": 0.03003973059219781 | |
}, | |
"winogrande": { | |
"acc": 0.5090765588003157, | |
"acc_stderr": 0.014050170094497704 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5189738107963656, | |
"acc_stderr": 0.011554104174019694 | |
}, | |
"boolq": { | |
"acc": 0.5226299694189602, | |
"acc_stderr": 0.008736093428015826 | |
}, | |
"arc_easy": { | |
"acc": 0.34764309764309764, | |
"acc_stderr": 0.009771868846830912, | |
"acc_norm": 0.3186026936026936, | |
"acc_norm_stderr": 0.009560775507673364 | |
}, | |
"arc_challenge": { | |
"acc": 0.17235494880546076, | |
"acc_stderr": 0.011037113093461295, | |
"acc_norm": 0.2098976109215017, | |
"acc_norm_stderr": 0.011900548748047446 | |
}, | |
"sciq": { | |
"acc": 0.568, | |
"acc_stderr": 0.01567232023733621, | |
"acc_norm": 0.546, | |
"acc_norm_stderr": 0.015752210388771844 | |
}, | |
"piqa": { | |
"acc": 0.5723612622415669, | |
"acc_stderr": 0.011543009623282828, | |
"acc_norm": 0.5652883569096845, | |
"acc_norm_stderr": 0.011565943814308855 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |