lm1-misc-pile
/
574m174b174b
/evaluation
/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-53-29_2shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.308, | |
"acc_stderr": 0.014606483127342765 | |
}, | |
"anli_r2": { | |
"acc": 0.346, | |
"acc_stderr": 0.015050266127564443 | |
}, | |
"anli_r3": { | |
"acc": 0.3233333333333333, | |
"acc_stderr": 0.013508372867300224 | |
}, | |
"cb": { | |
"acc": 0.32142857142857145, | |
"acc_stderr": 0.06297362289056341, | |
"f1": 0.23303167420814477 | |
}, | |
"copa": { | |
"acc": 0.75, | |
"acc_stderr": 0.04351941398892446 | |
}, | |
"hellaswag": { | |
"acc": 0.3419637522405895, | |
"acc_stderr": 0.004733980470799225, | |
"acc_norm": 0.4202350129456284, | |
"acc_norm_stderr": 0.004925877705771198 | |
}, | |
"rte": { | |
"acc": 0.5090252707581228, | |
"acc_stderr": 0.030091559826331334 | |
}, | |
"winogrande": { | |
"acc": 0.516179952644041, | |
"acc_stderr": 0.0140451261309786 | |
}, | |
"storycloze_2016": { | |
"acc": 0.6435061464457509, | |
"acc_stderr": 0.011075964871050996 | |
}, | |
"boolq": { | |
"acc": 0.4932721712538226, | |
"acc_stderr": 0.008744263273827433 | |
}, | |
"arc_easy": { | |
"acc": 0.5526094276094277, | |
"acc_stderr": 0.010202832385415646, | |
"acc_norm": 0.5484006734006734, | |
"acc_norm_stderr": 0.010211600726405236 | |
}, | |
"arc_challenge": { | |
"acc": 0.24658703071672355, | |
"acc_stderr": 0.012595726268790125, | |
"acc_norm": 0.2841296928327645, | |
"acc_norm_stderr": 0.013179442447653886 | |
}, | |
"sciq": { | |
"acc": 0.891, | |
"acc_stderr": 0.009859828407037188, | |
"acc_norm": 0.884, | |
"acc_norm_stderr": 0.010131468138757005 | |
}, | |
"piqa": { | |
"acc": 0.676278563656148, | |
"acc_stderr": 0.010916765010708781, | |
"acc_norm": 0.675734494015234, | |
"acc_norm_stderr": 0.010921539041347985 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |