lm1-misc-pile
/
280m5b95b9
/280m5b95b9pile
/evaluation
/rankeval
/lm1-280m-5b9-results_lm-eval_global_step11269_2023-01-24-13-57-03_3shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.321, | |
"acc_stderr": 0.014770821817934654 | |
}, | |
"anli_r2": { | |
"acc": 0.333, | |
"acc_stderr": 0.014910846164229857 | |
}, | |
"anli_r3": { | |
"acc": 0.3233333333333333, | |
"acc_stderr": 0.013508372867300219 | |
}, | |
"cb": { | |
"acc": 0.4107142857142857, | |
"acc_stderr": 0.0663363415035954, | |
"f1": 0.27519379844961245 | |
}, | |
"copa": { | |
"acc": 0.58, | |
"acc_stderr": 0.04960449637488583 | |
}, | |
"hellaswag": { | |
"acc": 0.27106154152559253, | |
"acc_stderr": 0.004435993492583858, | |
"acc_norm": 0.2756423023302131, | |
"acc_norm_stderr": 0.004459241474518783 | |
}, | |
"rte": { | |
"acc": 0.49458483754512633, | |
"acc_stderr": 0.030094698123239966 | |
}, | |
"winogrande": { | |
"acc": 0.526440410418311, | |
"acc_stderr": 0.01403282387440723 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5398182789951897, | |
"acc_stderr": 0.011525709570367517 | |
}, | |
"boolq": { | |
"acc": 0.591743119266055, | |
"acc_stderr": 0.008596583869583204 | |
}, | |
"arc_easy": { | |
"acc": 0.37962962962962965, | |
"acc_stderr": 0.009958037725468558, | |
"acc_norm": 0.35984848484848486, | |
"acc_norm_stderr": 0.00984848484848485 | |
}, | |
"arc_challenge": { | |
"acc": 0.18088737201365188, | |
"acc_stderr": 0.011248574467407024, | |
"acc_norm": 0.21160409556313994, | |
"acc_norm_stderr": 0.011935916358632852 | |
}, | |
"sciq": { | |
"acc": 0.718, | |
"acc_stderr": 0.014236526215291343, | |
"acc_norm": 0.661, | |
"acc_norm_stderr": 0.014976758771620345 | |
}, | |
"piqa": { | |
"acc": 0.5957562568008705, | |
"acc_stderr": 0.01144989176300747, | |
"acc_norm": 0.5843307943416758, | |
"acc_norm_stderr": 0.011498699770894797 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |