lm1-misc-pile/280m5b95b9/280m5b95b9pile/evaluation/rankeval/lm1-280m-5b9-results_lm-eval_global_step11269_2023-01-24-13-57-03_3shots.json
{
"results": {
"anli_r1": {
"acc": 0.321,
"acc_stderr": 0.014770821817934654
},
"anli_r2": {
"acc": 0.333,
"acc_stderr": 0.014910846164229857
},
"anli_r3": {
"acc": 0.3233333333333333,
"acc_stderr": 0.013508372867300219
},
"cb": {
"acc": 0.4107142857142857,
"acc_stderr": 0.0663363415035954,
"f1": 0.27519379844961245
},
"copa": {
"acc": 0.58,
"acc_stderr": 0.04960449637488583
},
"hellaswag": {
"acc": 0.27106154152559253,
"acc_stderr": 0.004435993492583858,
"acc_norm": 0.2756423023302131,
"acc_norm_stderr": 0.004459241474518783
},
"rte": {
"acc": 0.49458483754512633,
"acc_stderr": 0.030094698123239966
},
"winogrande": {
"acc": 0.526440410418311,
"acc_stderr": 0.01403282387440723
},
"storycloze_2016": {
"acc": 0.5398182789951897,
"acc_stderr": 0.011525709570367517
},
"boolq": {
"acc": 0.591743119266055,
"acc_stderr": 0.008596583869583204
},
"arc_easy": {
"acc": 0.37962962962962965,
"acc_stderr": 0.009958037725468558,
"acc_norm": 0.35984848484848486,
"acc_norm_stderr": 0.00984848484848485
},
"arc_challenge": {
"acc": 0.18088737201365188,
"acc_stderr": 0.011248574467407024,
"acc_norm": 0.21160409556313994,
"acc_norm_stderr": 0.011935916358632852
},
"sciq": {
"acc": 0.718,
"acc_stderr": 0.014236526215291343,
"acc_norm": 0.661,
"acc_norm_stderr": 0.014976758771620345
},
"piqa": {
"acc": 0.5957562568008705,
"acc_stderr": 0.01144989176300747,
"acc_norm": 0.5843307943416758,
"acc_norm_stderr": 0.011498699770894797
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}