lm1-misc-pile/1b191b91b/1b191b91bpile/evaluation/rankeval/lm1-1b1-91b-results_lm-eval_global_step173500_2023-01-24-17-05-38_5shots.json
{
"results": {
"anli_r1": {
"acc": 0.348,
"acc_stderr": 0.01507060460376841
},
"anli_r2": {
"acc": 0.348,
"acc_stderr": 0.01507060460376841
},
"anli_r3": {
"acc": 0.3541666666666667,
"acc_stderr": 0.013811933499570963
},
"cb": {
"acc": 0.625,
"acc_stderr": 0.06527912098338669,
"f1": 0.4297739297739298
},
"copa": {
"acc": 0.67,
"acc_stderr": 0.04725815626252607
},
"hellaswag": {
"acc": 0.36247759410476,
"acc_stderr": 0.00479733256599006,
"acc_norm": 0.4449312885879307,
"acc_norm_stderr": 0.004959425421382024
},
"rte": {
"acc": 0.516245487364621,
"acc_stderr": 0.030080573208738064
},
"winogrande": {
"acc": 0.5351223362273086,
"acc_stderr": 0.014017773120881578
},
"storycloze_2016": {
"acc": 0.6509887760555852,
"acc_stderr": 0.011022640519108536
},
"boolq": {
"acc": 0.4584097859327217,
"acc_stderr": 0.00871474901770989
},
"arc_easy": {
"acc": 0.5732323232323232,
"acc_stderr": 0.010149141043955635,
"acc_norm": 0.5669191919191919,
"acc_norm_stderr": 0.010167478013701789
},
"arc_challenge": {
"acc": 0.25341296928327645,
"acc_stderr": 0.012710896778378606,
"acc_norm": 0.2815699658703072,
"acc_norm_stderr": 0.013143376735009015
},
"sciq": {
"acc": 0.913,
"acc_stderr": 0.008916866630745904,
"acc_norm": 0.916,
"acc_norm_stderr": 0.00877616208949112
},
"piqa": {
"acc": 0.6898803046789989,
"acc_stderr": 0.01079187656684305,
"acc_norm": 0.6953210010881393,
"acc_norm_stderr": 0.010738889044325161
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}
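
# The JSON above follows the usual lm-eval harness results layout: a "results" map of
# task name -> metrics (acc, acc_stderr, and optionally acc_norm / f1), plus a "versions"
# map of task name -> task version. As a minimal sketch (not part of the original file),
# assuming the JSON is saved locally under the filename shown in the path above, the
# following Python reads it and prints accuracy with its standard error per task.

import json

RESULTS_PATH = "lm1-1b1-91b-results_lm-eval_global_step173500_2023-01-24-17-05-38_5shots.json"

with open(RESULTS_PATH) as f:
    data = json.load(f)

for task, metrics in data["results"].items():
    acc = metrics["acc"]
    stderr = metrics["acc_stderr"]
    version = data["versions"].get(task)
    print(f"{task} (v{version}): acc={acc:.4f} +/- {stderr:.4f}")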