lm1-misc-pile/3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-03-12-23-46.json
{
"results": {
"boolq": {
"acc": 0.5859327217125382,
"acc_stderr": 0.008614932353134947
},
"hellaswag": {
"acc": 0.40689105755825533,
"acc_stderr": 0.004902502514738602,
"acc_norm": 0.5210117506472814,
"acc_norm_stderr": 0.0049853735507751065
},
"arc_challenge": {
"acc": 0.26535836177474403,
"acc_stderr": 0.012902554762313967,
"acc_norm": 0.295221843003413,
"acc_norm_stderr": 0.013329750293382316
},
"arc_easy": {
"acc": 0.5896464646464646,
"acc_stderr": 0.010093531255765457,
"acc_norm": 0.5404040404040404,
"acc_norm_stderr": 0.010226230740889027
},
"sciq": {
"acc": 0.851,
"acc_stderr": 0.011266140684632175,
"acc_norm": 0.795,
"acc_norm_stderr": 0.012772554096113132
}
},
"versions": {
"boolq": 1,
"hellaswag": 0,
"arc_challenge": 0,
"arc_easy": 0,
"sciq": 0
}
}