lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-23-19-58-55_1shots.json
{
"results": {
"anli_r1": {
"acc": 0.323,
"acc_stderr": 0.014794927843348637
},
"anli_r2": {
"acc": 0.319,
"acc_stderr": 0.01474640486547348
},
"anli_r3": {
"acc": 0.3358333333333333,
"acc_stderr": 0.013639261190932884
},
"cb": {
"acc": 0.48214285714285715,
"acc_stderr": 0.06737697508644648,
"f1": 0.3421052631578947
},
"copa": {
"acc": 0.54,
"acc_stderr": 0.05009082659620332
},
"hellaswag": {
"acc": 0.2638916550487951,
"acc_stderr": 0.004398404992933851,
"acc_norm": 0.26419040031866164,
"acc_norm_stderr": 0.004400000822742047
},
"rte": {
"acc": 0.5306859205776173,
"acc_stderr": 0.03003973059219781
},
"winogrande": {
"acc": 0.5090765588003157,
"acc_stderr": 0.014050170094497704
},
"storycloze_2016": {
"acc": 0.5189738107963656,
"acc_stderr": 0.011554104174019694
},
"boolq": {
"acc": 0.5226299694189602,
"acc_stderr": 0.008736093428015826
},
"arc_easy": {
"acc": 0.34764309764309764,
"acc_stderr": 0.009771868846830912,
"acc_norm": 0.3186026936026936,
"acc_norm_stderr": 0.009560775507673364
},
"arc_challenge": {
"acc": 0.17235494880546076,
"acc_stderr": 0.011037113093461295,
"acc_norm": 0.2098976109215017,
"acc_norm_stderr": 0.011900548748047446
},
"sciq": {
"acc": 0.568,
"acc_stderr": 0.01567232023733621,
"acc_norm": 0.546,
"acc_norm_stderr": 0.015752210388771844
},
"piqa": {
"acc": 0.5723612622415669,
"acc_stderr": 0.011543009623282828,
"acc_norm": 0.5652883569096845,
"acc_norm_stderr": 0.011565943814308855
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}
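
The JSON above groups per-task metrics under "results" (accuracy, its standard error, and for some tasks a length-normalized accuracy) and task version numbers under "versions". Below is a minimal sketch, not part of the repository, that reads such a file and flattens it into a readable table; it assumes the file is available locally under the name shown above and uses only the Python standard library.

```python
import json

# Hypothetical local path; adjust if the file lives elsewhere.
PATH = "lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-23-19-58-55_1shots.json"

with open(PATH) as f:
    data = json.load(f)

results = data["results"]
versions = data["versions"]

# Print one row per task: version, accuracy, standard error, and
# normalized accuracy where the task reports one.
print(f"{'task':<18}{'ver':>4}{'acc':>10}{'acc_stderr':>14}{'acc_norm':>10}")
for task in sorted(results):
    m = results[task]
    acc_norm = m.get("acc_norm")  # only some tasks include a normalized accuracy
    norm_str = f"{acc_norm:10.4f}" if acc_norm is not None else f"{'-':>10}"
    print(f"{task:<18}{versions[task]:>4}{m['acc']:>10.4f}"
          f"{m['acc_stderr']:>14.4f}{norm_str}")

# Unweighted mean accuracy across tasks, as a rough single-number summary.
mean_acc = sum(m["acc"] for m in results.values()) / len(results)
print(f"\nmean acc over {len(results)} tasks: {mean_acc:.4f}")
```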