{ "results": { "anli_r1": { "acc": 0.32, "acc_stderr": 0.014758652303574876 }, "anli_r2": { "acc": 0.334, "acc_stderr": 0.014922019523732963 }, "anli_r3": { "acc": 0.32916666666666666, "acc_stderr": 0.013570806258433633 }, "cb": { "acc": 0.35714285714285715, "acc_stderr": 0.0646095738380922, "f1": 0.24356089022255564 }, "copa": { "acc": 0.62, "acc_stderr": 0.048783173121456316 }, "hellaswag": { "acc": 0.26837283409679347, "acc_stderr": 0.004422070927212532, "acc_norm": 0.27982473610834496, "acc_norm_stderr": 0.004479955169853626 }, "rte": { "acc": 0.51985559566787, "acc_stderr": 0.030072723167317184 }, "winogrande": { "acc": 0.5169692186266772, "acc_stderr": 0.014044390401612981 }, "storycloze_2016": { "acc": 0.5462319615179049, "acc_stderr": 0.01151289919986303 }, "boolq": { "acc": 0.5944954128440367, "acc_stderr": 0.008587459055441612 }, "arc_easy": { "acc": 0.37415824915824913, "acc_stderr": 0.009929516948977625, "acc_norm": 0.3707912457912458, "acc_norm_stderr": 0.009911292822056923 }, "arc_challenge": { "acc": 0.15870307167235495, "acc_stderr": 0.010677974278076945, "acc_norm": 0.2158703071672355, "acc_norm_stderr": 0.012022975360030665 }, "sciq": { "acc": 0.736, "acc_stderr": 0.013946271849440472, "acc_norm": 0.691, "acc_norm_stderr": 0.014619600977206486 }, "piqa": { "acc": 0.5843307943416758, "acc_stderr": 0.011498699770894783, "acc_norm": 0.5897714907508161, "acc_norm_stderr": 0.011476256036359114 } }, "versions": { "anli_r1": 0, "anli_r2": 0, "anli_r3": 0, "cb": 1, "copa": 0, "hellaswag": 0, "rte": 0, "winogrande": 0, "storycloze_2016": 0, "boolq": 1, "arc_easy": 0, "arc_challenge": 0, "sciq": 0, "piqa": 0 } }