lm1-misc-pile/8b712b12b/evaluation/rankeval/lm1-8b7-12b-results_lm-eval_global_step5494_2023-01-24-13-57-43_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.354,0.015129868238451773,0
anli_r2,acc,0.346,0.015050266127564445,0
anli_r3,acc,0.3358333333333333,0.013639261190932875,0
arc_challenge,acc,0.1885665529010239,0.011430897647675822,0
arc_challenge,acc_norm,0.22525597269624573,0.012207839995407307,0
arc_easy,acc,0.4537037037037037,0.010215708295494128,0
arc_easy,acc_norm,0.42424242424242425,0.010141333654958565,0
boolq,acc,0.5048929663608562,0.008744636233555066,1
cb,acc,0.42857142857142855,0.06672848092813058,1
cb,f1,0.26455026455026454,,1
copa,acc,0.61,0.04902071300001975,0
hellaswag,acc,0.2927703644692292,0.004541039698729834,0
hellaswag,acc_norm,0.3194582752439753,0.00465313836094811,0
piqa,acc,0.6409140369967355,0.011192949073844105,0
piqa,acc_norm,0.6360174102285092,0.011225875703487166,0
rte,acc,0.5126353790613718,0.030086851767188564,0
sciq,acc,0.791,0.01286407728849932,0
sciq,acc_norm,0.767,0.013374972519220079,0
storycloze_2016,acc,0.5788348476750401,0.011417808278216115,0
winogrande,acc,0.4964483030781373,0.01405213114691586,0