lm1-misc-pile/280m5b95b9/280m5b95b9pile/evaluation/rankeval/lm1-280m-5b9-results_lm-eval_global_step11269_2023-01-24-13-57-02_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.343,0.015019206922356951,0
anli_r2,acc,0.338,0.014965960710224494,0
anli_r3,acc,0.33166666666666667,0.013596836729485163,0
arc_challenge,acc,0.18430034129692832,0.011330517933037413,0
arc_challenge,acc_norm,0.22525597269624573,0.012207839995407303,0
arc_easy,acc,0.3926767676767677,0.010020646555538687,0
arc_easy,acc_norm,0.3602693602693603,0.009851002584732387,0
boolq,acc,0.5914373088685015,0.008597580502718662,1
cb,acc,0.4107142857142857,0.06633634150359541,1
cb,f1,0.27010658914728686,,1
copa,acc,0.6,0.049236596391733084,0
hellaswag,acc,0.269169488149771,0.004426217654918004,0
hellaswag,acc_norm,0.2762397928699462,0.004462230363982151,0
piqa,acc,0.5935799782372143,0.011459682650123575,0
piqa,acc_norm,0.5767138193688792,0.011527699473614475,0
rte,acc,0.47653429602888087,0.030063300411902652,0
sciq,acc,0.7,0.014498627873361427,0
sciq,acc_norm,0.674,0.01483050720454105,0
storycloze_2016,acc,0.5446285408872261,0.011516282203726658,0
winogrande,acc,0.5185477505919495,0.014042813708888378,0