lm1-misc-pile
/
619m2b72b7
/evaluation
/lm1-619m-2b7-results_lm-eval_global_step5111_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version | |
anli_r1,acc,0.312,0.014658474370509,0 | |
anli_r2,acc,0.343,0.015019206922356951,0 | |
anli_r3,acc,0.3433333333333333,0.01371263383046586,0 | |
arc_challenge,acc,0.15443686006825938,0.010560149230392597,0 | |
arc_challenge,acc_norm,0.197098976109215,0.011625047669880624,0 | |
arc_easy,acc,0.36826599326599324,0.009897286209010894,0 | |
arc_easy,acc_norm,0.3468013468013468,0.009766326091716005,0 | |
boolq,acc,0.5782874617737003,0.008637194202160971,1 | |
cb,acc,0.375,0.06527912098338669,1 | |
cb,f1,0.26285714285714284,,1 | |
copa,acc,0.61,0.04902071300001975,0 | |
hellaswag,acc,0.26847241585341564,0.00442259026238513,0 | |
hellaswag,acc_norm,0.2740489942242581,0.004451222241494057,0 | |
piqa,acc,0.5767138193688792,0.011527699473614478,0 | |
piqa,acc_norm,0.5832426550598476,0.011503015163618312,0 | |
rte,acc,0.5234657039711191,0.030063300411902666,0 | |
sciq,acc,0.67,0.014876872027456736,0 | |
sciq,acc_norm,0.635,0.0152317762262649,0 | |
storycloze_2016,acc,0.5339390700160342,0.01153576488164141,0 | |
winogrande,acc,0.5138121546961326,0.014047122916440412,0 | |