lm1-misc-pile / 3b92b62b6 /evaluation /rankeval /lm1-3b9-26b-results_lm-eval_global_step24424_2023-01-24-13-57-16_4shots.csv
Muennighoff's picture
Add
0461a88
task,metric,value,err,version
anli_r1,acc,0.34,0.014987482264363937,0
anli_r2,acc,0.353,0.015120172605483685,0
anli_r3,acc,0.3458333333333333,0.013736245342311012,0
arc_challenge,acc,0.23037542662116042,0.01230492841874761,0
arc_challenge,acc_norm,0.2593856655290102,0.012808273573927102,0
arc_easy,acc,0.5547138047138047,0.010198171137873871,0
arc_easy,acc_norm,0.5303030303030303,0.01024092360872654,0
boolq,acc,0.5146788990825688,0.00874128556866792,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.3383267705301604,,1
copa,acc,0.67,0.047258156262526066,0
hellaswag,acc,0.33499302927703645,0.004710234188047354,0
hellaswag,acc_norm,0.3973312089225254,0.0048834551889089675,0
piqa,acc,0.6692056583242655,0.010977520584714413,0
piqa,acc_norm,0.6637649619151251,0.011022346708970236,0
rte,acc,0.4548736462093863,0.029973636495415252,0
sciq,acc,0.888,0.009977753031397247,0
sciq,acc_norm,0.883,0.010169287802713329,0
storycloze_2016,acc,0.623730625334046,0.011202815067213618,0
winogrande,acc,0.5027624309392266,0.014052271211616429,0