lm1-misc-pile/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.353,0.015120172605483708,0
anli_r2,acc,0.353,0.015120172605483692,0
anli_r3,acc,0.345,0.013728421539454876,0
arc_challenge,acc,0.16638225255972697,0.010883248065964142,0
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406457,0
arc_easy,acc,0.38804713804713803,0.009999295905750659,0
arc_easy,acc_norm,0.35648148148148145,0.009828046544504424,0
boolq,acc,0.6091743119266055,0.00853404406545651,1
cb,acc,0.4107142857142857,0.06633634150359541,1
cb,f1,0.28296670272869334,,1
copa,acc,0.61,0.04902071300001975,0
hellaswag,acc,0.2698665604461263,0.0044298311529146804,0
hellaswag,acc_norm,0.2778331009759012,0.004470152081675125,0
piqa,acc,0.5870511425462459,0.011487658725079095,0
piqa,acc_norm,0.5799782372143635,0.011515615810587486,0
rte,acc,0.516245487364621,0.030080573208738064,0
sciq,acc,0.748,0.013736254390651145,0
sciq,acc_norm,0.708,0.014385511563477345,0
storycloze_2016,acc,0.547300908605024,0.011510576955232206,0
winogrande,acc,0.5114443567482242,0.014048804199859329,0