lm1-misc-pile/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.353,0.015120172605483708,0
anli_r2,acc,0.353,0.015120172605483692,0
anli_r3,acc,0.345,0.013728421539454876,0
arc_challenge,acc,0.16638225255972697,0.010883248065964142,0
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406457,0
arc_easy,acc,0.38804713804713803,0.009999295905750659,0
arc_easy,acc_norm,0.35648148148148145,0.009828046544504424,0
boolq,acc,0.6091743119266055,0.00853404406545651,1
cb,acc,0.4107142857142857,0.06633634150359541,1
cb,f1,0.28296670272869334,,1
copa,acc,0.61,0.04902071300001975,0
hellaswag,acc,0.2698665604461263,0.0044298311529146804,0
hellaswag,acc_norm,0.2778331009759012,0.004470152081675125,0
piqa,acc,0.5870511425462459,0.011487658725079095,0
piqa,acc_norm,0.5799782372143635,0.011515615810587486,0
rte,acc,0.516245487364621,0.030080573208738064,0
sciq,acc,0.748,0.013736254390651145,0
sciq,acc_norm,0.708,0.014385511563477345,0
storycloze_2016,acc,0.547300908605024,0.011510576955232206,0
winogrande,acc,0.5114443567482242,0.014048804199859329,0