lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-22-18-55-55_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.332,0.01489959724281149,0
anli_r2,acc,0.308,0.01460648312734276,0
anli_r3,acc,0.3233333333333333,0.013508372867300228,0
arc_challenge,acc,0.16638225255972697,0.010883248065964142,0
arc_challenge,acc_norm,0.21245733788395904,0.011953482906582952,0
arc_easy,acc,0.3421717171717172,0.00973523677195875,0
arc_easy,acc_norm,0.31186868686868685,0.009505823345817666,0
boolq,acc,0.5529051987767584,0.00869596306417273,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.22313546423135466,,1
copa,acc,0.54,0.05009082659620332,0
hellaswag,acc,0.26269667396932883,0.004391995637542121,0
hellaswag,acc_norm,0.264389563831906,0.004401063265803207,0
piqa,acc,0.5788900979325353,0.01151970105915149,0
piqa,acc_norm,0.5690968443960827,0.011553893175901412,0
rte,acc,0.5342960288808665,0.030025579819366422,0
sciq,acc,0.548,0.01574623586588068,0
sciq,acc_norm,0.515,0.01581217964181491,0
storycloze_2016,acc,0.5307322287546766,0.011540570846495544,0
winogrande,acc,0.4980268350434096,0.014052376259225636,0
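A minimal sketch (not part of the original CSV) of how one might load and inspect these results with pandas; the local filename is assumed to match the file listed above, and the `err` column is read as the reported standard error.

```python
import pandas as pd

# Assumed local copy of the results file shown above.
path = "lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-22-18-55-55_0shots.csv"

# Columns: task, metric (acc / acc_norm / f1), value, err (standard error), version.
df = pd.read_csv(path)

# Pivot so each task's metrics appear on a single row for easier comparison.
pivot = df.pivot_table(index="task", columns="metric", values="value")
print(pivot.round(4))
```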