lm1-misc-pile/8b712b12b/evaluation/rankeval/lm1-8b7-12b-results_lm-eval_global_step5494_2023-01-24-17-06-10_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.342,0.015008706182121731,0
anli_r2,acc,0.344,0.015029633724408947,0
anli_r3,acc,0.3375,0.013655897185463652,0
arc_challenge,acc,0.19795221843003413,0.011643990971573395,0
arc_challenge,acc_norm,0.22696245733788395,0.01224049153613287,0
arc_easy,acc,0.45707070707070707,0.010221897564256045,0
arc_easy,acc_norm,0.41792929292929293,0.010120628211017897,0
boolq,acc,0.5107033639143731,0.008743051044836898,1
cb,acc,0.4107142857142857,0.06633634150359538,1
cb,f1,0.2779237023139462,,1
copa,acc,0.59,0.049431107042371025,0
hellaswag,acc,0.2936666002788289,0.004545102331041168,0
hellaswag,acc_norm,0.3227444732125075,0.004665704208339037,0
piqa,acc,0.6311207834602829,0.011257546676908804,0
piqa,acc_norm,0.6332970620239391,0.011243625019038257,0
rte,acc,0.5379061371841155,0.030009848912529113,0
sciq,acc,0.792,0.012841374572096921,0
sciq,acc_norm,0.779,0.013127502859696254,0
storycloze_2016,acc,0.5772314270443613,0.011423666836801129,0
winogrande,acc,0.4980268350434096,0.014052376259225636,0