task,metric,value,err,version
anli_r1,acc,0.318,0.014734079309311901,0
anli_r2,acc,0.338,0.014965960710224473,0
anli_r3,acc,0.3275,0.01355321116725195,0
arc_challenge,acc,0.1621160409556314,0.01077026738079835,0
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406446,0
arc_easy,acc,0.3303872053872054,0.009651430216428194,0
arc_easy,acc_norm,0.3253367003367003,0.00961342770899619,0
boolq,acc,0.5850152905198777,0.008617716361921567,1
cb,acc,0.48214285714285715,0.06737697508644648,1
cb,f1,0.33636151875145587,,1
copa,acc,0.55,0.04999999999999999,0
hellaswag,acc,0.26379207329217286,0.00439787247185492,0
hellaswag,acc_norm,0.2671778530173272,0.004415816696303073,0
piqa,acc,0.573993471164309,0.011537375448519445,0
piqa,acc_norm,0.5652883569096845,0.011565943814308855,0
rte,acc,0.49097472924187724,0.030091559826331334,0
sciq,acc,0.547,0.015749255189977586,0
sciq,acc_norm,0.534,0.015782683329937614,0
storycloze_2016,acc,0.5189738107963656,0.011554104174019695,0
winogrande,acc,0.5146014206787688,0.014046492383275837,0