lm1-misc-pile/1b191b91b/1b191b91bpile/evaluation/rankeval/lm1-1b1-91b-results_lm-eval_global_step173500_2023-01-24-13-53-32_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.314,0.014683991951087947,0
anli_r2,acc,0.342,0.015008706182121733,0
anli_r3,acc,0.3416666666666667,0.013696658778002517,0
arc_challenge,acc,0.2508532423208191,0.01266819862131543,0
arc_challenge,acc_norm,0.2773037542662116,0.013082095839059374,0
arc_easy,acc,0.5681818181818182,0.010163945352271726,0
arc_easy,acc_norm,0.5542929292929293,0.010199118183322984,0
boolq,acc,0.47889908256880737,0.008737264056948628,1
cb,acc,0.5,0.06741998624632421,1
cb,f1,0.32554047259929614,,1
copa,acc,0.68,0.04688261722621505,0
hellaswag,acc,0.36207926707827126,0.004796193584930084,0
hellaswag,acc_norm,0.44632543318064133,0.004960947388535101,0
piqa,acc,0.6898803046789989,0.010791876566843056,0
piqa,acc_norm,0.6964091403699674,0.010728079893076375,0
rte,acc,0.4693140794223827,0.03003973059219781,0
sciq,acc,0.909,0.009099549538400246,0
sciq,acc_norm,0.906,0.009233052000787733,0
storycloze_2016,acc,0.6451095670764297,0.011064787659904126,0
winogrande,acc,0.5398579321231255,0.014007765428365161,0