lm1-misc-pile/1b191b91b/1b191b91bpile/evaluation/rankeval/lm1-1b1-91b-results_lm-eval_global_step173500_2023-01-24-13-53-32_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.314,0.014683991951087947,0
anli_r2,acc,0.342,0.015008706182121733,0
anli_r3,acc,0.3416666666666667,0.013696658778002517,0
arc_challenge,acc,0.2508532423208191,0.01266819862131543,0
arc_challenge,acc_norm,0.2773037542662116,0.013082095839059374,0
arc_easy,acc,0.5681818181818182,0.010163945352271726,0
arc_easy,acc_norm,0.5542929292929293,0.010199118183322984,0
boolq,acc,0.47889908256880737,0.008737264056948628,1
cb,acc,0.5,0.06741998624632421,1
cb,f1,0.32554047259929614,,1
copa,acc,0.68,0.04688261722621505,0
hellaswag,acc,0.36207926707827126,0.004796193584930084,0
hellaswag,acc_norm,0.44632543318064133,0.004960947388535101,0
piqa,acc,0.6898803046789989,0.010791876566843056,0
piqa,acc_norm,0.6964091403699674,0.010728079893076375,0
rte,acc,0.4693140794223827,0.03003973059219781,0
sciq,acc,0.909,0.009099549538400246,0
sciq,acc_norm,0.906,0.009233052000787733,0
storycloze_2016,acc,0.6451095670764297,0.011064787659904126,0
winogrande,acc,0.5398579321231255,0.014007765428365161,0