lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-13-57-07_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.314,0.014683991951087976,0
anli_r2,acc,0.336,0.014944140233795023,0
anli_r3,acc,0.32166666666666666,0.013490095282989521,0
arc_challenge,acc,0.17747440273037543,0.01116513876964396,0
arc_challenge,acc_norm,0.20819112627986347,0.011864866118448069,0
arc_easy,acc,0.3265993265993266,0.009623047038267647,0
arc_easy,acc_norm,0.31734006734006737,0.009550648343947771,0
boolq,acc,0.6021406727828746,0.008560641169303364,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.286512928022362,,1
copa,acc,0.6,0.049236596391733084,0
hellaswag,acc,0.2657837084246166,0.004408468107262735,0
hellaswag,acc_norm,0.2660824536944832,0.00441004753083503,0
piqa,acc,0.5788900979325353,0.01151970105915149,0
piqa,acc_norm,0.5680087051142546,0.01155740721010026,0
rte,acc,0.4729241877256318,0.030052303463143706,0
sciq,acc,0.562,0.01569721001969469,0
sciq,acc_norm,0.552,0.015733516566347833,0
storycloze_2016,acc,0.5173703901656868,0.011555452669106639,0
winogrande,acc,0.5114443567482242,0.014048804199859332,0