lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-13-57-07_3shots.csv
task,metric,value,err,version
anli_r1,acc,0.314,0.014683991951087976,0
anli_r2,acc,0.336,0.014944140233795023,0
anli_r3,acc,0.32166666666666666,0.013490095282989521,0
arc_challenge,acc,0.17747440273037543,0.01116513876964396,0
arc_challenge,acc_norm,0.20819112627986347,0.011864866118448069,0
arc_easy,acc,0.3265993265993266,0.009623047038267647,0
arc_easy,acc_norm,0.31734006734006737,0.009550648343947771,0
boolq,acc,0.6021406727828746,0.008560641169303364,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.286512928022362,,1
copa,acc,0.6,0.049236596391733084,0
hellaswag,acc,0.2657837084246166,0.004408468107262735,0
hellaswag,acc_norm,0.2660824536944832,0.00441004753083503,0
piqa,acc,0.5788900979325353,0.01151970105915149,0
piqa,acc_norm,0.5680087051142546,0.01155740721010026,0
rte,acc,0.4729241877256318,0.030052303463143706,0
sciq,acc,0.562,0.01569721001969469,0
sciq,acc_norm,0.552,0.015733516566347833,0
storycloze_2016,acc,0.5173703901656868,0.011555452669106639,0
winogrande,acc,0.5114443567482242,0.014048804199859332,0
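The CSV holds one row per (task, metric) pair with the score in `value` and its standard error in `err`. A minimal sketch of loading and inspecting it, assuming pandas is installed and the file has been copied locally under its name above (the path is an assumption about your working directory):

```python
import pandas as pd

# Load the 3-shot rankeval results; the empty err field for cb/f1 parses as NaN.
df = pd.read_csv(
    "lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-13-57-07_3shots.csv"
)

# Pivot so acc and acc_norm sit side by side, one row per task.
pivot = df.pivot_table(index="task", columns="metric", values="value")
print(pivot.round(3))
```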