lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-17-05-38_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.344,0.015029633724408945,0
anli_r2,acc,0.329,0.014865395385928355,0
anli_r3,acc,0.35583333333333333,0.013826518748493322,0
arc_challenge,acc,0.16467576791808874,0.010838369209479231,0
arc_challenge,acc_norm,0.20733788395904437,0.011846905782971364,0
arc_easy,acc,0.3421717171717172,0.009735236771958743,0
arc_easy,acc_norm,0.32365319865319864,0.009600478182273768,0
boolq,acc,0.6128440366972477,0.008519429207594412,1
cb,acc,0.4642857142857143,0.06724777654937658,1
cb,f1,0.3235431235431235,,1
copa,acc,0.62,0.048783173121456316,0
hellaswag,acc,0.2658832901812388,0.0044089948686501,0
hellaswag,acc_norm,0.26558454491137223,0.004407413723383407,0
piqa,acc,0.5669205658324266,0.01156086442315138,0
piqa,acc_norm,0.5571273122959739,0.011589430503509102,0
rte,acc,0.5523465703971119,0.02993107036293953,0
sciq,acc,0.556,0.01571976816340209,0
sciq,acc_norm,0.541,0.015766025737882165,0
storycloze_2016,acc,0.5227151256012827,0.011550494192008948,0
winogrande,acc,0.5011838989739542,0.014052446290529015,0
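For quick inspection, a minimal pandas sketch for loading these 5-shot rank-eval scores; the local path is an assumption, taken to mirror the repository layout above:

```python
import pandas as pd

# Assumed repository-relative location of the results file shown above.
path = (
    "lm1-misc-pile/1b11b51b5/evaluation/rankeval/"
    "lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-17-05-38_5shots.csv"
)

# Columns: task, metric, value, err, version (err is empty for cb/f1 and parses as NaN).
df = pd.read_csv(path)

# Keep plain-accuracy rows and sort by score for a quick overview.
acc = df[df["metric"] == "acc"].sort_values("value", ascending=False)
print(acc[["task", "value", "err"]].to_string(index=False))
```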