lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-17-05-38_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.344,0.015029633724408945,0
anli_r2,acc,0.329,0.014865395385928355,0
anli_r3,acc,0.35583333333333333,0.013826518748493322,0
arc_challenge,acc,0.16467576791808874,0.010838369209479231,0
arc_challenge,acc_norm,0.20733788395904437,0.011846905782971364,0
arc_easy,acc,0.3421717171717172,0.009735236771958743,0
arc_easy,acc_norm,0.32365319865319864,0.009600478182273768,0
boolq,acc,0.6128440366972477,0.008519429207594412,1
cb,acc,0.4642857142857143,0.06724777654937658,1
cb,f1,0.3235431235431235,,1
copa,acc,0.62,0.048783173121456316,0
hellaswag,acc,0.2658832901812388,0.0044089948686501,0
hellaswag,acc_norm,0.26558454491137223,0.004407413723383407,0
piqa,acc,0.5669205658324266,0.01156086442315138,0
piqa,acc_norm,0.5571273122959739,0.011589430503509102,0
rte,acc,0.5523465703971119,0.02993107036293953,0
sciq,acc,0.556,0.01571976816340209,0
sciq,acc_norm,0.541,0.015766025737882165,0
storycloze_2016,acc,0.5227151256012827,0.011550494192008948,0
winogrande,acc,0.5011838989739542,0.014052446290529015,0