lm1-misc-pile/1b58b88b8/1b58b88b8pile/evaluation/rankeval/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-22-18-55-55_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.325,0.014818724459095524,0
anli_r2,acc,0.345,0.015039986742055233,0
anli_r3,acc,0.3516666666666667,0.013789711695404798,0
arc_challenge,acc,0.19539249146757678,0.011586907189952911,0
arc_challenge,acc_norm,0.24658703071672355,0.012595726268790124,0
arc_easy,acc,0.4537037037037037,0.010215708295494128,0
arc_easy,acc_norm,0.4036195286195286,0.010067368960348204,0
boolq,acc,0.5235474006116208,0.008735351675636605,1
cb,acc,0.4642857142857143,0.0672477765493766,1
cb,f1,0.324184846352334,,1
copa,acc,0.63,0.04852365870939099,0
hellaswag,acc,0.2862975502887871,0.004511063351278702,0
hellaswag,acc_norm,0.30501892053375823,0.004594744821762281,0
piqa,acc,0.6147986942328618,0.011354179751257075,0
piqa,acc_norm,0.6186071817192601,0.011332850406528672,0
rte,acc,0.4620938628158845,0.030009848912529113,0
sciq,acc,0.768,0.013354937452281557,0
sciq,acc_norm,0.671,0.014865395385928354,0
storycloze_2016,acc,0.5788348476750401,0.011417808278216117,0
winogrande,acc,0.5027624309392266,0.014052271211616436,0