lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-23-19-58-55_1shots.csv
task,metric,value,err,version
anli_r1,acc,0.323,0.014794927843348637,0
anli_r2,acc,0.319,0.01474640486547348,0
anli_r3,acc,0.3358333333333333,0.013639261190932884,0
arc_challenge,acc,0.17235494880546076,0.011037113093461295,0
arc_challenge,acc_norm,0.2098976109215017,0.011900548748047446,0
arc_easy,acc,0.34764309764309764,0.009771868846830912,0
arc_easy,acc_norm,0.3186026936026936,0.009560775507673364,0
boolq,acc,0.5226299694189602,0.008736093428015826,1
cb,acc,0.48214285714285715,0.06737697508644648,1
cb,f1,0.3421052631578947,,1
copa,acc,0.54,0.05009082659620332,0
hellaswag,acc,0.2638916550487951,0.004398404992933851,0
hellaswag,acc_norm,0.26419040031866164,0.004400000822742047,0
piqa,acc,0.5723612622415669,0.011543009623282828,0
piqa,acc_norm,0.5652883569096845,0.011565943814308855,0
rte,acc,0.5306859205776173,0.03003973059219781,0
sciq,acc,0.568,0.01567232023733621,0
sciq,acc_norm,0.546,0.015752210388771844,0
storycloze_2016,acc,0.5189738107963656,0.011554104174019694,0
winogrande,acc,0.5090765588003157,0.014050170094497704,0