lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-23-19-58-55_1shots.csv
task,metric,value,err,version
anli_r1,acc,0.323,0.014794927843348637,0
anli_r2,acc,0.319,0.01474640486547348,0
anli_r3,acc,0.3358333333333333,0.013639261190932884,0
arc_challenge,acc,0.17235494880546076,0.011037113093461295,0
arc_challenge,acc_norm,0.2098976109215017,0.011900548748047446,0
arc_easy,acc,0.34764309764309764,0.009771868846830912,0
arc_easy,acc_norm,0.3186026936026936,0.009560775507673364,0
boolq,acc,0.5226299694189602,0.008736093428015826,1
cb,acc,0.48214285714285715,0.06737697508644648,1
cb,f1,0.3421052631578947,,1
copa,acc,0.54,0.05009082659620332,0
hellaswag,acc,0.2638916550487951,0.004398404992933851,0
hellaswag,acc_norm,0.26419040031866164,0.004400000822742047,0
piqa,acc,0.5723612622415669,0.011543009623282828,0
piqa,acc_norm,0.5652883569096845,0.011565943814308855,0
rte,acc,0.5306859205776173,0.03003973059219781,0
sciq,acc,0.568,0.01567232023733621,0
sciq,acc_norm,0.546,0.015752210388771844,0
storycloze_2016,acc,0.5189738107963656,0.011554104174019694,0
winogrande,acc,0.5090765588003157,0.014050170094497704,0
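
A minimal sketch of loading and ranking these results, assuming pandas is available (any CSV reader works); the filename matches the path above. Note that the cb,f1 row has an empty err field, which pandas reads as NaN.

import pandas as pd

# Path to the results CSV shown above (adjust to your local checkout).
path = "lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-23-19-58-55_1shots.csv"
df = pd.read_csv(path)  # columns: task, metric, value, err, version

# Keep only the plain accuracy rows and sort tasks from best to worst.
acc = df[df["metric"] == "acc"].set_index("task")["value"]
print(acc.sort_values(ascending=False).round(3))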