lm1-misc-pile/574m174b174b/evaluation/rankeval/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-23-19-59-47_1shots.csv
task,metric,value,err,version
anli_r1,acc,0.317,0.014721675438880219,0
anli_r2,acc,0.32,0.014758652303574885,0
anli_r3,acc,0.3308333333333333,0.013588208070709,0
arc_challenge,acc,0.2440273037542662,0.012551447627856259,0
arc_challenge,acc_norm,0.2645051194539249,0.01288927294931337,0
arc_easy,acc,0.5500841750841751,0.010208181969301794,0
arc_easy,acc_norm,0.5206228956228957,0.010251052755716122,0
boolq,acc,0.5055045871559632,0.008744525001616656,1
cb,acc,0.48214285714285715,0.06737697508644648,1
cb,f1,0.3421052631578947,,1
copa,acc,0.69,0.04648231987117316,0
hellaswag,acc,0.3437562238597889,0.004739902411944552,0
hellaswag,acc_norm,0.4162517426807409,0.004919289113027514,0
piqa,acc,0.6800870511425462,0.010882873582092056,0
piqa,acc_norm,0.6893362350380848,0.010797078933727671,0
rte,acc,0.5379061371841155,0.030009848912529113,0
sciq,acc,0.882,0.01020686926438179,0
sciq,acc_norm,0.87,0.01064016979249936,0
storycloze_2016,acc,0.6360235168359166,0.011126343044992838,0
winogrande,acc,0.510655090765588,0.014049294536290393,0