lm1-misc-pile
/
280m5b95b9
/280m5b95b9pile
/evaluation
/lm1-280m-5b9-results_lm-eval_global_step11269_2023-01-23-19-58-52.csv
task,metric,value,err,version
anli_r1,acc,0.322,0.014782913600996667,0
anli_r2,acc,0.335,0.014933117490932573,0
anli_r3,acc,0.3333333333333333,0.013613950010225606,0
arc_challenge,acc,0.1757679180887372,0.011122850863120485,0
arc_challenge,acc_norm,0.2158703071672355,0.012022975360030663,0
arc_easy,acc,0.380050505050505,0.009960175831493116,0
arc_easy,acc_norm,0.36363636363636365,0.009870849346011767,0
boolq,acc,0.5452599388379205,0.00870915345549762,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.2751322751322751,,1
copa,acc,0.62,0.048783173121456316,0
hellaswag,acc,0.26837283409679347,0.004422070927212535,0
hellaswag,acc_norm,0.27504481179047996,0.004456242601950632,0
piqa,acc,0.5984766050054406,0.01143732437339785,0
piqa,acc_norm,0.5903155603917302,0.011473932007187613,0
rte,acc,0.5415162454873647,0.02999253538537331,0
sciq,acc,0.707,0.014399942998441271,0
sciq,acc_norm,0.659,0.014998131348402697,0
storycloze_2016,acc,0.5451630144307856,0.011515167912227987,0
winogrande,acc,0.5240726124704025,0.014036189665395132,0