lm1-misc-pile
/
574m174b174b
/evaluation
/lm1-574m-174b-results_lm-eval_global_step331103_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.308,0.014606483127342765,0
anli_r2,acc,0.346,0.015050266127564443,0
anli_r3,acc,0.3233333333333333,0.013508372867300224,0
arc_challenge,acc,0.24658703071672355,0.012595726268790125,0
arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0
arc_easy,acc,0.5526094276094277,0.010202832385415646,0
arc_easy,acc_norm,0.5484006734006734,0.010211600726405236,0
boolq,acc,0.4932721712538226,0.008744263273827433,1
cb,acc,0.32142857142857145,0.06297362289056341,1
cb,f1,0.23303167420814477,,1
copa,acc,0.75,0.04351941398892446,0
hellaswag,acc,0.3419637522405895,0.004733980470799225,0
hellaswag,acc_norm,0.4202350129456284,0.004925877705771198,0
piqa,acc,0.676278563656148,0.010916765010708781,0
piqa,acc_norm,0.675734494015234,0.010921539041347985,0
rte,acc,0.5090252707581228,0.030091559826331334,0
sciq,acc,0.891,0.009859828407037188,0
sciq,acc_norm,0.884,0.010131468138757005,0
storycloze_2016,acc,0.6435061464457509,0.011075964871050996,0
winogrande,acc,0.516179952644041,0.0140451261309786,0