w11wo committed on
Commit
954b42a
·
1 Parent(s): 53ac750

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 3653, "</s>": 3654}
config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
+ "activation_dropout": 0.1,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout": 0.0,
57
+ "hidden_size": 1024,
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 4096,
60
+ "layer_norm_eps": 1e-05,
61
+ "layerdrop": 0.0,
62
+ "mask_feature_length": 64,
63
+ "mask_feature_min_masks": 0,
64
+ "mask_feature_prob": 0.25,
65
+ "mask_time_length": 10,
66
+ "mask_time_min_masks": 2,
67
+ "mask_time_prob": 0.75,
68
+ "model_type": "wav2vec2",
69
+ "num_adapter_layers": 3,
70
+ "num_attention_heads": 16,
71
+ "num_codevector_groups": 2,
72
+ "num_codevectors_per_group": 320,
73
+ "num_conv_pos_embedding_groups": 16,
74
+ "num_conv_pos_embeddings": 128,
75
+ "num_feat_extract_layers": 7,
76
+ "num_hidden_layers": 24,
77
+ "num_negatives": 100,
78
+ "output_hidden_size": 1024,
79
+ "pad_token_id": 3652,
80
+ "proj_codevector_dim": 768,
81
+ "tdnn_dilation": [
82
+ 1,
83
+ 2,
84
+ 3,
85
+ 1,
86
+ 1
87
+ ],
88
+ "tdnn_dim": [
89
+ 512,
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 1500
94
+ ],
95
+ "tdnn_kernel": [
96
+ 5,
97
+ 3,
98
+ 3,
99
+ 1,
100
+ 1
101
+ ],
102
+ "torch_dtype": "float32",
103
+ "transformers_version": "4.17.0.dev0",
104
+ "use_weighted_layer_sum": false,
105
+ "vocab_size": 3655,
106
+ "xvector_output_dim": 512
107
+ }
nohup.out ADDED
@@ -0,0 +1,800 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/1 [00:00<?, ?ba/s]
 
1
  0%| | 0/1 [00:00<?, ?ba/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  0%| | 0/3 [00:00<?, ?ba/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  0%| | 0/37300 [00:00<?, ?it/s]
4
  0%| | 1/37300 [00:02<27:52:51, 2.69s/it]
5
  0%| | 2/37300 [00:05<27:44:40, 2.68s/it]
6
  0%| | 3/37300 [00:07<26:20:20, 2.54s/it]
7
  0%| | 4/37300 [00:10<25:14:13, 2.44s/it]
8
  0%| | 5/37300 [00:12<24:34:00, 2.37s/it]
9
  0%| | 6/37300 [00:14<23:51:17, 2.30s/it]
10
  0%| | 7/37300 [00:16<23:12:30, 2.24s/it]
11
  0%| | 8/37300 [00:18<22:34:15, 2.18s/it]
12
  0%| | 9/37300 [00:20<21:43:40, 2.10s/it]
13
  0%| | 10/37300 [00:22<21:09:24, 2.04s/it]
14
  0%| | 11/37300 [00:24<20:23:14, 1.97s/it]
15
  0%| | 12/37300 [00:26<19:48:38, 1.91s/it]
16
  0%| | 13/37300 [00:27<19:19:05, 1.87s/it]
17
  0%| | 14/37300 [00:29<18:58:02, 1.83s/it]
18
  0%| | 15/37300 [00:31<18:35:09, 1.79s/it]
19
  0%| | 16/37300 [00:32<18:20:58, 1.77s/it]
20
  0%| | 17/37300 [00:34<18:01:00, 1.74s/it]
21
  0%| | 18/37300 [00:36<17:45:52, 1.72s/it]
22
  0%| | 19/37300 [00:37<17:25:44, 1.68s/it]
23
  0%| | 20/37300 [00:39<17:09:19, 1.66s/it]
24
  0%| | 21/37300 [00:41<16:53:34, 1.63s/it]
25
  0%| | 22/37300 [00:42<16:33:49, 1.60s/it]
26
  0%| | 23/37300 [00:44<16:24:45, 1.59s/it]
27
  0%| | 24/37300 [00:45<16:23:26, 1.58s/it]
28
  0%| | 25/37300 [00:47<16:09:47, 1.56s/it]
29
  0%| | 26/37300 [00:48<15:54:32, 1.54s/it]
30
  0%| | 27/37300 [00:50<15:33:01, 1.50s/it]
31
  0%| | 28/37300 [00:51<15:34:21, 1.50s/it]
32
  0%| | 29/37300 [00:53<15:14:30, 1.47s/it]
33
  0%| | 30/37300 [00:54<14:53:39, 1.44s/it]
34
  0%| | 31/37300 [00:55<14:28:05, 1.40s/it]
35
  0%| | 32/37300 [00:56<14:10:28, 1.37s/it]
36
  0%| | 33/37300 [00:58<13:55:51, 1.35s/it]
37
  0%| | 34/37300 [00:59<13:37:37, 1.32s/it]
38
  0%| | 35/37300 [01:00<13:25:49, 1.30s/it]
39
  0%| | 36/37300 [01:01<13:05:22, 1.26s/it]
40
  0%| | 37/37300 [01:03<12:58:00, 1.25s/it]
41
  0%| | 38/37300 [01:04<12:54:34, 1.25s/it]
42
  0%| | 39/37300 [01:05<12:45:42, 1.23s/it]
43
  0%| | 40/37300 [01:06<12:35:43, 1.22s/it]
44
  0%| | 41/37300 [01:07<12:29:24, 1.21s/it]
45
  0%| | 42/37300 [01:09<12:24:22, 1.20s/it]
46
  0%| | 43/37300 [01:10<12:09:58, 1.18s/it]
47
  0%| | 44/37300 [01:11<11:54:30, 1.15s/it]
48
  0%| | 45/37300 [01:12<11:37:04, 1.12s/it]
49
  0%| | 46/37300 [01:13<11:22:02, 1.10s/it]
50
  0%| | 47/37300 [01:14<11:10:26, 1.08s/it]
51
  0%| | 48/37300 [01:15<10:59:46, 1.06s/it]
52
  0%| | 49/37300 [01:16<10:45:22, 1.04s/it]
53
  0%| | 50/37300 [01:17<10:30:56, 1.02s/it]
54
  0%| | 51/37300 [01:20<15:17:02, 1.48s/it]
55
  0%| | 52/37300 [01:22<18:47:59, 1.82s/it]
56
  0%| | 53/37300 [01:25<20:33:46, 1.99s/it]
57
  0%| | 54/37300 [01:27<21:31:50, 2.08s/it]
58
  0%| | 55/37300 [01:29<21:58:23, 2.12s/it]
59
  0%| | 56/37300 [01:31<22:08:50, 2.14s/it]
60
  0%| | 57/37300 [01:33<22:03:41, 2.13s/it]
61
  0%| | 58/37300 [01:35<21:56:25, 2.12s/it]
62
  0%| | 59/37300 [01:38<21:45:05, 2.10s/it]
63
  0%| | 60/37300 [01:39<21:12:29, 2.05s/it]
64
  0%| | 61/37300 [01:41<20:39:19, 2.00s/it]
65
  0%| | 62/37300 [01:43<20:08:46, 1.95s/it]
66
  0%| | 63/37300 [01:45<19:34:13, 1.89s/it]
67
  0%| | 64/37300 [01:47<19:05:14, 1.85s/it]
68
  0%| | 65/37300 [01:48<18:38:25, 1.80s/it]
69
  0%| | 66/37300 [01:50<18:18:23, 1.77s/it]
70
  0%| | 67/37300 [01:52<17:59:04, 1.74s/it]
71
  0%| | 68/37300 [01:53<17:39:44, 1.71s/it]
72
  0%| | 69/37300 [01:55<17:23:14, 1.68s/it]
73
  0%| | 70/37300 [01:57<17:14:44, 1.67s/it]
74
  0%| | 71/37300 [01:58<16:47:48, 1.62s/it]
75
  0%| | 72/37300 [02:00<16:31:57, 1.60s/it]
76
  0%| | 73/37300 [02:01<16:13:28, 1.57s/it]
77
  0%| | 74/37300 [02:03<16:03:25, 1.55s/it]
78
  0%| | 75/37300 [02:04<15:45:47, 1.52s/it]
79
  0%| | 76/37300 [02:06<15:34:44, 1.51s/it]
80
  0%| | 77/37300 [02:07<15:33:55, 1.51s/it]
81
  0%| | 78/37300 [02:09<15:20:42, 1.48s/it]
82
  0%| | 79/37300 [02:10<15:07:12, 1.46s/it]
83
  0%| | 80/37300 [02:11<14:56:02, 1.44s/it]
84
  0%| | 81/37300 [02:13<14:43:08, 1.42s/it]
85
  0%| | 82/37300 [02:14<14:20:35, 1.39s/it]
86
  0%| | 83/37300 [02:15<13:58:56, 1.35s/it]
87
  0%| | 84/37300 [02:17<13:46:27, 1.33s/it]
88
  0%| | 85/37300 [02:18<13:32:48, 1.31s/it]
89
  0%| | 86/37300 [02:19<13:15:56, 1.28s/it]
90
  0%| | 87/37300 [02:20<13:06:35, 1.27s/it]
91
  0%| | 88/37300 [02:22<13:08:02, 1.27s/it]
92
  0%| | 89/37300 [02:23<13:17:08, 1.29s/it]
93
  0%| | 90/37300 [02:24<13:10:15, 1.27s/it]
94
  0%| | 91/37300 [02:25<12:51:09, 1.24s/it]
95
  0%| | 92/37300 [02:26<12:38:10, 1.22s/it]
96
  0%| | 93/37300 [02:28<12:31:19, 1.21s/it]
97
  0%| | 94/37300 [02:29<12:20:50, 1.19s/it]
98
  0%| | 95/37300 [02:30<12:04:08, 1.17s/it]
99
  0%| | 96/37300 [02:31<11:58:35, 1.16s/it]
100
  0%| | 97/37300 [02:32<11:51:50, 1.15s/it]
101
  0%| | 98/37300 [02:33<11:57:28, 1.16s/it]
102
  0%| | 99/37300 [02:34<11:27:51, 1.11s/it]
103
  0%| | 100/37300 [02:35<10:59:19, 1.06s/it]
104
 
105
  0%| | 100/37300 [02:35<10:59:19, 1.06s/it]
106
  0%| | 101/37300 [02:38<15:45:32, 1.53s/it]
107
  0%| | 102/37300 [02:40<18:45:01, 1.81s/it]
108
  0%| | 103/37300 [02:43<20:54:43, 2.02s/it]
109
  0%| | 104/37300 [02:45<21:57:30, 2.13s/it]
110
  0%| | 105/37300 [02:48<22:31:20, 2.18s/it]
111
  0%| | 106/37300 [02:50<22:43:03, 2.20s/it]
112
  0%| | 107/37300 [02:52<22:35:22, 2.19s/it]
113
  0%| | 108/37300 [02:54<22:28:01, 2.17s/it]
114
  0%| | 109/37300 [02:56<22:07:51, 2.14s/it]
115
  0%| | 110/37300 [02:58<21:38:46, 2.10s/it]
116
  0%| | 111/37300 [03:00<20:54:47, 2.02s/it]
117
  0%| | 112/37300 [03:02<20:20:21, 1.97s/it]
118
  0%| | 113/37300 [03:04<19:49:19, 1.92s/it]
119
  0%| | 114/37300 [03:05<19:12:13, 1.86s/it]
120
  0%| | 115/37300 [03:07<18:44:03, 1.81s/it]
121
  0%| | 116/37300 [03:09<18:22:26, 1.78s/it]
122
  0%| | 117/37300 [03:11<18:04:39, 1.75s/it]
123
  0%| | 118/37300 [03:12<17:47:09, 1.72s/it]
124
  0%| | 119/37300 [03:14<17:27:27, 1.69s/it]
125
  0%| | 120/37300 [03:15<17:20:24, 1.68s/it]
126
  0%| | 121/37300 [03:17<16:59:01, 1.64s/it]
127
  0%| | 122/37300 [03:19<16:42:22, 1.62s/it]
128
  0%| | 123/37300 [03:20<16:21:49, 1.58s/it]
129
  0%| | 124/37300 [03:22<16:05:35, 1.56s/it]
130
  0%| | 125/37300 [03:23<15:48:51, 1.53s/it]
131
  0%| | 126/37300 [03:24<15:35:04, 1.51s/it]
132
  0%| | 127/37300 [03:26<15:18:16, 1.48s/it]
133
  0%| | 128/37300 [03:27<15:03:16, 1.46s/it]
134
  0%| | 129/37300 [03:29<14:59:34, 1.45s/it]
135
  0%| | 130/37300 [03:30<14:46:24, 1.43s/it]
136
  0%| | 131/37300 [03:31<14:31:11, 1.41s/it]
137
  0%| | 132/37300 [03:33<14:15:12, 1.38s/it]
138
  0%| | 133/37300 [03:34<13:58:47, 1.35s/it]
139
  0%| | 134/37300 [03:35<13:42:15, 1.33s/it]
140
  0%| | 135/37300 [03:37<13:30:43, 1.31s/it]
141
  0%| | 136/37300 [03:38<13:19:58, 1.29s/it]
142
  0%| | 137/37300 [03:39<13:06:24, 1.27s/it]
143
  0%| | 138/37300 [03:40<12:58:20, 1.26s/it]
144
  0%| | 139/37300 [03:42<12:48:51, 1.24s/it]
145
  0%| | 140/37300 [03:43<12:39:14, 1.23s/it]
146
  0%| | 141/37300 [03:44<12:31:25, 1.21s/it]
147
  0%| | 142/37300 [03:45<12:21:48, 1.20s/it]
148
  0%| | 143/37300 [03:46<12:11:52, 1.18s/it]
149
  0%| | 144/37300 [03:47<12:02:04, 1.17s/it]
150
  0%| | 145/37300 [03:48<11:45:59, 1.14s/it]
151
  0%| | 146/37300 [03:49<11:28:24, 1.11s/it]
152
  0%| | 147/37300 [03:50<11:14:51, 1.09s/it]
153
  0%| | 148/37300 [03:52<11:00:52, 1.07s/it]
154
  0%| | 149/37300 [03:53<10:48:50, 1.05s/it]
155
  0%| | 150/37300 [03:54<10:38:20, 1.03s/it]
156
  0%| | 151/37300 [03:56<15:47:29, 1.53s/it]
157
  0%| | 152/37300 [03:59<18:55:25, 1.83s/it]
158
  0%| | 153/37300 [04:01<20:26:23, 1.98s/it]
159
  0%| | 154/37300 [04:03<21:20:13, 2.07s/it]
160
  0%| | 155/37300 [04:06<21:41:01, 2.10s/it]
161
  0%| | 156/37300 [04:08<21:49:57, 2.12s/it]
162
  0%| | 157/37300 [04:10<21:52:03, 2.12s/it]
163
  0%| | 158/37300 [04:12<21:41:55, 2.10s/it]
164
  0%| | 159/37300 [04:14<21:18:45, 2.07s/it]
165
  0%| | 160/37300 [04:16<20:51:07, 2.02s/it]
166
  0%| | 161/37300 [04:18<20:24:13, 1.98s/it]
167
  0%| | 162/37300 [04:19<20:01:35, 1.94s/it]
168
  0%| | 163/37300 [04:21<19:33:34, 1.90s/it]
169
  0%| | 164/37300 [04:23<19:05:20, 1.85s/it]
170
  0%| | 165/37300 [04:25<18:42:30, 1.81s/it]
171
  0%| | 166/37300 [04:26<18:20:56, 1.78s/it]
172
  0%| | 167/37300 [04:28<17:59:54, 1.74s/it]
173
  0%| | 168/37300 [04:30<17:40:57, 1.71s/it]
174
  0%| | 169/37300 [04:31<17:23:10, 1.69s/it]
175
  0%| | 170/37300 [04:33<17:06:21, 1.66s/it]
176
  0%| | 171/37300 [04:35<16:55:37, 1.64s/it]
177
  0%| | 172/37300 [04:36<16:37:58, 1.61s/it]
178
  0%| | 173/37300 [04:38<16:18:04, 1.58s/it]
179
  0%| | 174/37300 [04:39<16:04:23, 1.56s/it]
180
  0%| | 175/37300 [04:41<15:48:34, 1.53s/it]
181
  0%| | 176/37300 [04:42<15:34:02, 1.51s/it]
182
  0%| | 177/37300 [04:43<15:21:04, 1.49s/it]
183
  0%| | 178/37300 [04:45<15:07:04, 1.47s/it]
184
  0%| | 179/37300 [04:46<14:52:21, 1.44s/it]
185
  0%| | 180/37300 [04:48<14:37:16, 1.42s/it]
186
  0%| | 181/37300 [04:49<14:17:20, 1.39s/it]
187
  0%| | 182/37300 [04:50<14:02:50, 1.36s/it]
188
  0%| | 183/37300 [04:52<13:46:55, 1.34s/it]
189
  0%| | 184/37300 [04:53<13:31:47, 1.31s/it]
190
  0%| | 185/37300 [04:54<13:19:05, 1.29s/it]
191
  0%| | 186/37300 [04:55<13:04:14, 1.27s/it]
192
  1%| | 187/37300 [04:56<12:56:13, 1.25s/it]
193
  1%| | 188/37300 [04:58<12:50:37, 1.25s/it]
194
  1%| | 189/37300 [04:59<12:44:06, 1.24s/it]
195
  1%| | 190/37300 [05:00<12:32:33, 1.22s/it]
196
  1%| | 191/37300 [05:01<12:26:03, 1.21s/it]
197
  1%| | 192/37300 [05:02<12:23:02, 1.20s/it]
198
  1%| | 193/37300 [05:04<12:13:22, 1.19s/it]
199
  1%| | 194/37300 [05:05<11:58:56, 1.16s/it]
200
  1%| | 195/37300 [05:06<11:38:50, 1.13s/it]
201
  1%| | 196/37300 [05:07<11:23:43, 1.11s/it]
202
  1%| | 197/37300 [05:08<11:11:30, 1.09s/it]
203
  1%| | 198/37300 [05:09<10:57:12, 1.06s/it]
204
  1%| | 199/37300 [05:10<10:44:39, 1.04s/it]
205
  1%| | 200/37300 [05:11<10:33:35, 1.02s/it]
206
 
207
  1%| | 200/37300 [05:11<10:33:35, 1.02s/it]
208
  1%| | 201/37300 [05:13<15:31:47, 1.51s/it]
209
  1%| | 202/37300 [05:16<18:41:01, 1.81s/it]
210
  1%| | 203/37300 [05:18<20:25:35, 1.98s/it]
211
  1%| | 204/37300 [05:21<21:20:46, 2.07s/it]
212
  1%| | 205/37300 [05:23<21:47:59, 2.12s/it]
213
  1%| | 206/37300 [05:25<21:57:05, 2.13s/it]
214
  1%| | 207/37300 [05:27<21:46:58, 2.11s/it]
215
  1%| | 208/37300 [05:29<21:27:39, 2.08s/it]
216
  1%| | 209/37300 [05:31<21:09:13, 2.05s/it]
217
  1%| | 210/37300 [05:33<20:38:21, 2.00s/it]
218
  1%| | 211/37300 [05:35<20:06:57, 1.95s/it]
219
  1%| | 212/37300 [05:37<19:39:36, 1.91s/it]
220
  1%| | 213/37300 [05:38<19:13:12, 1.87s/it]
221
  1%| | 214/37300 [05:40<18:48:22, 1.83s/it]
222
  1%| | 215/37300 [05:42<18:28:07, 1.79s/it]
223
  1%| | 216/37300 [05:44<18:11:30, 1.77s/it]
224
  1%| | 217/37300 [05:45<17:56:19, 1.74s/it]
225
  1%| | 218/37300 [05:47<17:38:08, 1.71s/it]
226
  1%| | 219/37300 [05:49<17:21:49, 1.69s/it]
227
  1%| | 220/37300 [05:50<17:03:17, 1.66s/it]
228
  1%| | 221/37300 [05:52<16:48:03, 1.63s/it]
229
  1%| | 222/37300 [05:53<16:31:37, 1.60s/it]
230
  1%| | 223/37300 [05:55<16:13:34, 1.58s/it]
231
  1%| | 224/37300 [05:56<15:51:53, 1.54s/it]
232
  1%| | 225/37300 [05:58<15:38:33, 1.52s/it]
233
  1%| | 226/37300 [05:59<15:27:25, 1.50s/it]
234
  1%| | 227/37300 [06:01<15:12:26, 1.48s/it]
235
  1%| | 228/37300 [06:02<14:57:47, 1.45s/it]
236
  1%| | 229/37300 [06:03<14:52:51, 1.45s/it]
237
  1%| | 230/37300 [06:05<14:39:29, 1.42s/it]
238
  1%| | 231/37300 [06:06<14:21:08, 1.39s/it]
239
  1%| | 232/37300 [06:07<14:05:47, 1.37s/it]
240
  1%| | 233/37300 [06:09<13:51:04, 1.35s/it]
241
  1%| | 234/37300 [06:10<13:37:55, 1.32s/it]
242
  1%| | 235/37300 [06:11<13:24:52, 1.30s/it]
243
  1%| | 236/37300 [06:12<13:16:21, 1.29s/it]
244
  1%| | 237/37300 [06:14<13:04:33, 1.27s/it]
245
  1%| | 238/37300 [06:15<12:51:23, 1.25s/it]
246
  1%| | 239/37300 [06:16<12:39:44, 1.23s/it]
247
  1%| | 240/37300 [06:17<12:28:34, 1.21s/it]
248
  1%| | 241/37300 [06:18<12:18:29, 1.20s/it]
249
  1%| | 242/37300 [06:20<12:10:01, 1.18s/it]
250
  1%| | 243/37300 [06:21<11:57:15, 1.16s/it]
251
  1%| | 244/37300 [06:22<11:42:43, 1.14s/it]
252
  1%| | 245/37300 [06:23<11:29:25, 1.12s/it]
253
  1%| | 246/37300 [06:24<11:16:00, 1.09s/it]
254
  1%| | 247/37300 [06:25<11:04:52, 1.08s/it]
255
  1%| | 248/37300 [06:26<10:52:38, 1.06s/it]
256
  1%| | 249/37300 [06:27<10:42:50, 1.04s/it]
257
  1%| | 250/37300 [06:28<10:29:23, 1.02s/it]
258
  1%| | 251/37300 [06:31<15:29:58, 1.51s/it]
259
  1%| | 252/37300 [06:33<18:28:44, 1.80s/it]
260
  1%| | 253/37300 [06:35<20:06:55, 1.95s/it]
261
  1%| | 254/37300 [06:38<21:04:00, 2.05s/it]
262
  1%| | 255/37300 [06:40<21:34:26, 2.10s/it]
263
  1%| | 256/37300 [06:42<21:45:33, 2.11s/it]
264
  1%| | 257/37300 [06:44<21:38:14, 2.10s/it]
265
  1%| | 258/37300 [06:46<21:19:51, 2.07s/it]
266
  1%| | 259/37300 [06:48<20:58:49, 2.04s/it]
267
  1%| | 260/37300 [06:50<20:38:07, 2.01s/it]
268
  1%| | 261/37300 [06:52<20:03:34, 1.95s/it]
269
  1%| | 262/37300 [06:54<19:34:28, 1.90s/it]
270
  1%| | 263/37300 [06:55<19:09:15, 1.86s/it]
271
  1%| | 264/37300 [06:57<18:43:50, 1.82s/it]
272
  1%| | 265/37300 [06:59<18:23:06, 1.79s/it]
273
  1%| | 266/37300 [07:00<18:02:27, 1.75s/it]
274
  1%| | 267/37300 [07:02<17:41:46, 1.72s/it]
275
  1%| | 268/37300 [07:04<17:21:27, 1.69s/it]
276
  1%| | 269/37300 [07:05<17:02:20, 1.66s/it]
277
  1%| | 270/37300 [07:07<16:41:34, 1.62s/it]
278
  1%| | 271/37300 [07:08<16:24:24, 1.60s/it]
279
  1%| | 272/37300 [07:10<16:10:44, 1.57s/it]
280
  1%| | 273/37300 [07:11<15:58:13, 1.55s/it]
281
  1%| | 274/37300 [07:13<15:48:52, 1.54s/it]
282
  1%| | 275/37300 [07:14<15:25:32, 1.50s/it]
283
  1%| | 276/37300 [07:16<15:19:58, 1.49s/it]
284
  1%| | 277/37300 [07:17<15:09:44, 1.47s/it]
285
  1%| | 278/37300 [07:19<14:54:52, 1.45s/it]
286
  1%| | 279/37300 [07:20<14:38:45, 1.42s/it]
287
  1%| | 280/37300 [07:21<14:25:20, 1.40s/it]
288
  1%| | 281/37300 [07:23<14:02:50, 1.37s/it]
289
  1%| | 282/37300 [07:24<13:51:04, 1.35s/it]
290
  1%| | 283/37300 [07:25<13:40:56, 1.33s/it]
291
  1%| | 284/37300 [07:26<13:29:15, 1.31s/it]
292
  1%| | 285/37300 [07:28<13:18:39, 1.29s/it]
293
  1%| | 286/37300 [07:29<13:08:10, 1.28s/it]
294
  1%| | 287/37300 [07:30<12:59:39, 1.26s/it]
295
  1%| | 288/37300 [07:31<12:52:29, 1.25s/it]
296
  1%| | 289/37300 [07:33<12:45:20, 1.24s/it]
297
  1%| | 290/37300 [07:34<12:36:16, 1.23s/it]
298
  1%| | 291/37300 [07:35<12:24:28, 1.21s/it]
299
  1%| | 292/37300 [07:36<12:12:44, 1.19s/it]
300
  1%| | 293/37300 [07:37<11:58:28, 1.16s/it]
301
  1%| | 294/37300 [07:38<11:40:55, 1.14s/it]
302
  1%| | 295/37300 [07:39<11:26:52, 1.11s/it]
303
  1%| | 296/37300 [07:40<11:13:02, 1.09s/it]
304
  1%| | 297/37300 [07:41<11:04:15, 1.08s/it]
305
  1%| | 298/37300 [07:42<11:03:23, 1.08s/it]
306
  1%| | 299/37300 [07:44<11:00:40, 1.07s/it]
307
  1%| | 300/37300 [07:45<10:55:45, 1.06s/it]
308
 
309
  1%| | 300/37300 [07:45<10:55:45, 1.06s/it]
310
  1%| | 301/37300 [07:47<16:10:58, 1.57s/it]
311
  1%| | 302/37300 [07:50<19:12:13, 1.87s/it]
312
  1%| | 303/37300 [07:52<20:47:24, 2.02s/it]
313
  1%| | 304/37300 [07:55<21:33:25, 2.10s/it]
314
  1%| | 305/37300 [07:57<21:50:10, 2.12s/it]
315
  1%| | 306/37300 [07:59<21:47:30, 2.12s/it]
316
  1%| | 307/37300 [08:01<21:32:10, 2.10s/it]
317
  1%| | 308/37300 [08:03<21:16:45, 2.07s/it]
318
  1%| | 309/37300 [08:05<20:45:55, 2.02s/it]
319
  1%| | 310/37300 [08:07<20:20:14, 1.98s/it]
320
  1%| | 311/37300 [08:09<19:54:30, 1.94s/it]
321
  1%| | 312/37300 [08:10<19:26:14, 1.89s/it]
322
  1%| | 313/37300 [08:12<19:03:03, 1.85s/it]
323
  1%| | 314/37300 [08:14<18:36:53, 1.81s/it]
324
  1%| | 315/37300 [08:15<18:18:38, 1.78s/it]
325
  1%| | 316/37300 [08:17<17:59:06, 1.75s/it]
326
  1%| | 317/37300 [08:19<17:42:03, 1.72s/it]
327
  1%| | 318/37300 [08:20<17:25:54, 1.70s/it]
328
  1%| | 319/37300 [08:22<17:09:21, 1.67s/it]
329
  1%| | 320/37300 [08:24<16:58:56, 1.65s/it]
330
  1%| | 321/37300 [08:25<16:45:02, 1.63s/it]
331
  1%| | 322/37300 [08:27<16:20:16, 1.59s/it]
332
  1%| | 323/37300 [08:28<15:56:38, 1.55s/it]
333
  1%| | 324/37300 [08:30<15:37:13, 1.52s/it]
334
  1%| | 325/37300 [08:31<15:21:09, 1.49s/it]
335
  1%| | 326/37300 [08:33<15:06:23, 1.47s/it]
336
  1%| | 327/37300 [08:34<14:57:39, 1.46s/it]
337
  1%| | 328/37300 [08:35<14:49:20, 1.44s/it]
338
  1%| | 329/37300 [08:37<14:39:49, 1.43s/it]
339
  1%| | 330/37300 [08:38<14:27:18, 1.41s/it]
340
  1%| | 331/37300 [08:39<13:59:32, 1.36s/it]
341
  1%| | 332/37300 [08:41<13:33:00, 1.32s/it]
342
  1%| | 333/37300 [08:42<13:10:45, 1.28s/it]
343
  1%| | 334/37300 [08:43<12:51:01, 1.25s/it]
344
  1%| | 335/37300 [08:44<12:44:14, 1.24s/it]
345
  1%| | 336/37300 [08:45<12:40:36, 1.23s/it]
346
  1%| | 337/37300 [08:47<12:35:46, 1.23s/it]
347
  1%| | 338/37300 [08:48<12:31:18, 1.22s/it]
348
  1%| | 339/37300 [08:49<12:28:38, 1.22s/it]
349
  1%| | 340/37300 [08:50<12:20:34, 1.20s/it]
350
  1%| | 341/37300 [08:51<12:14:34, 1.19s/it]
351
  1%| | 342/37300 [08:53<12:11:17, 1.19s/it]
352
  1%| | 343/37300 [08:54<12:02:42, 1.17s/it]
353
  1%| | 344/37300 [08:55<11:49:23, 1.15s/it]
354
  1%| | 345/37300 [08:56<11:34:14, 1.13s/it]
355
  1%| | 346/37300 [08:57<11:24:38, 1.11s/it]
356
  1%| | 347/37300 [08:58<11:17:38, 1.10s/it]
357
  1%| | 348/37300 [08:59<11:07:13, 1.08s/it]
358
  1%| | 349/37300 [09:00<11:01:01, 1.07s/it]
359
  1%| | 350/37300 [09:01<10:51:43, 1.06s/it]
360
  1%| | 351/37300 [09:04<15:53:24, 1.55s/it]
361
  1%| | 352/37300 [09:06<18:25:47, 1.80s/it]
362
  1%| | 353/37300 [09:08<19:34:30, 1.91s/it]
363
  1%| | 354/37300 [09:10<19:51:19, 1.93s/it]
364
  1%| | 355/37300 [09:12<19:55:51, 1.94s/it]
365
  1%| | 356/37300 [09:14<19:22:22, 1.89s/it]
366
  1%| | 357/37300 [09:16<18:51:28, 1.84s/it]
367
  1%| | 358/37300 [09:17<18:19:14, 1.79s/it]
368
  1%| | 359/37300 [09:19<17:44:58, 1.73s/it]
369
  1%| | 360/37300 [09:21<17:08:49, 1.67s/it]
370
  1%| | 361/37300 [09:22<16:34:13, 1.61s/it]
371
  1%| | 362/37300 [09:24<16:03:16, 1.56s/it]
372
  1%| | 363/37300 [09:25<15:38:10, 1.52s/it]
373
  1%| | 364/37300 [09:26<15:09:52, 1.48s/it]
374
  1%| | 365/37300 [09:28<14:35:35, 1.42s/it]
375
  1%| | 366/37300 [09:29<14:03:17, 1.37s/it]
376
  1%| | 367/37300 [09:30<13:37:19, 1.33s/it]
377
  1%| | 368/37300 [09:31<13:13:03, 1.29s/it]
378
  1%| | 369/37300 [09:32<12:51:08, 1.25s/it]
379
  1%| | 370/37300 [09:34<12:38:03, 1.23s/it]
380
  1%| | 371/37300 [09:35<12:28:26, 1.22s/it]
381
  1%| | 372/37300 [09:36<12:14:34, 1.19s/it]
382
  1%| | 373/37300 [09:37<12:02:00, 1.17s/it]
383
  1%| | 374/37300 [09:40<18:23:02, 1.79s/it]
384
  1%| | 375/37300 [09:43<21:07:36, 2.06s/it]
385
  1%| | 376/37300 [09:46<22:35:22, 2.20s/it]
386
  1%| | 377/37300 [09:48<23:14:10, 2.27s/it]
387
  1%| | 378/37300 [09:50<23:30:37, 2.29s/it]
388
  1%| | 379/37300 [09:53<23:30:25, 2.29s/it]
389
  1%| | 380/37300 [09:55<23:05:48, 2.25s/it]
390
  1%| | 381/37300 [09:57<22:25:43, 2.19s/it]
391
  1%| | 382/37300 [09:59<21:33:51, 2.10s/it]
392
  1%| | 383/37300 [10:01<20:54:02, 2.04s/it]
393
  1%| | 384/37300 [10:02<20:13:07, 1.97s/it]
394
  1%| | 385/37300 [10:04<19:38:56, 1.92s/it]
395
  1%| | 386/37300 [10:06<19:08:45, 1.87s/it]
396
  1%| | 387/37300 [10:08<18:41:06, 1.82s/it]
397
  1%| | 388/37300 [10:09<18:14:14, 1.78s/it]
398
  1%| | 389/37300 [10:11<17:53:33, 1.75s/it]
399
  1%| | 390/37300 [10:13<17:33:59, 1.71s/it]
400
  1%| | 391/37300 [10:14<17:23:13, 1.70s/it]
401
  1%| | 392/37300 [10:16<17:04:20, 1.67s/it]
402
  1%| | 393/37300 [10:17<16:42:16, 1.63s/it]
403
  1%| | 394/37300 [10:19<16:25:13, 1.60s/it]
404
  1%| | 395/37300 [10:21<16:16:03, 1.59s/it]
405
  1%| | 396/37300 [10:22<15:51:12, 1.55s/it]
406
  1%| | 397/37300 [10:23<15:31:56, 1.52s/it]
407
  1%| | 398/37300 [10:25<15:21:09, 1.50s/it]
408
  1%| | 399/37300 [10:26<15:11:11, 1.48s/it]
409
  1%| | 400/37300 [10:28<15:00:01, 1.46s/it]
410
 
411
  1%| | 400/37300 [10:28<15:00:01, 1.46s/it]
412
  1%| | 401/37300 [10:29<14:47:32, 1.44s/it]
413
  1%| | 402/37300 [10:31<14:38:28, 1.43s/it]
414
  1%| | 403/37300 [10:32<14:24:39, 1.41s/it]
415
  1%| | 404/37300 [10:33<14:03:43, 1.37s/it]
416
  1%| | 405/37300 [10:34<13:52:50, 1.35s/it]
417
  1%| | 406/37300 [10:36<13:39:16, 1.33s/it]
418
  1%| | 407/37300 [10:37<13:23:59, 1.31s/it]
419
  1%| | 408/37300 [10:38<13:13:51, 1.29s/it]
420
  1%| | 409/37300 [10:39<13:00:44, 1.27s/it]
421
  1%| | 410/37300 [10:41<13:07:06, 1.28s/it]
422
  1%| | 411/37300 [10:42<13:04:49, 1.28s/it]
423
  1%| | 412/37300 [10:43<12:59:21, 1.27s/it]
424
  1%| | 413/37300 [10:45<12:54:01, 1.26s/it]
425
  1%| | 414/37300 [10:46<12:36:07, 1.23s/it]
426
  1%| | 415/37300 [10:47<12:23:33, 1.21s/it]
427
  1%| | 416/37300 [10:48<12:13:15, 1.19s/it]
428
  1%| | 417/37300 [10:49<11:52:20, 1.16s/it]
429
  1%| | 418/37300 [10:50<11:43:37, 1.14s/it]
430
  1%| | 419/37300 [10:51<11:26:02, 1.12s/it]
431
  1%| | 420/37300 [10:52<11:09:22, 1.09s/it]
432
  1%| | 421/37300 [10:53<10:54:00, 1.06s/it]
433
  1%| | 422/37300 [10:54<10:38:28, 1.04s/it]
434
  1%| | 423/37300 [10:55<10:23:41, 1.01s/it]
435
  1%| | 424/37300 [10:58<15:23:42, 1.50s/it]
436
  1%| | 425/37300 [11:00<18:33:47, 1.81s/it]
437
  1%| | 426/37300 [11:03<20:13:15, 1.97s/it]
438
  1%| | 427/37300 [11:05<21:18:42, 2.08s/it]
439
  1%| | 428/37300 [11:07<21:46:07, 2.13s/it]
440
  1%| | 429/37300 [11:09<21:49:41, 2.13s/it]
441
  1%| | 430/37300 [11:12<21:44:41, 2.12s/it]
442
  1%| | 431/37300 [11:14<21:30:55, 2.10s/it]
443
  1%| | 432/37300 [11:16<21:14:23, 2.07s/it]
444
  1%| | 433/37300 [11:18<20:43:34, 2.02s/it]
445
  1%| | 434/37300 [11:19<20:18:57, 1.98s/it]
446
  1%| | 435/37300 [11:21<19:47:27, 1.93s/it]
447
  1%| | 436/37300 [11:23<19:22:53, 1.89s/it]
448
  1%| | 437/37300 [11:25<19:00:48, 1.86s/it]
449
  1%| | 438/37300 [11:27<18:51:44, 1.84s/it]
450
  1%| | 439/37300 [11:28<18:27:19, 1.80s/it]
451
  1%| | 440/37300 [11:30<18:08:23, 1.77s/it]
452
  1%| | 441/37300 [11:32<17:47:58, 1.74s/it]
453
  1%| | 442/37300 [11:33<17:31:46, 1.71s/it]
454
  1%| | 443/37300 [11:35<17:14:11, 1.68s/it]
455
  1%| | 444/37300 [11:37<16:54:51, 1.65s/it]
456
  1%| | 445/37300 [11:38<16:40:53, 1.63s/it]
457
  1%| | 446/37300 [11:40<16:22:57, 1.60s/it]
458
  1%| | 447/37300 [11:41<16:05:18, 1.57s/it]
459
  1%| | 448/37300 [11:43<15:49:17, 1.55s/it]
460
  1%| | 449/37300 [11:44<15:38:40, 1.53s/it]
461
  1%| | 450/37300 [11:46<15:24:17, 1.50s/it]
462
  1%| | 451/37300 [11:47<15:09:51, 1.48s/it]
463
  1%| | 452/37300 [11:48<14:56:31, 1.46s/it]
464
  1%| | 453/37300 [11:50<14:41:32, 1.44s/it]
465
  1%| | 454/37300 [11:51<14:31:58, 1.42s/it]
466
  1%| | 455/37300 [11:52<14:12:11, 1.39s/it]
467
  1%| | 456/37300 [11:54<13:56:00, 1.36s/it]
468
  1%| | 457/37300 [11:55<13:37:46, 1.33s/it]
469
  1%| | 458/37300 [11:56<13:19:43, 1.30s/it]
470
  1%| | 459/37300 [11:58<13:08:43, 1.28s/it]
471
  1%| | 460/37300 [11:59<12:55:34, 1.26s/it]
472
  1%| | 461/37300 [12:00<12:50:36, 1.26s/it]
473
  1%| | 462/37300 [12:01<12:42:06, 1.24s/it]
474
  1%| | 463/37300 [12:02<12:31:09, 1.22s/it]
475
  1%| | 464/37300 [12:04<12:19:40, 1.20s/it]
476
  1%| | 465/37300 [12:05<12:09:04, 1.19s/it]
477
  1%| | 466/37300 [12:06<11:57:00, 1.17s/it]
478
  1%|▏ | 467/37300 [12:07<11:40:36, 1.14s/it]
479
  1%|▏ | 468/37300 [12:08<11:26:55, 1.12s/it]
480
  1%|▏ | 469/37300 [12:09<11:13:20, 1.10s/it]
481
  1%|▏ | 470/37300 [12:10<11:00:30, 1.08s/it]
482
  1%|▏ | 471/37300 [12:11<10:49:08, 1.06s/it]
483
  1%|▏ | 472/37300 [12:12<10:36:47, 1.04s/it]
484
  1%|▏ | 473/37300 [12:13<10:23:46, 1.02s/it]
485
  1%|▏ | 474/37300 [12:16<15:07:13, 1.48s/it]
486
  1%|▏ | 475/37300 [12:18<18:21:41, 1.80s/it]
487
  1%|▏ | 476/37300 [12:20<20:09:53, 1.97s/it]
488
  1%|▏ | 477/37300 [12:23<21:01:58, 2.06s/it]
489
  1%|▏ | 478/37300 [12:25<21:27:36, 2.10s/it]
490
  1%|▏ | 479/37300 [12:27<21:35:40, 2.11s/it]
491
  1%|▏ | 480/37300 [12:29<21:23:24, 2.09s/it]
492
  1%|▏ | 481/37300 [12:31<21:10:37, 2.07s/it]
493
  1%|▏ | 482/37300 [12:33<20:46:29, 2.03s/it]
494
  1%|▏ | 483/37300 [12:35<20:20:44, 1.99s/it]
495
  1%|▏ | 484/37300 [12:37<19:52:00, 1.94s/it]
496
  1%|▏ | 485/37300 [12:39<19:23:01, 1.90s/it]
497
  1%|▏ | 486/37300 [12:40<19:02:47, 1.86s/it]
498
  1%|▏ | 487/37300 [12:42<18:35:06, 1.82s/it]
499
  1%|▏ | 488/37300 [12:44<18:10:05, 1.78s/it]
500
  1%|▏ | 489/37300 [12:45<17:56:05, 1.75s/it]
501
  1%|▏ | 490/37300 [12:47<17:39:09, 1.73s/it]
502
  1%|▏ | 491/37300 [12:49<17:27:46, 1.71s/it]
503
  1%|▏ | 492/37300 [12:50<17:15:08, 1.69s/it]
504
  1%|▏ | 493/37300 [12:52<16:57:58, 1.66s/it]
505
  1%|▏ | 494/37300 [12:54<16:42:08, 1.63s/it]
506
  1%|▏ | 495/37300 [12:55<16:29:22, 1.61s/it]
507
  1%|▏ | 496/37300 [12:57<16:13:01, 1.59s/it]
508
  1%|▏ | 497/37300 [12:58<15:59:08, 1.56s/it]
509
  1%|▏ | 498/37300 [13:00<15:37:34, 1.53s/it]
510
  1%|▏ | 499/37300 [13:01<15:14:28, 1.49s/it]
511
  1%|▏ | 500/37300 [13:02<14:56:05, 1.46s/it]
512
 
513
  1%|▏ | 500/37300 [13:02<14:56:05, 1.46s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
 
 
 
 
 
 
 
 
 
 
514
  0%| | 0/288 [00:00<?, ?it/s]
 
515
  1%| | 2/288 [00:00<00:39, 7.26it/s]
 
516
  1%| | 3/288 [00:00<01:06, 4.26it/s]
 
517
  1%|▏ | 4/288 [00:00<01:15, 3.75it/s]
 
518
  2%|▏ | 5/288 [00:01<01:13, 3.84it/s]
 
519
  2%|▏ | 6/288 [00:01<01:15, 3.74it/s]
 
520
  2%|▏ | 7/288 [00:01<01:18, 3.57it/s]
 
521
  3%|▎ | 8/288 [00:02<01:15, 3.72it/s]
 
522
  3%|▎ | 9/288 [00:02<01:19, 3.53it/s]
 
523
  3%|▎ | 10/288 [00:02<01:27, 3.17it/s]
 
524
  4%|▍ | 11/288 [00:03<01:29, 3.10it/s]
 
525
  4%|▍ | 12/288 [00:03<01:24, 3.28it/s]
 
526
  5%|▍ | 13/288 [00:03<01:25, 3.21it/s]
 
527
  5%|▍ | 14/288 [00:03<01:24, 3.23it/s]
 
528
  5%|▌ | 15/288 [00:04<01:21, 3.36it/s]
 
529
  6%|▌ | 16/288 [00:04<01:22, 3.30it/s]
 
530
  6%|▌ | 17/288 [00:04<01:21, 3.34it/s]
 
531
  6%|▋ | 18/288 [00:05<01:23, 3.21it/s]
 
532
  7%|▋ | 19/288 [00:05<01:26, 3.12it/s]
 
533
  7%|▋ | 20/288 [00:05<01:23, 3.21it/s]
 
534
  7%|▋ | 21/288 [00:06<01:22, 3.24it/s]
 
535
  8%|▊ | 22/288 [00:06<01:20, 3.30it/s]
 
536
  8%|▊ | 23/288 [00:06<01:22, 3.20it/s]
 
537
  8%|▊ | 24/288 [00:07<01:24, 3.12it/s]
 
538
  9%|▊ | 25/288 [00:07<01:23, 3.15it/s]
 
539
  9%|▉ | 26/288 [00:07<01:27, 2.99it/s]
 
540
  9%|▉ | 27/288 [00:08<01:26, 3.01it/s]
 
541
  10%|▉ | 28/288 [00:08<01:23, 3.11it/s]
 
542
  10%|█ | 29/288 [00:08<01:19, 3.26it/s]
 
543
  10%|█ | 30/288 [00:08<01:11, 3.60it/s]
 
544
  11%|█ | 31/288 [00:09<01:10, 3.66it/s]
 
545
  11%|█ | 32/288 [00:09<01:12, 3.52it/s]
 
546
  11%|█▏ | 33/288 [00:09<01:10, 3.62it/s]
 
547
  12%|█▏ | 34/288 [00:10<01:11, 3.55it/s]
 
548
  12%|█▏ | 35/288 [00:10<01:12, 3.48it/s]
 
549
  12%|█▎ | 36/288 [00:10<01:14, 3.40it/s]
 
550
  13%|█▎ | 37/288 [00:10<01:13, 3.40it/s]
 
551
  13%|█▎ | 38/288 [00:11<01:13, 3.41it/s]
 
552
  14%|█▎ | 39/288 [00:11<01:14, 3.34it/s]
 
553
  14%|█▍ | 40/288 [00:11<01:17, 3.19it/s]
 
554
  14%|█▍ | 41/288 [00:12<01:15, 3.26it/s]
 
555
  15%|█▍ | 42/288 [00:12<01:17, 3.16it/s]
 
556
  15%|█▍ | 43/288 [00:12<01:18, 3.11it/s]
 
557
  15%|█▌ | 44/288 [00:13<01:17, 3.15it/s]
 
558
  16%|█▌ | 45/288 [00:13<01:17, 3.13it/s]
 
559
  16%|█▌ | 46/288 [00:13<01:16, 3.16it/s]
 
560
  16%|█▋ | 47/288 [00:14<01:20, 3.01it/s]
 
561
  17%|█▋ | 48/288 [00:14<01:24, 2.85it/s]
 
562
  17%|█▋ | 49/288 [00:14<01:20, 2.96it/s]
 
563
  17%|█▋ | 50/288 [00:15<01:18, 3.05it/s]
 
564
  18%|█▊ | 51/288 [00:15<01:16, 3.08it/s]
 
565
  18%|█▊ | 52/288 [00:15<01:19, 2.95it/s]
 
566
  18%|█▊ | 53/288 [00:16<01:21, 2.88it/s]
 
567
  19%|█▉ | 54/288 [00:16<01:19, 2.95it/s]
 
568
  19%|█▉ | 55/288 [00:16<01:22, 2.84it/s]
 
569
  19%|█▉ | 56/288 [00:17<01:19, 2.91it/s]
 
570
  20%|█▉ | 57/288 [00:17<01:15, 3.08it/s]
 
571
  20%|██ | 58/288 [00:17<01:10, 3.24it/s]
 
572
  20%|██ | 59/288 [00:18<01:07, 3.37it/s]
 
573
  21%|██ | 60/288 [00:18<01:04, 3.56it/s]
 
574
  21%|██ | 61/288 [00:18<01:08, 3.33it/s]
 
575
  22%|██▏ | 62/288 [00:18<01:08, 3.28it/s]
 
576
  22%|██▏ | 63/288 [00:19<01:09, 3.23it/s]
 
577
  22%|██▏ | 64/288 [00:19<01:07, 3.32it/s]
 
578
  23%|██▎ | 65/288 [00:19<01:10, 3.17it/s]
 
579
  23%|██▎ | 66/288 [00:20<01:11, 3.10it/s]
 
580
  23%|██▎ | 67/288 [00:20<01:09, 3.16it/s]
 
581
  24%|██▎ | 68/288 [00:20<01:11, 3.09it/s]
 
582
  24%|██▍ | 69/288 [00:21<01:12, 3.04it/s]
 
583
  24%|██▍ | 70/288 [00:21<01:10, 3.07it/s]
 
584
  25%|██▍ | 71/288 [00:21<01:13, 2.95it/s]
 
585
  25%|██▌ | 72/288 [00:22<01:16, 2.84it/s]
 
586
  25%|██▌ | 73/288 [00:22<01:14, 2.89it/s]
 
587
  26%|██▌ | 74/288 [00:22<01:14, 2.88it/s]
 
588
  26%|██▌ | 75/288 [00:23<01:14, 2.87it/s]
 
589
  26%|██▋ | 76/288 [00:23<01:10, 2.99it/s]
 
590
  27%|██▋ | 77/288 [00:23<01:10, 2.98it/s]
 
591
  27%|██▋ | 78/288 [00:24<01:09, 3.04it/s]
 
592
  27%|██▋ | 79/288 [00:24<01:07, 3.11it/s]
 
593
  28%|██▊ | 80/288 [00:24<01:05, 3.20it/s]
 
594
  28%|██▊ | 81/288 [00:25<01:07, 3.06it/s]
 
595
  28%|██▊ | 82/288 [00:25<01:05, 3.13it/s]
 
596
  29%|██▉ | 83/288 [00:25<01:05, 3.13it/s]
 
597
  29%|██▉ | 84/288 [00:26<01:00, 3.39it/s]
 
598
  30%|██▉ | 85/288 [00:26<00:58, 3.44it/s]
 
599
  30%|██▉ | 86/288 [00:26<01:02, 3.24it/s]
 
600
  30%|███ | 87/288 [00:27<01:07, 2.99it/s]
 
601
  31%|███ | 88/288 [00:27<01:06, 2.99it/s]
 
602
  31%|███ | 89/288 [00:27<01:04, 3.09it/s]
 
603
  31%|███▏ | 90/288 [00:28<01:02, 3.17it/s]
 
604
  32%|███▏ | 91/288 [00:28<01:03, 3.12it/s]
 
605
  32%|███▏ | 92/288 [00:28<01:02, 3.15it/s]
 
606
  32%|███▏ | 93/288 [00:29<01:03, 3.06it/s]
 
607
  33%|███▎ | 94/288 [00:29<01:02, 3.09it/s]
 
608
  33%|███▎ | 95/288 [00:29<01:02, 3.08it/s]
 
609
  33%|███▎ | 96/288 [00:29<00:58, 3.26it/s]
 
610
  34%|███▎ | 97/288 [00:30<00:56, 3.36it/s]
 
611
  34%|███▍ | 98/288 [00:30<00:58, 3.24it/s]
 
612
  34%|███▍ | 99/288 [00:30<01:00, 3.11it/s]
 
613
  35%|███▍ | 100/288 [00:31<01:05, 2.87it/s]
 
614
  35%|███▌ | 101/288 [00:31<01:03, 2.97it/s]
 
615
  35%|███▌ | 102/288 [00:31<00:59, 3.11it/s]
 
616
  36%|███▌ | 103/288 [00:32<00:58, 3.17it/s]
 
617
  36%|███▌ | 104/288 [00:32<00:58, 3.13it/s]
 
618
  36%|███▋ | 105/288 [00:32<00:55, 3.31it/s]
 
619
  37%|███▋ | 106/288 [00:33<00:57, 3.16it/s]
 
620
  37%|███▋ | 107/288 [00:33<00:57, 3.15it/s]
 
621
  38%|███▊ | 108/288 [00:33<00:57, 3.13it/s]
 
622
  38%|███▊ | 109/288 [00:34<00:57, 3.11it/s]
 
623
  38%|███▊ | 110/288 [00:34<00:56, 3.15it/s]
 
624
  39%|███▊ | 111/288 [00:34<01:02, 2.82it/s]
 
625
  39%|███▉ | 112/288 [00:35<01:01, 2.85it/s]
 
626
  39%|███▉ | 113/288 [00:35<00:59, 2.93it/s]
 
627
  40%|███▉ | 114/288 [00:35<00:58, 2.96it/s]
 
628
  40%|███▉ | 115/288 [00:36<00:56, 3.06it/s]
 
629
  40%|████ | 116/288 [00:36<00:56, 3.02it/s]
 
630
  41%|████ | 117/288 [00:36<00:55, 3.06it/s]
 
631
  41%|████ | 118/288 [00:37<00:52, 3.23it/s]
 
632
  41%|████▏ | 119/288 [00:37<00:53, 3.13it/s]
 
633
  42%|████▏ | 120/288 [00:37<01:00, 2.78it/s]
 
634
  42%|████▏ | 121/288 [00:38<00:56, 2.94it/s]
 
635
  42%|████▏ | 122/288 [00:38<00:51, 3.20it/s]
 
636
  43%|████▎ | 123/288 [00:38<00:54, 3.05it/s]
 
637
  43%|████▎ | 124/288 [00:39<00:53, 3.07it/s]
 
638
  43%|████▎ | 125/288 [00:39<00:52, 3.10it/s]
 
639
  44%|████▍ | 126/288 [00:39<00:54, 2.99it/s]
 
640
  44%|████▍ | 127/288 [00:40<00:52, 3.07it/s]
 
641
  44%|████▍ | 128/288 [00:40<00:51, 3.08it/s]
 
642
  45%|████▍ | 129/288 [00:41<01:05, 2.41it/s]
 
643
  45%|████▌ | 130/288 [00:41<01:01, 2.56it/s]
 
644
  45%|████▌ | 131/288 [00:41<00:57, 2.75it/s]
 
645
  46%|████▌ | 132/288 [00:42<00:55, 2.83it/s]
 
646
  46%|████▌ | 133/288 [00:42<00:55, 2.78it/s]
 
647
  47%|████▋ | 134/288 [00:42<00:55, 2.77it/s]
 
648
  47%|████▋ | 135/288 [00:43<00:53, 2.85it/s]
 
649
  47%|████▋ | 136/288 [00:43<00:51, 2.93it/s]
 
650
  48%|████▊ | 137/288 [00:43<01:01, 2.47it/s]
 
651
  48%|████▊ | 138/288 [00:44<00:58, 2.57it/s]
 
652
  48%|████▊ | 139/288 [00:44<00:55, 2.66it/s]
 
653
  49%|████▊ | 140/288 [00:45<00:53, 2.74it/s]
 
654
  49%|████▉ | 141/288 [00:45<00:53, 2.74it/s]
 
655
  49%|████▉ | 142/288 [00:45<00:50, 2.87it/s]
 
656
  50%|████▉ | 143/288 [00:46<00:50, 2.87it/s]
 
657
  50%|█████ | 144/288 [00:46<00:50, 2.85it/s]
 
658
  50%|█████ | 145/288 [00:46<00:49, 2.88it/s]
 
659
  51%|█████ | 146/288 [00:47<00:47, 3.00it/s]
 
660
  51%|█████ | 147/288 [00:47<00:45, 3.10it/s]
 
661
  51%|█████▏ | 148/288 [00:47<00:45, 3.08it/s]
 
662
  52%|█████▏ | 149/288 [00:47<00:44, 3.10it/s]
 
663
  52%|█████▏ | 150/288 [00:48<00:44, 3.13it/s]
 
664
  52%|█████▏ | 151/288 [00:48<00:49, 2.77it/s]
 
665
  53%|█████▎ | 152/288 [00:49<00:48, 2.82it/s]
 
666
  53%|█████▎ | 153/288 [00:49<00:45, 2.99it/s]
 
667
  53%|█████▎ | 154/288 [00:49<00:45, 2.93it/s]
 
668
  54%|█████▍ | 155/288 [00:50<00:44, 2.96it/s]
 
669
  54%|█████▍ | 156/288 [00:50<00:43, 3.03it/s]
 
670
  55%|███���█▍ | 157/288 [00:50<00:43, 3.03it/s]
 
671
  55%|█████▍ | 158/288 [00:51<00:50, 2.58it/s]
 
672
  55%|█████▌ | 159/288 [00:51<00:47, 2.69it/s]
 
673
  56%|█████▌ | 160/288 [00:51<00:44, 2.88it/s]
 
674
  56%|█████▌ | 161/288 [00:52<00:41, 3.02it/s]
 
675
  56%|█████▋ | 162/288 [00:52<00:40, 3.14it/s]
 
676
  57%|█████▋ | 163/288 [00:52<00:40, 3.09it/s]
 
677
  57%|█████▋ | 164/288 [00:53<00:45, 2.75it/s]
 
678
  57%|█████▋ | 165/288 [00:53<00:43, 2.83it/s]
 
679
  58%|█████▊ | 166/288 [00:53<00:42, 2.87it/s]
 
680
  58%|█████▊ | 167/288 [00:54<00:40, 2.99it/s]
 
681
  58%|█████▊ | 168/288 [00:54<00:40, 2.93it/s]
 
682
  59%|█████▊ | 169/288 [00:54<00:40, 2.94it/s]
 
683
  59%|█████▉ | 170/288 [00:55<00:45, 2.57it/s]
 
684
  59%|█████▉ | 171/288 [00:55<00:43, 2.68it/s]
 
685
  60%|█████▉ | 172/288 [00:56<00:41, 2.79it/s]
 
686
  60%|██████ | 173/288 [00:56<00:40, 2.83it/s]
 
687
  60%|██████ | 174/288 [00:56<00:39, 2.89it/s]
 
688
  61%|██████ | 175/288 [00:57<00:37, 3.01it/s]
 
689
  61%|██████ | 176/288 [00:57<00:51, 2.17it/s]
 
690
  61%|██████▏ | 177/288 [00:58<00:47, 2.36it/s]
 
691
  62%|██████▏ | 178/288 [00:58<00:43, 2.50it/s]
 
692
  62%|██████▏ | 179/288 [00:58<00:40, 2.67it/s]
 
693
  62%|██████▎ | 180/288 [00:59<00:39, 2.77it/s]
 
694
  63%|██████▎ | 181/288 [00:59<00:39, 2.69it/s]
 
695
  63%|██████▎ | 182/288 [00:59<00:39, 2.71it/s]
 
696
  64%|██████▎ | 183/288 [01:00<00:37, 2.82it/s]
 
697
  64%|██████▍ | 184/288 [01:00<00:36, 2.86it/s]
 
698
  64%|██████▍ | 185/288 [01:00<00:35, 2.92it/s]
 
699
  65%|██████▍ | 186/288 [01:01<00:38, 2.62it/s]
 
700
  65%|██████▍ | 187/288 [01:01<00:38, 2.63it/s]
 
701
  65%|██████▌ | 188/288 [01:02<00:36, 2.74it/s]
 
702
  66%|██████▌ | 189/288 [01:02<00:34, 2.90it/s]
 
703
  66%|██████▌ | 190/288 [01:02<00:34, 2.81it/s]
 
704
  66%|██████▋ | 191/288 [01:03<00:39, 2.43it/s]
 
705
  67%|██████▋ | 192/288 [01:03<00:36, 2.65it/s]
 
706
  67%|██████▋ | 193/288 [01:03<00:34, 2.76it/s]
 
707
  67%|██████▋ | 194/288 [01:04<00:34, 2.72it/s]
 
708
  68%|██████▊ | 195/288 [01:04<00:33, 2.80it/s]
 
709
  68%|██████▊ | 196/288 [01:05<00:39, 2.31it/s]
 
710
  68%|██████▊ | 197/288 [01:05<00:36, 2.47it/s]
 
711
  69%|██████▉ | 198/288 [01:05<00:33, 2.72it/s]
 
712
  69%|██████▉ | 199/288 [01:06<00:31, 2.82it/s]
 
713
  69%|██████▉ | 200/288 [01:06<00:29, 3.00it/s]
 
714
  70%|██████▉ | 201/288 [01:06<00:35, 2.46it/s]
 
715
  70%|███████ | 202/288 [01:07<00:32, 2.62it/s]
 
716
  70%|███████ | 203/288 [01:07<00:31, 2.73it/s]
 
717
  71%|███████ | 204/288 [01:07<00:29, 2.82it/s]
 
718
  71%|███████ | 205/288 [01:08<00:28, 2.95it/s]
 
719
  72%|███████▏ | 206/288 [01:08<00:33, 2.42it/s]
 
720
  72%|███████▏ | 207/288 [01:09<00:32, 2.50it/s]
 
721
  72%|███████▏ | 208/288 [01:09<00:31, 2.57it/s]
 
722
  73%|███████▎ | 209/288 [01:09<00:30, 2.61it/s]
 
723
  73%|███████▎ | 210/288 [01:10<00:27, 2.87it/s]
 
724
  73%|███████▎ | 211/288 [01:10<00:36, 2.14it/s]
 
725
  74%|███████▎ | 212/288 [01:11<00:32, 2.36it/s]
 
726
  74%|███████▍ | 213/288 [01:11<00:31, 2.38it/s]
 
727
  74%|███████▍ | 214/288 [01:12<00:29, 2.54it/s]
 
728
  75%|███████▍ | 215/288 [01:12<00:31, 2.30it/s]
 
729
  75%|███████▌ | 216/288 [01:12<00:29, 2.45it/s]
 
730
  75%|███████▌ | 217/288 [01:13<00:26, 2.67it/s]
 
731
  76%|███████▌ | 218/288 [01:13<00:25, 2.72it/s]
 
732
  76%|███████▌ | 219/288 [01:14<00:29, 2.32it/s]
 
733
  76%|███████▋ | 220/288 [01:14<00:28, 2.37it/s]
 
734
  77%|███████▋ | 221/288 [01:14<00:26, 2.49it/s]
 
735
  77%|███████▋ | 222/288 [01:15<00:26, 2.54it/s]
 
736
  77%|███████▋ | 223/288 [01:15<00:28, 2.30it/s]
 
737
  78%|███████▊ | 224/288 [01:16<00:26, 2.45it/s]
 
738
  78%|███████▊ | 225/288 [01:16<00:24, 2.61it/s]
 
739
  78%|███████▊ | 226/288 [01:16<00:22, 2.72it/s]
 
740
  79%|███████▉ | 227/288 [01:17<00:27, 2.25it/s]
 
741
  79%|███████▉ | 228/288 [01:17<00:24, 2.40it/s]
 
742
  80%|███████▉ | 229/288 [01:18<00:23, 2.54it/s]
 
743
  80%|███████▉ | 230/288 [01:18<00:21, 2.66it/s]
 
744
  80%|████��███ | 231/288 [01:19<00:25, 2.23it/s]
 
745
  81%|████████ | 232/288 [01:19<00:22, 2.44it/s]
 
746
  81%|████████ | 233/288 [01:19<00:21, 2.53it/s]
 
747
  81%|████████▏ | 234/288 [01:20<00:19, 2.71it/s]
 
748
  82%|████████▏ | 235/288 [01:20<00:25, 2.12it/s]
 
749
  82%|████████▏ | 236/288 [01:21<00:22, 2.28it/s]
 
750
  82%|████████▏ | 237/288 [01:21<00:21, 2.41it/s]
 
751
  83%|████████▎ | 238/288 [01:21<00:20, 2.50it/s]
 
752
  83%|████████▎ | 239/288 [01:22<00:24, 2.00it/s]
 
753
  83%|████████▎ | 240/288 [01:22<00:21, 2.27it/s]
 
754
  84%|████████▎ | 241/288 [01:23<00:19, 2.42it/s]
 
755
  84%|████████▍ | 242/288 [01:23<00:18, 2.54it/s]
 
756
  84%|████████▍ | 243/288 [01:24<00:21, 2.06it/s]
 
757
  85%|████████▍ | 244/288 [01:24<00:19, 2.24it/s]
 
758
  85%|████████▌ | 245/288 [01:25<00:17, 2.39it/s]
 
759
  85%|████████▌ | 246/288 [01:25<00:17, 2.45it/s]
 
760
  86%|████████▌ | 247/288 [01:26<00:20, 2.02it/s]
 
761
  86%|████████▌ | 248/288 [01:26<00:17, 2.25it/s]
 
762
  86%|████████▋ | 249/288 [01:26<00:16, 2.37it/s]
 
763
  87%|████████▋ | 250/288 [01:27<00:15, 2.45it/s]
 
764
  87%|████████▋ | 251/288 [01:27<00:19, 1.93it/s]
 
765
  88%|████████▊ | 252/288 [01:28<00:17, 2.10it/s]
 
766
  88%|████████▊ | 253/288 [01:28<00:14, 2.34it/s]
 
767
  88%|████████▊ | 254/288 [01:29<00:15, 2.19it/s]
 
768
  89%|████████▊ | 255/288 [01:29<00:13, 2.44it/s]
 
769
  89%|████████▉ | 256/288 [01:29<00:12, 2.56it/s]
 
770
  89%|████████▉ | 257/288 [01:30<00:14, 2.19it/s]
 
771
  90%|████████▉ | 258/288 [01:30<00:13, 2.30it/s]
 
772
  90%|████████▉ | 259/288 [01:31<00:12, 2.39it/s]
 
773
  90%|█████████ | 260/288 [01:31<00:13, 2.08it/s]
 
774
  91%|█████████ | 261/288 [01:32<00:11, 2.26it/s]
 
775
  91%|█████████ | 262/288 [01:32<00:10, 2.41it/s]
 
776
  91%|█████████▏| 263/288 [01:33<00:11, 2.10it/s]
 
777
  92%|█████████▏| 264/288 [01:33<00:10, 2.26it/s]
 
778
  92%|█████████▏| 265/288 [01:33<00:09, 2.39it/s]
 
779
  92%|█████████▏| 266/288 [01:34<00:10, 2.08it/s]
 
780
  93%|█████████▎| 267/288 [01:34<00:09, 2.20it/s]
 
781
  93%|█████████▎| 268/288 [01:35<00:08, 2.38it/s]
 
782
  93%|█████████▎| 269/288 [01:35<00:09, 2.00it/s]
 
783
  94%|█████████▍| 270/288 [01:36<00:08, 2.11it/s]
 
784
  94%|█████████▍| 271/288 [01:36<00:07, 2.28it/s]
 
785
  94%|█████████▍| 272/288 [01:37<00:08, 1.85it/s]
 
786
  95%|█████████▍| 273/288 [01:37<00:07, 2.08it/s]
 
787
  95%|█████████▌| 274/288 [01:38<00:06, 2.28it/s]
 
788
  95%|█████████▌| 275/288 [01:38<00:06, 1.89it/s]
 
789
  96%|█████████▌| 276/288 [01:39<00:05, 2.12it/s]
 
790
  96%|█████████▌| 277/288 [01:39<00:04, 2.29it/s]
 
791
  97%|█████████▋| 278/288 [01:40<00:05, 1.78it/s]
 
792
  97%|█████████▋| 279/288 [01:40<00:04, 2.05it/s]
 
793
  97%|█████████▋| 280/288 [01:41<00:03, 2.24it/s]
 
794
  98%|█████████▊| 281/288 [01:41<00:03, 1.83it/s]
 
795
  98%|█████████▊| 282/288 [01:42<00:02, 2.06it/s]
 
796
  98%|█████████▊| 283/288 [01:42<00:02, 2.17it/s]
 
797
  99%|█████████▊| 284/288 [01:43<00:02, 1.80it/s]
 
798
  99%|█████████▉| 285/288 [01:43<00:01, 2.05it/s]
 
799
  99%|█████████▉| 286/288 [01:44<00:00, 2.26it/s]
 
 
800
 
 
801
 
802
  1%|▏ | 500/37300 [15:13<14:56:05, 1.46s/it]
 
 
803
  Saving model checkpoint to ./checkpoint-500
 
 
 
 
 
1
+ 02/06/2022 16:31:57 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True
2
+ 02/06/2022 16:31:57 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
3
+ _n_gpu=1,
4
+ adafactor=False,
5
+ adam_beta1=0.9,
6
+ adam_beta2=0.999,
7
+ adam_epsilon=1e-08,
8
+ bf16=False,
9
+ bf16_full_eval=False,
10
+ dataloader_drop_last=False,
11
+ dataloader_num_workers=0,
12
+ dataloader_pin_memory=True,
13
+ ddp_bucket_cap_mb=None,
14
+ ddp_find_unused_parameters=None,
15
+ debug=[],
16
+ deepspeed=None,
17
+ disable_tqdm=False,
18
+ do_eval=True,
19
+ do_predict=False,
20
+ do_train=True,
21
+ eval_accumulation_steps=None,
22
+ eval_steps=500,
23
+ evaluation_strategy=IntervalStrategy.STEPS,
24
+ fp16=True,
25
+ fp16_backend=auto,
26
+ fp16_full_eval=False,
27
+ fp16_opt_level=O1,
28
+ gradient_accumulation_steps=4,
29
+ gradient_checkpointing=True,
30
+ greater_is_better=None,
31
+ group_by_length=True,
32
+ half_precision_backend=auto,
33
+ hub_model_id=None,
34
+ hub_strategy=HubStrategy.EVERY_SAVE,
35
+ hub_token=<HUB_TOKEN>,
36
+ ignore_data_skip=False,
37
+ label_names=None,
38
+ label_smoothing_factor=0.0,
39
+ learning_rate=0.0001,
40
+ length_column_name=input_length,
41
+ load_best_model_at_end=False,
42
+ local_rank=-1,
43
+ log_level=-1,
44
+ log_level_replica=-1,
45
+ log_on_each_node=True,
46
+ logging_dir=./runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c,
47
+ logging_first_step=False,
48
+ logging_nan_inf_filter=True,
49
+ logging_steps=100,
50
+ logging_strategy=IntervalStrategy.STEPS,
51
+ lr_scheduler_type=SchedulerType.LINEAR,
52
+ max_grad_norm=1.0,
53
+ max_steps=-1,
54
+ metric_for_best_model=None,
55
+ mp_parameters=,
56
+ no_cuda=False,
57
+ num_train_epochs=100.0,
58
+ optim=OptimizerNames.ADAMW_HF,
59
+ output_dir=./,
60
+ overwrite_output_dir=True,
61
+ past_index=-1,
62
+ per_device_eval_batch_size=8,
63
+ per_device_train_batch_size=8,
64
+ prediction_loss_only=False,
65
+ push_to_hub=True,
66
+ push_to_hub_model_id=None,
67
+ push_to_hub_organization=None,
68
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
69
+ remove_unused_columns=True,
70
+ report_to=['tensorboard'],
71
+ resume_from_checkpoint=None,
72
+ run_name=./,
73
+ save_on_each_node=False,
74
+ save_steps=500,
75
+ save_strategy=IntervalStrategy.STEPS,
76
+ save_total_limit=3,
77
+ seed=42,
78
+ sharded_ddp=[],
79
+ skip_memory_metrics=True,
80
+ tf32=None,
81
+ tpu_metrics_debug=False,
82
+ tpu_num_cores=None,
83
+ use_legacy_prediction_loop=False,
84
+ warmup_ratio=0.0,
85
+ warmup_steps=2000,
86
+ weight_decay=0.0,
87
+ xpu_backend=None,
88
+ )
89
+ 02/06/2022 16:31:57 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/common_voice/zh-HK/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd)
90
+ 02/06/2022 16:31:58 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/common_voice/zh-HK/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd)
91
+ 02/06/2022 16:31:58 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/common_voice/zh-HK/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd/cache-4b9c7ee298793a4a.arrow
92
+ 02/06/2022 16:31:58 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/common_voice/zh-HK/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd/cache-229158ba70a553cf.arrow
93
+ loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6
94
+ Model config Wav2Vec2Config {
95
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
96
+ "activation_dropout": 0.0,
97
+ "adapter_kernel_size": 3,
98
+ "adapter_stride": 2,
99
+ "add_adapter": false,
100
+ "apply_spec_augment": true,
101
+ "architectures": [
102
+ "Wav2Vec2ForPreTraining"
103
+ ],
104
+ "attention_dropout": 0.1,
105
+ "bos_token_id": 1,
106
+ "classifier_proj_size": 256,
107
+ "codevector_dim": 768,
108
+ "contrastive_logits_temperature": 0.1,
109
+ "conv_bias": true,
110
+ "conv_dim": [
111
+ 512,
112
+ 512,
113
+ 512,
114
+ 512,
115
+ 512,
116
+ 512,
117
+ 512
118
+ ],
119
+ "conv_kernel": [
120
+ 10,
121
+ 3,
122
+ 3,
123
+ 3,
124
+ 3,
125
+ 2,
126
+ 2
127
+ ],
128
+ "conv_stride": [
129
+ 5,
130
+ 2,
131
+ 2,
132
+ 2,
133
+ 2,
134
+ 2,
135
+ 2
136
+ ],
137
+ "ctc_loss_reduction": "sum",
138
+ "ctc_zero_infinity": false,
139
+ "diversity_loss_weight": 0.1,
140
+ "do_stable_layer_norm": true,
141
+ "eos_token_id": 2,
142
+ "feat_extract_activation": "gelu",
143
+ "feat_extract_dropout": 0.0,
144
+ "feat_extract_norm": "layer",
145
+ "feat_proj_dropout": 0.1,
146
+ "feat_quantizer_dropout": 0.0,
147
+ "final_dropout": 0.0,
148
+ "gradient_checkpointing": false,
149
+ "hidden_act": "gelu",
150
+ "hidden_dropout": 0.1,
151
+ "hidden_size": 1024,
152
+ "initializer_range": 0.02,
153
+ "intermediate_size": 4096,
154
+ "layer_norm_eps": 1e-05,
155
+ "layerdrop": 0.1,
156
+ "mask_feature_length": 10,
157
+ "mask_feature_min_masks": 0,
158
+ "mask_feature_prob": 0.0,
159
+ "mask_time_length": 10,
160
+ "mask_time_min_masks": 2,
161
+ "mask_time_prob": 0.075,
162
+ "model_type": "wav2vec2",
163
+ "num_adapter_layers": 3,
164
+ "num_attention_heads": 16,
165
+ "num_codevector_groups": 2,
166
+ "num_codevectors_per_group": 320,
167
+ "num_conv_pos_embedding_groups": 16,
168
+ "num_conv_pos_embeddings": 128,
169
+ "num_feat_extract_layers": 7,
170
+ "num_hidden_layers": 24,
171
+ "num_negatives": 100,
172
+ "output_hidden_size": 1024,
173
+ "pad_token_id": 0,
174
+ "proj_codevector_dim": 768,
175
+ "tdnn_dilation": [
176
+ 1,
177
+ 2,
178
+ 3,
179
+ 1,
180
+ 1
181
+ ],
182
+ "tdnn_dim": [
183
+ 512,
184
+ 512,
185
+ 512,
186
+ 512,
187
+ 1500
188
+ ],
189
+ "tdnn_kernel": [
190
+ 5,
191
+ 3,
192
+ 3,
193
+ 1,
194
+ 1
195
+ ],
196
+ "torch_dtype": "float32",
197
+ "transformers_version": "4.17.0.dev0",
198
+ "use_weighted_layer_sum": false,
199
+ "vocab_size": 32,
200
+ "xvector_output_dim": 512
201
+ }
202
+
203
+
204
  0%| | 0/1 [00:00<?, ?ba/s]
205
+
206
  0%| | 0/1 [00:00<?, ?ba/s]
207
+ Didn't find file ./tokenizer_config.json. We won't load it.
208
+ Didn't find file ./added_tokens.json. We won't load it.
209
+ Didn't find file ./special_tokens_map.json. We won't load it.
210
+ Didn't find file ./tokenizer.json. We won't load it.
211
+ loading file ./vocab.json
212
+ loading file None
213
+ loading file None
214
+ loading file None
215
+ loading file None
216
+ file ./config.json not found
217
+ Adding <s> to the vocabulary
218
+ Adding </s> to the vocabulary
219
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
220
+ loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6
221
+ Model config Wav2Vec2Config {
222
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
223
+ "activation_dropout": 0.0,
224
+ "adapter_kernel_size": 3,
225
+ "adapter_stride": 2,
226
+ "add_adapter": false,
227
+ "apply_spec_augment": true,
228
+ "architectures": [
229
+ "Wav2Vec2ForPreTraining"
230
+ ],
231
+ "attention_dropout": 0.1,
232
+ "bos_token_id": 1,
233
+ "classifier_proj_size": 256,
234
+ "codevector_dim": 768,
235
+ "contrastive_logits_temperature": 0.1,
236
+ "conv_bias": true,
237
+ "conv_dim": [
238
+ 512,
239
+ 512,
240
+ 512,
241
+ 512,
242
+ 512,
243
+ 512,
244
+ 512
245
+ ],
246
+ "conv_kernel": [
247
+ 10,
248
+ 3,
249
+ 3,
250
+ 3,
251
+ 3,
252
+ 2,
253
+ 2
254
+ ],
255
+ "conv_stride": [
256
+ 5,
257
+ 2,
258
+ 2,
259
+ 2,
260
+ 2,
261
+ 2,
262
+ 2
263
+ ],
264
+ "ctc_loss_reduction": "sum",
265
+ "ctc_zero_infinity": false,
266
+ "diversity_loss_weight": 0.1,
267
+ "do_stable_layer_norm": true,
268
+ "eos_token_id": 2,
269
+ "feat_extract_activation": "gelu",
270
+ "feat_extract_dropout": 0.0,
271
+ "feat_extract_norm": "layer",
272
+ "feat_proj_dropout": 0.1,
273
+ "feat_quantizer_dropout": 0.0,
274
+ "final_dropout": 0.0,
275
+ "gradient_checkpointing": false,
276
+ "hidden_act": "gelu",
277
+ "hidden_dropout": 0.1,
278
+ "hidden_size": 1024,
279
+ "initializer_range": 0.02,
280
+ "intermediate_size": 4096,
281
+ "layer_norm_eps": 1e-05,
282
+ "layerdrop": 0.1,
283
+ "mask_feature_length": 10,
284
+ "mask_feature_min_masks": 0,
285
+ "mask_feature_prob": 0.0,
286
+ "mask_time_length": 10,
287
+ "mask_time_min_masks": 2,
288
+ "mask_time_prob": 0.075,
289
+ "model_type": "wav2vec2",
290
+ "num_adapter_layers": 3,
291
+ "num_attention_heads": 16,
292
+ "num_codevector_groups": 2,
293
+ "num_codevectors_per_group": 320,
294
+ "num_conv_pos_embedding_groups": 16,
295
+ "num_conv_pos_embeddings": 128,
296
+ "num_feat_extract_layers": 7,
297
+ "num_hidden_layers": 24,
298
+ "num_negatives": 100,
299
+ "output_hidden_size": 1024,
300
+ "pad_token_id": 0,
301
+ "proj_codevector_dim": 768,
302
+ "tdnn_dilation": [
303
+ 1,
304
+ 2,
305
+ 3,
306
+ 1,
307
+ 1
308
+ ],
309
+ "tdnn_dim": [
310
+ 512,
311
+ 512,
312
+ 512,
313
+ 512,
314
+ 1500
315
+ ],
316
+ "tdnn_kernel": [
317
+ 5,
318
+ 3,
319
+ 3,
320
+ 1,
321
+ 1
322
+ ],
323
+ "torch_dtype": "float32",
324
+ "transformers_version": "4.17.0.dev0",
325
+ "use_weighted_layer_sum": false,
326
+ "vocab_size": 32,
327
+ "xvector_output_dim": 512
328
+ }
329
+
330
+ loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326
331
+ Feature extractor Wav2Vec2FeatureExtractor {
332
+ "do_normalize": true,
333
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
334
+ "feature_size": 1,
335
+ "padding_side": "right",
336
+ "padding_value": 0,
337
+ "return_attention_mask": true,
338
+ "sampling_rate": 16000
339
+ }
340
+
341
+ loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd
342
+ Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['project_hid.bias', 'quantizer.codevectors', 'quantizer.weight_proj.weight', 'project_q.weight', 'project_hid.weight', 'project_q.bias', 'quantizer.weight_proj.bias']
343
+ - This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
344
+ - This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
345
+ Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.weight', 'lm_head.bias']
346
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
347
+ 02/06/2022 16:32:04 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/common_voice/zh-HK/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd/cache-34b3c26b7a1907b4.arrow
348
+
349
+
350
+
351
  0%| | 0/3 [00:00<?, ?ba/s]
352
+ Configuration saved in ./preprocessor_config.json
353
+ tokenizer config file saved in ./tokenizer_config.json
354
+ Special tokens file saved in ./special_tokens_map.json
355
+ added tokens file saved in ./added_tokens.json
356
+ Configuration saved in ./config.json
357
+ loading feature extractor configuration file ./preprocessor_config.json
358
+ loading configuration file ./config.json
359
+ Model config Wav2Vec2Config {
360
+ "_name_or_path": "./",
361
+ "activation_dropout": 0.1,
362
+ "adapter_kernel_size": 3,
363
+ "adapter_stride": 2,
364
+ "add_adapter": false,
365
+ "apply_spec_augment": true,
366
+ "architectures": [
367
+ "Wav2Vec2ForPreTraining"
368
+ ],
369
+ "attention_dropout": 0.0,
370
+ "bos_token_id": 1,
371
+ "classifier_proj_size": 256,
372
+ "codevector_dim": 768,
373
+ "contrastive_logits_temperature": 0.1,
374
+ "conv_bias": true,
375
+ "conv_dim": [
376
+ 512,
377
+ 512,
378
+ 512,
379
+ 512,
380
+ 512,
381
+ 512,
382
+ 512
383
+ ],
384
+ "conv_kernel": [
385
+ 10,
386
+ 3,
387
+ 3,
388
+ 3,
389
+ 3,
390
+ 2,
391
+ 2
392
+ ],
393
+ "conv_stride": [
394
+ 5,
395
+ 2,
396
+ 2,
397
+ 2,
398
+ 2,
399
+ 2,
400
+ 2
401
+ ],
402
+ "ctc_loss_reduction": "mean",
403
+ "ctc_zero_infinity": false,
404
+ "diversity_loss_weight": 0.1,
405
+ "do_stable_layer_norm": true,
406
+ "eos_token_id": 2,
407
+ "feat_extract_activation": "gelu",
408
+ "feat_extract_dropout": 0.0,
409
+ "feat_extract_norm": "layer",
410
+ "feat_proj_dropout": 0.0,
411
+ "feat_quantizer_dropout": 0.0,
412
+ "final_dropout": 0.0,
413
+ "hidden_act": "gelu",
414
+ "hidden_dropout": 0.0,
415
+ "hidden_size": 1024,
416
+ "initializer_range": 0.02,
417
+ "intermediate_size": 4096,
418
+ "layer_norm_eps": 1e-05,
419
+ "layerdrop": 0.0,
420
+ "mask_feature_length": 64,
421
+ "mask_feature_min_masks": 0,
422
+ "mask_feature_prob": 0.25,
423
+ "mask_time_length": 10,
424
+ "mask_time_min_masks": 2,
425
+ "mask_time_prob": 0.75,
426
+ "model_type": "wav2vec2",
427
+ "num_adapter_layers": 3,
428
+ "num_attention_heads": 16,
429
+ "num_codevector_groups": 2,
430
+ "num_codevectors_per_group": 320,
431
+ "num_conv_pos_embedding_groups": 16,
432
+ "num_conv_pos_embeddings": 128,
433
+ "num_feat_extract_layers": 7,
434
+ "num_hidden_layers": 24,
435
+ "num_negatives": 100,
436
+ "output_hidden_size": 1024,
437
+ "pad_token_id": 3652,
438
+ "proj_codevector_dim": 768,
439
+ "tdnn_dilation": [
440
+ 1,
441
+ 2,
442
+ 3,
443
+ 1,
444
+ 1
445
+ ],
446
+ "tdnn_dim": [
447
+ 512,
448
+ 512,
449
+ 512,
450
+ 512,
451
+ 1500
452
+ ],
453
+ "tdnn_kernel": [
454
+ 5,
455
+ 3,
456
+ 3,
457
+ 1,
458
+ 1
459
+ ],
460
+ "torch_dtype": "float32",
461
+ "transformers_version": "4.17.0.dev0",
462
+ "use_weighted_layer_sum": false,
463
+ "vocab_size": 3655,
464
+ "xvector_output_dim": 512
465
+ }
466
+
467
+ loading feature extractor configuration file ./preprocessor_config.json
468
+ Feature extractor Wav2Vec2FeatureExtractor {
469
+ "do_normalize": true,
470
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
471
+ "feature_size": 1,
472
+ "padding_side": "right",
473
+ "padding_value": 0,
474
+ "return_attention_mask": true,
475
+ "sampling_rate": 16000
476
+ }
477
+
478
+ Didn't find file ./tokenizer.json. We won't load it.
479
+ loading file ./vocab.json
480
+ loading file ./tokenizer_config.json
481
+ loading file ./added_tokens.json
482
+ loading file ./special_tokens_map.json
483
+ loading file None
484
+ Adding <s> to the vocabulary
485
+ Adding </s> to the vocabulary
486
+ /workspace/wav2vec2-xls-r-300m-zh-HK-v2/./ is already a clone of https://huggingface.co/w11wo/wav2vec2-xls-r-300m-zh-HK-v2. Make sure you pull the latest changes with `repo.git_pull()`.
487
+ 02/06/2022 16:32:48 - WARNING - huggingface_hub.repository - /workspace/wav2vec2-xls-r-300m-zh-HK-v2/./ is already a clone of https://huggingface.co/w11wo/wav2vec2-xls-r-300m-zh-HK-v2. Make sure you pull the latest changes with `repo.git_pull()`.
488
+ Using amp half precision backend
489
+ The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
490
+ /opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
491
+ warnings.warn(
492
+ ***** Running training *****
493
+ Num examples = 11949
494
+ Num Epochs = 100
495
+ Instantaneous batch size per device = 8
496
+ Total train batch size (w. parallel, distributed & accumulation) = 32
497
+ Gradient Accumulation steps = 4
498
+ Total optimization steps = 37300
499
+
500
  0%| | 0/37300 [00:00<?, ?it/s]
501
  0%| | 1/37300 [00:02<27:52:51, 2.69s/it]
502
  0%| | 2/37300 [00:05<27:44:40, 2.68s/it]
503
  0%| | 3/37300 [00:07<26:20:20, 2.54s/it]
504
  0%| | 4/37300 [00:10<25:14:13, 2.44s/it]
505
  0%| | 5/37300 [00:12<24:34:00, 2.37s/it]
506
  0%| | 6/37300 [00:14<23:51:17, 2.30s/it]
507
  0%| | 7/37300 [00:16<23:12:30, 2.24s/it]
508
  0%| | 8/37300 [00:18<22:34:15, 2.18s/it]
509
  0%| | 9/37300 [00:20<21:43:40, 2.10s/it]
510
  0%| | 10/37300 [00:22<21:09:24, 2.04s/it]
511
  0%| | 11/37300 [00:24<20:23:14, 1.97s/it]
512
  0%| | 12/37300 [00:26<19:48:38, 1.91s/it]
513
  0%| | 13/37300 [00:27<19:19:05, 1.87s/it]
514
  0%| | 14/37300 [00:29<18:58:02, 1.83s/it]
515
  0%| | 15/37300 [00:31<18:35:09, 1.79s/it]
516
  0%| | 16/37300 [00:32<18:20:58, 1.77s/it]
517
  0%| | 17/37300 [00:34<18:01:00, 1.74s/it]
518
  0%| | 18/37300 [00:36<17:45:52, 1.72s/it]
519
  0%| | 19/37300 [00:37<17:25:44, 1.68s/it]
520
  0%| | 20/37300 [00:39<17:09:19, 1.66s/it]
521
  0%| | 21/37300 [00:41<16:53:34, 1.63s/it]
522
  0%| | 22/37300 [00:42<16:33:49, 1.60s/it]
523
  0%| | 23/37300 [00:44<16:24:45, 1.59s/it]
524
  0%| | 24/37300 [00:45<16:23:26, 1.58s/it]
525
  0%| | 25/37300 [00:47<16:09:47, 1.56s/it]
526
  0%| | 26/37300 [00:48<15:54:32, 1.54s/it]
527
  0%| | 27/37300 [00:50<15:33:01, 1.50s/it]
528
  0%| | 28/37300 [00:51<15:34:21, 1.50s/it]
529
  0%| | 29/37300 [00:53<15:14:30, 1.47s/it]
530
  0%| | 30/37300 [00:54<14:53:39, 1.44s/it]
531
  0%| | 31/37300 [00:55<14:28:05, 1.40s/it]
532
  0%| | 32/37300 [00:56<14:10:28, 1.37s/it]
533
  0%| | 33/37300 [00:58<13:55:51, 1.35s/it]
534
  0%| | 34/37300 [00:59<13:37:37, 1.32s/it]
535
  0%| | 35/37300 [01:00<13:25:49, 1.30s/it]
536
  0%| | 36/37300 [01:01<13:05:22, 1.26s/it]
537
  0%| | 37/37300 [01:03<12:58:00, 1.25s/it]
538
  0%| | 38/37300 [01:04<12:54:34, 1.25s/it]
539
  0%| | 39/37300 [01:05<12:45:42, 1.23s/it]
540
  0%| | 40/37300 [01:06<12:35:43, 1.22s/it]
541
  0%| | 41/37300 [01:07<12:29:24, 1.21s/it]
542
  0%| | 42/37300 [01:09<12:24:22, 1.20s/it]
543
  0%| | 43/37300 [01:10<12:09:58, 1.18s/it]
544
  0%| | 44/37300 [01:11<11:54:30, 1.15s/it]
545
  0%| | 45/37300 [01:12<11:37:04, 1.12s/it]
546
  0%| | 46/37300 [01:13<11:22:02, 1.10s/it]
547
  0%| | 47/37300 [01:14<11:10:26, 1.08s/it]
548
  0%| | 48/37300 [01:15<10:59:46, 1.06s/it]
549
  0%| | 49/37300 [01:16<10:45:22, 1.04s/it]
550
  0%| | 50/37300 [01:17<10:30:56, 1.02s/it]
551
  0%| | 51/37300 [01:20<15:17:02, 1.48s/it]
552
  0%| | 52/37300 [01:22<18:47:59, 1.82s/it]
553
  0%| | 53/37300 [01:25<20:33:46, 1.99s/it]
554
  0%| | 54/37300 [01:27<21:31:50, 2.08s/it]
555
  0%| | 55/37300 [01:29<21:58:23, 2.12s/it]
556
  0%| | 56/37300 [01:31<22:08:50, 2.14s/it]
557
  0%| | 57/37300 [01:33<22:03:41, 2.13s/it]
558
  0%| | 58/37300 [01:35<21:56:25, 2.12s/it]
559
  0%| | 59/37300 [01:38<21:45:05, 2.10s/it]
560
  0%| | 60/37300 [01:39<21:12:29, 2.05s/it]
561
  0%| | 61/37300 [01:41<20:39:19, 2.00s/it]
562
  0%| | 62/37300 [01:43<20:08:46, 1.95s/it]
563
  0%| | 63/37300 [01:45<19:34:13, 1.89s/it]
564
  0%| | 64/37300 [01:47<19:05:14, 1.85s/it]
565
  0%| | 65/37300 [01:48<18:38:25, 1.80s/it]
566
  0%| | 66/37300 [01:50<18:18:23, 1.77s/it]
567
  0%| | 67/37300 [01:52<17:59:04, 1.74s/it]
568
  0%| | 68/37300 [01:53<17:39:44, 1.71s/it]
569
  0%| | 69/37300 [01:55<17:23:14, 1.68s/it]
570
  0%| | 70/37300 [01:57<17:14:44, 1.67s/it]
571
  0%| | 71/37300 [01:58<16:47:48, 1.62s/it]
572
  0%| | 72/37300 [02:00<16:31:57, 1.60s/it]
573
  0%| | 73/37300 [02:01<16:13:28, 1.57s/it]
574
  0%| | 74/37300 [02:03<16:03:25, 1.55s/it]
575
  0%| | 75/37300 [02:04<15:45:47, 1.52s/it]
576
  0%| | 76/37300 [02:06<15:34:44, 1.51s/it]
577
  0%| | 77/37300 [02:07<15:33:55, 1.51s/it]
578
  0%| | 78/37300 [02:09<15:20:42, 1.48s/it]
579
  0%| | 79/37300 [02:10<15:07:12, 1.46s/it]
580
  0%| | 80/37300 [02:11<14:56:02, 1.44s/it]
581
  0%| | 81/37300 [02:13<14:43:08, 1.42s/it]
582
  0%| | 82/37300 [02:14<14:20:35, 1.39s/it]
583
  0%| | 83/37300 [02:15<13:58:56, 1.35s/it]
584
  0%| | 84/37300 [02:17<13:46:27, 1.33s/it]
585
  0%| | 85/37300 [02:18<13:32:48, 1.31s/it]
586
  0%| | 86/37300 [02:19<13:15:56, 1.28s/it]
587
  0%| | 87/37300 [02:20<13:06:35, 1.27s/it]
588
  0%| | 88/37300 [02:22<13:08:02, 1.27s/it]
589
  0%| | 89/37300 [02:23<13:17:08, 1.29s/it]
590
  0%| | 90/37300 [02:24<13:10:15, 1.27s/it]
591
  0%| | 91/37300 [02:25<12:51:09, 1.24s/it]
592
  0%| | 92/37300 [02:26<12:38:10, 1.22s/it]
593
  0%| | 93/37300 [02:28<12:31:19, 1.21s/it]
594
  0%| | 94/37300 [02:29<12:20:50, 1.19s/it]
595
  0%| | 95/37300 [02:30<12:04:08, 1.17s/it]
596
  0%| | 96/37300 [02:31<11:58:35, 1.16s/it]
597
  0%| | 97/37300 [02:32<11:51:50, 1.15s/it]
598
  0%| | 98/37300 [02:33<11:57:28, 1.16s/it]
599
  0%| | 99/37300 [02:34<11:27:51, 1.11s/it]
600
  0%| | 100/37300 [02:35<10:59:19, 1.06s/it]
601
 
602
  0%| | 100/37300 [02:35<10:59:19, 1.06s/it]
603
  0%| | 101/37300 [02:38<15:45:32, 1.53s/it]
604
  0%| | 102/37300 [02:40<18:45:01, 1.81s/it]
605
  0%| | 103/37300 [02:43<20:54:43, 2.02s/it]
606
  0%| | 104/37300 [02:45<21:57:30, 2.13s/it]
607
  0%| | 105/37300 [02:48<22:31:20, 2.18s/it]
608
  0%| | 106/37300 [02:50<22:43:03, 2.20s/it]
609
  0%| | 107/37300 [02:52<22:35:22, 2.19s/it]
610
  0%| | 108/37300 [02:54<22:28:01, 2.17s/it]
611
  0%| | 109/37300 [02:56<22:07:51, 2.14s/it]
612
  0%| | 110/37300 [02:58<21:38:46, 2.10s/it]
613
  0%| | 111/37300 [03:00<20:54:47, 2.02s/it]
614
  0%| | 112/37300 [03:02<20:20:21, 1.97s/it]
615
  0%| | 113/37300 [03:04<19:49:19, 1.92s/it]
616
  0%| | 114/37300 [03:05<19:12:13, 1.86s/it]
617
  0%| | 115/37300 [03:07<18:44:03, 1.81s/it]
618
  0%| | 116/37300 [03:09<18:22:26, 1.78s/it]
619
  0%| | 117/37300 [03:11<18:04:39, 1.75s/it]
620
  0%| | 118/37300 [03:12<17:47:09, 1.72s/it]
621
  0%| | 119/37300 [03:14<17:27:27, 1.69s/it]
622
  0%| | 120/37300 [03:15<17:20:24, 1.68s/it]
623
  0%| | 121/37300 [03:17<16:59:01, 1.64s/it]
624
  0%| | 122/37300 [03:19<16:42:22, 1.62s/it]
625
  0%| | 123/37300 [03:20<16:21:49, 1.58s/it]
626
  0%| | 124/37300 [03:22<16:05:35, 1.56s/it]
627
  0%| | 125/37300 [03:23<15:48:51, 1.53s/it]
628
  0%| | 126/37300 [03:24<15:35:04, 1.51s/it]
629
  0%| | 127/37300 [03:26<15:18:16, 1.48s/it]
630
  0%| | 128/37300 [03:27<15:03:16, 1.46s/it]
631
  0%| | 129/37300 [03:29<14:59:34, 1.45s/it]
632
  0%| | 130/37300 [03:30<14:46:24, 1.43s/it]
633
  0%| | 131/37300 [03:31<14:31:11, 1.41s/it]
634
  0%| | 132/37300 [03:33<14:15:12, 1.38s/it]
635
  0%| | 133/37300 [03:34<13:58:47, 1.35s/it]
636
  0%| | 134/37300 [03:35<13:42:15, 1.33s/it]
637
  0%| | 135/37300 [03:37<13:30:43, 1.31s/it]
638
  0%| | 136/37300 [03:38<13:19:58, 1.29s/it]
639
  0%| | 137/37300 [03:39<13:06:24, 1.27s/it]
640
  0%| | 138/37300 [03:40<12:58:20, 1.26s/it]
641
  0%| | 139/37300 [03:42<12:48:51, 1.24s/it]
642
  0%| | 140/37300 [03:43<12:39:14, 1.23s/it]
643
  0%| | 141/37300 [03:44<12:31:25, 1.21s/it]
644
  0%| | 142/37300 [03:45<12:21:48, 1.20s/it]
645
  0%| | 143/37300 [03:46<12:11:52, 1.18s/it]
646
  0%| | 144/37300 [03:47<12:02:04, 1.17s/it]
647
  0%| | 145/37300 [03:48<11:45:59, 1.14s/it]
648
  0%| | 146/37300 [03:49<11:28:24, 1.11s/it]
649
  0%| | 147/37300 [03:50<11:14:51, 1.09s/it]
650
  0%| | 148/37300 [03:52<11:00:52, 1.07s/it]
651
  0%| | 149/37300 [03:53<10:48:50, 1.05s/it]
652
  0%| | 150/37300 [03:54<10:38:20, 1.03s/it]
653
  0%| | 151/37300 [03:56<15:47:29, 1.53s/it]
654
  0%| | 152/37300 [03:59<18:55:25, 1.83s/it]
655
  0%| | 153/37300 [04:01<20:26:23, 1.98s/it]
656
  0%| | 154/37300 [04:03<21:20:13, 2.07s/it]
657
  0%| | 155/37300 [04:06<21:41:01, 2.10s/it]
658
  0%| | 156/37300 [04:08<21:49:57, 2.12s/it]
659
  0%| | 157/37300 [04:10<21:52:03, 2.12s/it]
660
  0%| | 158/37300 [04:12<21:41:55, 2.10s/it]
661
  0%| | 159/37300 [04:14<21:18:45, 2.07s/it]
662
  0%| | 160/37300 [04:16<20:51:07, 2.02s/it]
663
  0%| | 161/37300 [04:18<20:24:13, 1.98s/it]
664
  0%| | 162/37300 [04:19<20:01:35, 1.94s/it]
665
  0%| | 163/37300 [04:21<19:33:34, 1.90s/it]
666
  0%| | 164/37300 [04:23<19:05:20, 1.85s/it]
667
  0%| | 165/37300 [04:25<18:42:30, 1.81s/it]
668
  0%| | 166/37300 [04:26<18:20:56, 1.78s/it]
669
  0%| | 167/37300 [04:28<17:59:54, 1.74s/it]
670
  0%| | 168/37300 [04:30<17:40:57, 1.71s/it]
671
  0%| | 169/37300 [04:31<17:23:10, 1.69s/it]
672
  0%| | 170/37300 [04:33<17:06:21, 1.66s/it]
673
  0%| | 171/37300 [04:35<16:55:37, 1.64s/it]
674
  0%| | 172/37300 [04:36<16:37:58, 1.61s/it]
675
  0%| | 173/37300 [04:38<16:18:04, 1.58s/it]
676
  0%| | 174/37300 [04:39<16:04:23, 1.56s/it]
677
  0%| | 175/37300 [04:41<15:48:34, 1.53s/it]
678
  0%| | 176/37300 [04:42<15:34:02, 1.51s/it]
679
  0%| | 177/37300 [04:43<15:21:04, 1.49s/it]
680
  0%| | 178/37300 [04:45<15:07:04, 1.47s/it]
681
  0%| | 179/37300 [04:46<14:52:21, 1.44s/it]
682
  0%| | 180/37300 [04:48<14:37:16, 1.42s/it]
683
  0%| | 181/37300 [04:49<14:17:20, 1.39s/it]
684
  0%| | 182/37300 [04:50<14:02:50, 1.36s/it]
685
  0%| | 183/37300 [04:52<13:46:55, 1.34s/it]
686
  0%| | 184/37300 [04:53<13:31:47, 1.31s/it]
687
  0%| | 185/37300 [04:54<13:19:05, 1.29s/it]
688
  0%| | 186/37300 [04:55<13:04:14, 1.27s/it]
689
  1%| | 187/37300 [04:56<12:56:13, 1.25s/it]
690
  1%| | 188/37300 [04:58<12:50:37, 1.25s/it]
691
  1%| | 189/37300 [04:59<12:44:06, 1.24s/it]
692
  1%| | 190/37300 [05:00<12:32:33, 1.22s/it]
693
  1%| | 191/37300 [05:01<12:26:03, 1.21s/it]
694
  1%| | 192/37300 [05:02<12:23:02, 1.20s/it]
695
  1%| | 193/37300 [05:04<12:13:22, 1.19s/it]
696
  1%| | 194/37300 [05:05<11:58:56, 1.16s/it]
697
  1%| | 195/37300 [05:06<11:38:50, 1.13s/it]
698
  1%| | 196/37300 [05:07<11:23:43, 1.11s/it]
699
  1%| | 197/37300 [05:08<11:11:30, 1.09s/it]
700
  1%| | 198/37300 [05:09<10:57:12, 1.06s/it]
701
  1%| | 199/37300 [05:10<10:44:39, 1.04s/it]
702
  1%| | 200/37300 [05:11<10:33:35, 1.02s/it]
703
 
704
  1%| | 200/37300 [05:11<10:33:35, 1.02s/it]
705
  1%| | 201/37300 [05:13<15:31:47, 1.51s/it]
706
  1%| | 202/37300 [05:16<18:41:01, 1.81s/it]
707
  1%| | 203/37300 [05:18<20:25:35, 1.98s/it]
708
  1%| | 204/37300 [05:21<21:20:46, 2.07s/it]
709
  1%| | 205/37300 [05:23<21:47:59, 2.12s/it]
710
  1%| | 206/37300 [05:25<21:57:05, 2.13s/it]
711
  1%| | 207/37300 [05:27<21:46:58, 2.11s/it]
712
  1%| | 208/37300 [05:29<21:27:39, 2.08s/it]
713
  1%| | 209/37300 [05:31<21:09:13, 2.05s/it]
714
  1%| | 210/37300 [05:33<20:38:21, 2.00s/it]
715
  1%| | 211/37300 [05:35<20:06:57, 1.95s/it]
716
  1%| | 212/37300 [05:37<19:39:36, 1.91s/it]
717
  1%| | 213/37300 [05:38<19:13:12, 1.87s/it]
718
  1%| | 214/37300 [05:40<18:48:22, 1.83s/it]
719
  1%| | 215/37300 [05:42<18:28:07, 1.79s/it]
720
  1%| | 216/37300 [05:44<18:11:30, 1.77s/it]
721
  1%| | 217/37300 [05:45<17:56:19, 1.74s/it]
722
  1%| | 218/37300 [05:47<17:38:08, 1.71s/it]
723
  1%| | 219/37300 [05:49<17:21:49, 1.69s/it]
724
  1%| | 220/37300 [05:50<17:03:17, 1.66s/it]
725
  1%| | 221/37300 [05:52<16:48:03, 1.63s/it]
726
  1%| | 222/37300 [05:53<16:31:37, 1.60s/it]
727
  1%| | 223/37300 [05:55<16:13:34, 1.58s/it]
728
  1%| | 224/37300 [05:56<15:51:53, 1.54s/it]
729
  1%| | 225/37300 [05:58<15:38:33, 1.52s/it]
730
  1%| | 226/37300 [05:59<15:27:25, 1.50s/it]
731
  1%| | 227/37300 [06:01<15:12:26, 1.48s/it]
732
  1%| | 228/37300 [06:02<14:57:47, 1.45s/it]
733
  1%| | 229/37300 [06:03<14:52:51, 1.45s/it]
734
  1%| | 230/37300 [06:05<14:39:29, 1.42s/it]
735
  1%| | 231/37300 [06:06<14:21:08, 1.39s/it]
736
  1%| | 232/37300 [06:07<14:05:47, 1.37s/it]
737
  1%| | 233/37300 [06:09<13:51:04, 1.35s/it]
738
  1%| | 234/37300 [06:10<13:37:55, 1.32s/it]
739
  1%| | 235/37300 [06:11<13:24:52, 1.30s/it]
740
  1%| | 236/37300 [06:12<13:16:21, 1.29s/it]
741
  1%| | 237/37300 [06:14<13:04:33, 1.27s/it]
742
  1%| | 238/37300 [06:15<12:51:23, 1.25s/it]
743
  1%| | 239/37300 [06:16<12:39:44, 1.23s/it]
744
  1%| | 240/37300 [06:17<12:28:34, 1.21s/it]
745
  1%| | 241/37300 [06:18<12:18:29, 1.20s/it]
746
  1%| | 242/37300 [06:20<12:10:01, 1.18s/it]
747
  1%| | 243/37300 [06:21<11:57:15, 1.16s/it]
748
  1%| | 244/37300 [06:22<11:42:43, 1.14s/it]
749
  1%| | 245/37300 [06:23<11:29:25, 1.12s/it]
750
  1%| | 246/37300 [06:24<11:16:00, 1.09s/it]
751
  1%| | 247/37300 [06:25<11:04:52, 1.08s/it]
752
  1%| | 248/37300 [06:26<10:52:38, 1.06s/it]
753
  1%| | 249/37300 [06:27<10:42:50, 1.04s/it]
754
  1%| | 250/37300 [06:28<10:29:23, 1.02s/it]
755
  1%| | 251/37300 [06:31<15:29:58, 1.51s/it]
756
  1%| | 252/37300 [06:33<18:28:44, 1.80s/it]
757
  1%| | 253/37300 [06:35<20:06:55, 1.95s/it]
758
  1%| | 254/37300 [06:38<21:04:00, 2.05s/it]
759
  1%| | 255/37300 [06:40<21:34:26, 2.10s/it]
760
  1%| | 256/37300 [06:42<21:45:33, 2.11s/it]
761
  1%| | 257/37300 [06:44<21:38:14, 2.10s/it]
762
  1%| | 258/37300 [06:46<21:19:51, 2.07s/it]
763
  1%| | 259/37300 [06:48<20:58:49, 2.04s/it]
764
  1%| | 260/37300 [06:50<20:38:07, 2.01s/it]
765
  1%| | 261/37300 [06:52<20:03:34, 1.95s/it]
766
  1%| | 262/37300 [06:54<19:34:28, 1.90s/it]
767
  1%| | 263/37300 [06:55<19:09:15, 1.86s/it]
768
  1%| | 264/37300 [06:57<18:43:50, 1.82s/it]
769
  1%| | 265/37300 [06:59<18:23:06, 1.79s/it]
770
  1%| | 266/37300 [07:00<18:02:27, 1.75s/it]
771
  1%| | 267/37300 [07:02<17:41:46, 1.72s/it]
772
  1%| | 268/37300 [07:04<17:21:27, 1.69s/it]
773
  1%| | 269/37300 [07:05<17:02:20, 1.66s/it]
774
  1%| | 270/37300 [07:07<16:41:34, 1.62s/it]
775
  1%| | 271/37300 [07:08<16:24:24, 1.60s/it]
776
  1%| | 272/37300 [07:10<16:10:44, 1.57s/it]
777
  1%| | 273/37300 [07:11<15:58:13, 1.55s/it]
778
  1%| | 274/37300 [07:13<15:48:52, 1.54s/it]
779
  1%| | 275/37300 [07:14<15:25:32, 1.50s/it]
780
  1%| | 276/37300 [07:16<15:19:58, 1.49s/it]
781
  1%| | 277/37300 [07:17<15:09:44, 1.47s/it]
782
  1%| | 278/37300 [07:19<14:54:52, 1.45s/it]
783
  1%| | 279/37300 [07:20<14:38:45, 1.42s/it]
784
  1%| | 280/37300 [07:21<14:25:20, 1.40s/it]
785
  1%| | 281/37300 [07:23<14:02:50, 1.37s/it]
786
  1%| | 282/37300 [07:24<13:51:04, 1.35s/it]
787
  1%| | 283/37300 [07:25<13:40:56, 1.33s/it]
788
  1%| | 284/37300 [07:26<13:29:15, 1.31s/it]
789
  1%| | 285/37300 [07:28<13:18:39, 1.29s/it]
790
  1%| | 286/37300 [07:29<13:08:10, 1.28s/it]
791
  1%| | 287/37300 [07:30<12:59:39, 1.26s/it]
792
  1%| | 288/37300 [07:31<12:52:29, 1.25s/it]
793
  1%| | 289/37300 [07:33<12:45:20, 1.24s/it]
794
  1%| | 290/37300 [07:34<12:36:16, 1.23s/it]
795
  1%| | 291/37300 [07:35<12:24:28, 1.21s/it]
796
  1%| | 292/37300 [07:36<12:12:44, 1.19s/it]
797
  1%| | 293/37300 [07:37<11:58:28, 1.16s/it]
798
  1%| | 294/37300 [07:38<11:40:55, 1.14s/it]
799
  1%| | 295/37300 [07:39<11:26:52, 1.11s/it]
800
  1%| | 296/37300 [07:40<11:13:02, 1.09s/it]
801
  1%| | 297/37300 [07:41<11:04:15, 1.08s/it]
802
  1%| | 298/37300 [07:42<11:03:23, 1.08s/it]
803
  1%| | 299/37300 [07:44<11:00:40, 1.07s/it]
804
  1%| | 300/37300 [07:45<10:55:45, 1.06s/it]
805
 
806
  1%| | 300/37300 [07:45<10:55:45, 1.06s/it]
807
  1%| | 301/37300 [07:47<16:10:58, 1.57s/it]
808
  1%| | 302/37300 [07:50<19:12:13, 1.87s/it]
809
  1%| | 303/37300 [07:52<20:47:24, 2.02s/it]
810
  1%| | 304/37300 [07:55<21:33:25, 2.10s/it]
811
  1%| | 305/37300 [07:57<21:50:10, 2.12s/it]
812
  1%| | 306/37300 [07:59<21:47:30, 2.12s/it]
813
  1%| | 307/37300 [08:01<21:32:10, 2.10s/it]
814
  1%| | 308/37300 [08:03<21:16:45, 2.07s/it]
815
  1%| | 309/37300 [08:05<20:45:55, 2.02s/it]
816
  1%| | 310/37300 [08:07<20:20:14, 1.98s/it]
817
  1%| | 311/37300 [08:09<19:54:30, 1.94s/it]
818
  1%| | 312/37300 [08:10<19:26:14, 1.89s/it]
819
  1%| | 313/37300 [08:12<19:03:03, 1.85s/it]
820
  1%| | 314/37300 [08:14<18:36:53, 1.81s/it]
821
  1%| | 315/37300 [08:15<18:18:38, 1.78s/it]
822
  1%| | 316/37300 [08:17<17:59:06, 1.75s/it]
823
  1%| | 317/37300 [08:19<17:42:03, 1.72s/it]
824
  1%| | 318/37300 [08:20<17:25:54, 1.70s/it]
825
  1%| | 319/37300 [08:22<17:09:21, 1.67s/it]
826
  1%| | 320/37300 [08:24<16:58:56, 1.65s/it]
827
  1%| | 321/37300 [08:25<16:45:02, 1.63s/it]
828
  1%| | 322/37300 [08:27<16:20:16, 1.59s/it]
829
  1%| | 323/37300 [08:28<15:56:38, 1.55s/it]
830
  1%| | 324/37300 [08:30<15:37:13, 1.52s/it]
831
  1%| | 325/37300 [08:31<15:21:09, 1.49s/it]
832
  1%| | 326/37300 [08:33<15:06:23, 1.47s/it]
833
  1%| | 327/37300 [08:34<14:57:39, 1.46s/it]
834
  1%| | 328/37300 [08:35<14:49:20, 1.44s/it]
835
  1%| | 329/37300 [08:37<14:39:49, 1.43s/it]
836
  1%| | 330/37300 [08:38<14:27:18, 1.41s/it]
837
  1%| | 331/37300 [08:39<13:59:32, 1.36s/it]
838
  1%| | 332/37300 [08:41<13:33:00, 1.32s/it]
839
  1%| | 333/37300 [08:42<13:10:45, 1.28s/it]
840
  1%| | 334/37300 [08:43<12:51:01, 1.25s/it]
841
  1%| | 335/37300 [08:44<12:44:14, 1.24s/it]
842
  1%| | 336/37300 [08:45<12:40:36, 1.23s/it]
843
  1%| | 337/37300 [08:47<12:35:46, 1.23s/it]
844
  1%| | 338/37300 [08:48<12:31:18, 1.22s/it]
845
  1%| | 339/37300 [08:49<12:28:38, 1.22s/it]
846
  1%| | 340/37300 [08:50<12:20:34, 1.20s/it]
847
  1%| | 341/37300 [08:51<12:14:34, 1.19s/it]
848
  1%| | 342/37300 [08:53<12:11:17, 1.19s/it]
849
  1%| | 343/37300 [08:54<12:02:42, 1.17s/it]
850
  1%| | 344/37300 [08:55<11:49:23, 1.15s/it]
851
  1%| | 345/37300 [08:56<11:34:14, 1.13s/it]
852
  1%| | 346/37300 [08:57<11:24:38, 1.11s/it]
853
  1%| | 347/37300 [08:58<11:17:38, 1.10s/it]
854
  1%| | 348/37300 [08:59<11:07:13, 1.08s/it]
855
  1%| | 349/37300 [09:00<11:01:01, 1.07s/it]
856
  1%| | 350/37300 [09:01<10:51:43, 1.06s/it]
857
  1%| | 351/37300 [09:04<15:53:24, 1.55s/it]
858
  1%| | 352/37300 [09:06<18:25:47, 1.80s/it]
859
  1%| | 353/37300 [09:08<19:34:30, 1.91s/it]
860
  1%| | 354/37300 [09:10<19:51:19, 1.93s/it]
861
  1%| | 355/37300 [09:12<19:55:51, 1.94s/it]
862
  1%| | 356/37300 [09:14<19:22:22, 1.89s/it]
863
  1%| | 357/37300 [09:16<18:51:28, 1.84s/it]
864
  1%| | 358/37300 [09:17<18:19:14, 1.79s/it]
865
  1%| | 359/37300 [09:19<17:44:58, 1.73s/it]
866
  1%| | 360/37300 [09:21<17:08:49, 1.67s/it]
867
  1%| | 361/37300 [09:22<16:34:13, 1.61s/it]
868
  1%| | 362/37300 [09:24<16:03:16, 1.56s/it]
869
  1%| | 363/37300 [09:25<15:38:10, 1.52s/it]
870
  1%| | 364/37300 [09:26<15:09:52, 1.48s/it]
871
  1%| | 365/37300 [09:28<14:35:35, 1.42s/it]
872
  1%| | 366/37300 [09:29<14:03:17, 1.37s/it]
873
  1%| | 367/37300 [09:30<13:37:19, 1.33s/it]
874
  1%| | 368/37300 [09:31<13:13:03, 1.29s/it]
875
  1%| | 369/37300 [09:32<12:51:08, 1.25s/it]
876
  1%| | 370/37300 [09:34<12:38:03, 1.23s/it]
877
  1%| | 371/37300 [09:35<12:28:26, 1.22s/it]
878
  1%| | 372/37300 [09:36<12:14:34, 1.19s/it]
879
  1%| | 373/37300 [09:37<12:02:00, 1.17s/it]
880
  1%| | 374/37300 [09:40<18:23:02, 1.79s/it]
881
  1%| | 375/37300 [09:43<21:07:36, 2.06s/it]
882
  1%| | 376/37300 [09:46<22:35:22, 2.20s/it]
883
  1%| | 377/37300 [09:48<23:14:10, 2.27s/it]
884
  1%| | 378/37300 [09:50<23:30:37, 2.29s/it]
885
  1%| | 379/37300 [09:53<23:30:25, 2.29s/it]
886
  1%| | 380/37300 [09:55<23:05:48, 2.25s/it]
887
  1%| | 381/37300 [09:57<22:25:43, 2.19s/it]
888
  1%| | 382/37300 [09:59<21:33:51, 2.10s/it]
889
  1%| | 383/37300 [10:01<20:54:02, 2.04s/it]
890
  1%| | 384/37300 [10:02<20:13:07, 1.97s/it]
891
  1%| | 385/37300 [10:04<19:38:56, 1.92s/it]
892
  1%| | 386/37300 [10:06<19:08:45, 1.87s/it]
893
  1%| | 387/37300 [10:08<18:41:06, 1.82s/it]
894
  1%| | 388/37300 [10:09<18:14:14, 1.78s/it]
895
  1%| | 389/37300 [10:11<17:53:33, 1.75s/it]
896
  1%| | 390/37300 [10:13<17:33:59, 1.71s/it]
897
  1%| | 391/37300 [10:14<17:23:13, 1.70s/it]
898
  1%| | 392/37300 [10:16<17:04:20, 1.67s/it]
899
  1%| | 393/37300 [10:17<16:42:16, 1.63s/it]
900
  1%| | 394/37300 [10:19<16:25:13, 1.60s/it]
901
  1%| | 395/37300 [10:21<16:16:03, 1.59s/it]
902
  1%| | 396/37300 [10:22<15:51:12, 1.55s/it]
903
  1%| | 397/37300 [10:23<15:31:56, 1.52s/it]
904
  1%| | 398/37300 [10:25<15:21:09, 1.50s/it]
905
  1%| | 399/37300 [10:26<15:11:11, 1.48s/it]
906
  1%| | 400/37300 [10:28<15:00:01, 1.46s/it]
907
 
908
  1%| | 400/37300 [10:28<15:00:01, 1.46s/it]
909
  1%| | 401/37300 [10:29<14:47:32, 1.44s/it]
910
  1%| | 402/37300 [10:31<14:38:28, 1.43s/it]
911
  1%| | 403/37300 [10:32<14:24:39, 1.41s/it]
912
  1%| | 404/37300 [10:33<14:03:43, 1.37s/it]
913
  1%| | 405/37300 [10:34<13:52:50, 1.35s/it]
914
  1%| | 406/37300 [10:36<13:39:16, 1.33s/it]
915
  1%| | 407/37300 [10:37<13:23:59, 1.31s/it]
916
  1%| | 408/37300 [10:38<13:13:51, 1.29s/it]
917
  1%| | 409/37300 [10:39<13:00:44, 1.27s/it]
918
  1%| | 410/37300 [10:41<13:07:06, 1.28s/it]
919
  1%| | 411/37300 [10:42<13:04:49, 1.28s/it]
920
  1%| | 412/37300 [10:43<12:59:21, 1.27s/it]
921
  1%| | 413/37300 [10:45<12:54:01, 1.26s/it]
922
  1%| | 414/37300 [10:46<12:36:07, 1.23s/it]
923
  1%| | 415/37300 [10:47<12:23:33, 1.21s/it]
924
  1%| | 416/37300 [10:48<12:13:15, 1.19s/it]
925
  1%| | 417/37300 [10:49<11:52:20, 1.16s/it]
926
  1%| | 418/37300 [10:50<11:43:37, 1.14s/it]
927
  1%| | 419/37300 [10:51<11:26:02, 1.12s/it]
928
  1%| | 420/37300 [10:52<11:09:22, 1.09s/it]
929
  1%| | 421/37300 [10:53<10:54:00, 1.06s/it]
930
  1%| | 422/37300 [10:54<10:38:28, 1.04s/it]
931
  1%| | 423/37300 [10:55<10:23:41, 1.01s/it]
932
  1%| | 424/37300 [10:58<15:23:42, 1.50s/it]
933
  1%| | 425/37300 [11:00<18:33:47, 1.81s/it]
934
  1%| | 426/37300 [11:03<20:13:15, 1.97s/it]
935
  1%| | 427/37300 [11:05<21:18:42, 2.08s/it]
936
  1%| | 428/37300 [11:07<21:46:07, 2.13s/it]
937
  1%| | 429/37300 [11:09<21:49:41, 2.13s/it]
938
  1%| | 430/37300 [11:12<21:44:41, 2.12s/it]
939
  1%| | 431/37300 [11:14<21:30:55, 2.10s/it]
940
  1%| | 432/37300 [11:16<21:14:23, 2.07s/it]
941
  1%| | 433/37300 [11:18<20:43:34, 2.02s/it]
942
  1%| | 434/37300 [11:19<20:18:57, 1.98s/it]
943
  1%| | 435/37300 [11:21<19:47:27, 1.93s/it]
944
  1%| | 436/37300 [11:23<19:22:53, 1.89s/it]
945
  1%| | 437/37300 [11:25<19:00:48, 1.86s/it]
946
  1%| | 438/37300 [11:27<18:51:44, 1.84s/it]
947
  1%| | 439/37300 [11:28<18:27:19, 1.80s/it]
948
  1%| | 440/37300 [11:30<18:08:23, 1.77s/it]
949
  1%| | 441/37300 [11:32<17:47:58, 1.74s/it]
950
  1%| | 442/37300 [11:33<17:31:46, 1.71s/it]
951
  1%| | 443/37300 [11:35<17:14:11, 1.68s/it]
952
  1%| | 444/37300 [11:37<16:54:51, 1.65s/it]
953
  1%| | 445/37300 [11:38<16:40:53, 1.63s/it]
954
  1%| | 446/37300 [11:40<16:22:57, 1.60s/it]
955
  1%| | 447/37300 [11:41<16:05:18, 1.57s/it]
956
  1%| | 448/37300 [11:43<15:49:17, 1.55s/it]
957
  1%| | 449/37300 [11:44<15:38:40, 1.53s/it]
958
  1%| | 450/37300 [11:46<15:24:17, 1.50s/it]
959
  1%| | 451/37300 [11:47<15:09:51, 1.48s/it]
960
  1%| | 452/37300 [11:48<14:56:31, 1.46s/it]
961
  1%| | 453/37300 [11:50<14:41:32, 1.44s/it]
962
  1%| | 454/37300 [11:51<14:31:58, 1.42s/it]
963
  1%| | 455/37300 [11:52<14:12:11, 1.39s/it]
964
  1%| | 456/37300 [11:54<13:56:00, 1.36s/it]
965
  1%| | 457/37300 [11:55<13:37:46, 1.33s/it]
966
  1%| | 458/37300 [11:56<13:19:43, 1.30s/it]
967
  1%| | 459/37300 [11:58<13:08:43, 1.28s/it]
968
  1%| | 460/37300 [11:59<12:55:34, 1.26s/it]
969
  1%| | 461/37300 [12:00<12:50:36, 1.26s/it]
970
  1%| | 462/37300 [12:01<12:42:06, 1.24s/it]
971
  1%| | 463/37300 [12:02<12:31:09, 1.22s/it]
972
  1%| | 464/37300 [12:04<12:19:40, 1.20s/it]
973
  1%| | 465/37300 [12:05<12:09:04, 1.19s/it]
974
  1%| | 466/37300 [12:06<11:57:00, 1.17s/it]
975
  1%|▏ | 467/37300 [12:07<11:40:36, 1.14s/it]
976
  1%|▏ | 468/37300 [12:08<11:26:55, 1.12s/it]
977
  1%|▏ | 469/37300 [12:09<11:13:20, 1.10s/it]
978
  1%|▏ | 470/37300 [12:10<11:00:30, 1.08s/it]
979
  1%|▏ | 471/37300 [12:11<10:49:08, 1.06s/it]
980
  1%|▏ | 472/37300 [12:12<10:36:47, 1.04s/it]
981
  1%|▏ | 473/37300 [12:13<10:23:46, 1.02s/it]
982
  1%|▏ | 474/37300 [12:16<15:07:13, 1.48s/it]
983
  1%|▏ | 475/37300 [12:18<18:21:41, 1.80s/it]
984
  1%|▏ | 476/37300 [12:20<20:09:53, 1.97s/it]
985
  1%|▏ | 477/37300 [12:23<21:01:58, 2.06s/it]
986
  1%|▏ | 478/37300 [12:25<21:27:36, 2.10s/it]
987
  1%|▏ | 479/37300 [12:27<21:35:40, 2.11s/it]
988
  1%|▏ | 480/37300 [12:29<21:23:24, 2.09s/it]
989
  1%|▏ | 481/37300 [12:31<21:10:37, 2.07s/it]
990
  1%|▏ | 482/37300 [12:33<20:46:29, 2.03s/it]
991
  1%|▏ | 483/37300 [12:35<20:20:44, 1.99s/it]
992
  1%|▏ | 484/37300 [12:37<19:52:00, 1.94s/it]
993
  1%|▏ | 485/37300 [12:39<19:23:01, 1.90s/it]
994
  1%|▏ | 486/37300 [12:40<19:02:47, 1.86s/it]
995
  1%|▏ | 487/37300 [12:42<18:35:06, 1.82s/it]
996
  1%|▏ | 488/37300 [12:44<18:10:05, 1.78s/it]
997
  1%|▏ | 489/37300 [12:45<17:56:05, 1.75s/it]
998
  1%|▏ | 490/37300 [12:47<17:39:09, 1.73s/it]
999
  1%|▏ | 491/37300 [12:49<17:27:46, 1.71s/it]
1000
  1%|▏ | 492/37300 [12:50<17:15:08, 1.69s/it]
1001
  1%|▏ | 493/37300 [12:52<16:57:58, 1.66s/it]
1002
  1%|▏ | 494/37300 [12:54<16:42:08, 1.63s/it]
1003
  1%|▏ | 495/37300 [12:55<16:29:22, 1.61s/it]
1004
  1%|▏ | 496/37300 [12:57<16:13:01, 1.59s/it]
1005
  1%|▏ | 497/37300 [12:58<15:59:08, 1.56s/it]
1006
  1%|▏ | 498/37300 [13:00<15:37:34, 1.53s/it]
1007
  1%|▏ | 499/37300 [13:01<15:14:28, 1.49s/it]
1008
  1%|▏ | 500/37300 [13:02<14:56:05, 1.46s/it]
1009
 
1010
  1%|▏ | 500/37300 [13:02<14:56:05, 1.46s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
1011
+ ***** Running Evaluation *****
1012
+ Num examples = 2302
1013
+ Batch size = 8
1014
+ {'loss': 153.5094, 'learning_rate': 4.85e-06, 'epoch': 0.27}
1015
+ {'loss': 108.8648, 'learning_rate': 9.85e-06, 'epoch': 0.54}
1016
+ {'loss': 92.5714, 'learning_rate': 1.48e-05, 'epoch': 0.8}
1017
+ {'loss': 79.9356, 'learning_rate': 1.9800000000000004e-05, 'epoch': 1.07}
1018
+ {'loss': 69.8341, 'learning_rate': 2.48e-05, 'epoch': 1.34}
1019
+
1020
+
1021
  0%| | 0/288 [00:00<?, ?it/s]
1022
+
1023
  1%| | 2/288 [00:00<00:39, 7.26it/s]
1024
+
1025
  1%| | 3/288 [00:00<01:06, 4.26it/s]
1026
+
1027
  1%|▏ | 4/288 [00:00<01:15, 3.75it/s]
1028
+
1029
  2%|▏ | 5/288 [00:01<01:13, 3.84it/s]
1030
+
1031
  2%|▏ | 6/288 [00:01<01:15, 3.74it/s]
1032
+
1033
  2%|▏ | 7/288 [00:01<01:18, 3.57it/s]
1034
+
1035
  3%|▎ | 8/288 [00:02<01:15, 3.72it/s]
1036
+
1037
  3%|▎ | 9/288 [00:02<01:19, 3.53it/s]
1038
+
1039
  3%|▎ | 10/288 [00:02<01:27, 3.17it/s]
1040
+
1041
  4%|▍ | 11/288 [00:03<01:29, 3.10it/s]
1042
+
1043
  4%|▍ | 12/288 [00:03<01:24, 3.28it/s]
1044
+
1045
  5%|▍ | 13/288 [00:03<01:25, 3.21it/s]
1046
+
1047
  5%|▍ | 14/288 [00:03<01:24, 3.23it/s]
1048
+
1049
  5%|▌ | 15/288 [00:04<01:21, 3.36it/s]
1050
+
1051
  6%|▌ | 16/288 [00:04<01:22, 3.30it/s]
1052
+
1053
  6%|▌ | 17/288 [00:04<01:21, 3.34it/s]
1054
+
1055
  6%|▋ | 18/288 [00:05<01:23, 3.21it/s]
1056
+
1057
  7%|▋ | 19/288 [00:05<01:26, 3.12it/s]
1058
+
1059
  7%|▋ | 20/288 [00:05<01:23, 3.21it/s]
1060
+
1061
  7%|▋ | 21/288 [00:06<01:22, 3.24it/s]
1062
+
1063
  8%|▊ | 22/288 [00:06<01:20, 3.30it/s]
1064
+
1065
  8%|▊ | 23/288 [00:06<01:22, 3.20it/s]
1066
+
1067
  8%|▊ | 24/288 [00:07<01:24, 3.12it/s]
1068
+
1069
  9%|▊ | 25/288 [00:07<01:23, 3.15it/s]
1070
+
1071
  9%|▉ | 26/288 [00:07<01:27, 2.99it/s]
1072
+
1073
  9%|▉ | 27/288 [00:08<01:26, 3.01it/s]
1074
+
1075
  10%|▉ | 28/288 [00:08<01:23, 3.11it/s]
1076
+
1077
  10%|█ | 29/288 [00:08<01:19, 3.26it/s]
1078
+
1079
  10%|█ | 30/288 [00:08<01:11, 3.60it/s]
1080
+
1081
  11%|█ | 31/288 [00:09<01:10, 3.66it/s]
1082
+
1083
  11%|█ | 32/288 [00:09<01:12, 3.52it/s]
1084
+
1085
  11%|█▏ | 33/288 [00:09<01:10, 3.62it/s]
1086
+
1087
  12%|█▏ | 34/288 [00:10<01:11, 3.55it/s]
1088
+
1089
  12%|█▏ | 35/288 [00:10<01:12, 3.48it/s]
1090
+
1091
  12%|█▎ | 36/288 [00:10<01:14, 3.40it/s]
1092
+
1093
  13%|█▎ | 37/288 [00:10<01:13, 3.40it/s]
1094
+
1095
  13%|█▎ | 38/288 [00:11<01:13, 3.41it/s]
1096
+
1097
  14%|█▎ | 39/288 [00:11<01:14, 3.34it/s]
1098
+
1099
  14%|█▍ | 40/288 [00:11<01:17, 3.19it/s]
1100
+
1101
  14%|█▍ | 41/288 [00:12<01:15, 3.26it/s]
1102
+
1103
  15%|█▍ | 42/288 [00:12<01:17, 3.16it/s]
1104
+
1105
  15%|█▍ | 43/288 [00:12<01:18, 3.11it/s]
1106
+
1107
  15%|█▌ | 44/288 [00:13<01:17, 3.15it/s]
1108
+
1109
  16%|█▌ | 45/288 [00:13<01:17, 3.13it/s]
1110
+
1111
  16%|█▌ | 46/288 [00:13<01:16, 3.16it/s]
1112
+
1113
  16%|█▋ | 47/288 [00:14<01:20, 3.01it/s]
1114
+
1115
  17%|█▋ | 48/288 [00:14<01:24, 2.85it/s]
1116
+
1117
  17%|█▋ | 49/288 [00:14<01:20, 2.96it/s]
1118
+
1119
  17%|█▋ | 50/288 [00:15<01:18, 3.05it/s]
1120
+
1121
  18%|█▊ | 51/288 [00:15<01:16, 3.08it/s]
1122
+
1123
  18%|█▊ | 52/288 [00:15<01:19, 2.95it/s]
1124
+
1125
  18%|█▊ | 53/288 [00:16<01:21, 2.88it/s]
1126
+
1127
  19%|█▉ | 54/288 [00:16<01:19, 2.95it/s]
1128
+
1129
  19%|█▉ | 55/288 [00:16<01:22, 2.84it/s]
1130
+
1131
  19%|█▉ | 56/288 [00:17<01:19, 2.91it/s]
1132
+
1133
  20%|█▉ | 57/288 [00:17<01:15, 3.08it/s]
1134
+
1135
  20%|██ | 58/288 [00:17<01:10, 3.24it/s]
1136
+
1137
  20%|██ | 59/288 [00:18<01:07, 3.37it/s]
1138
+
1139
  21%|██ | 60/288 [00:18<01:04, 3.56it/s]
1140
+
1141
  21%|██ | 61/288 [00:18<01:08, 3.33it/s]
1142
+
1143
  22%|██▏ | 62/288 [00:18<01:08, 3.28it/s]
1144
+
1145
  22%|██▏ | 63/288 [00:19<01:09, 3.23it/s]
1146
+
1147
  22%|██▏ | 64/288 [00:19<01:07, 3.32it/s]
1148
+
1149
  23%|██▎ | 65/288 [00:19<01:10, 3.17it/s]
1150
+
1151
  23%|██▎ | 66/288 [00:20<01:11, 3.10it/s]
1152
+
1153
  23%|██▎ | 67/288 [00:20<01:09, 3.16it/s]
1154
+
1155
  24%|██▎ | 68/288 [00:20<01:11, 3.09it/s]
1156
+
1157
  24%|██▍ | 69/288 [00:21<01:12, 3.04it/s]
1158
+
1159
  24%|██▍ | 70/288 [00:21<01:10, 3.07it/s]
1160
+
1161
  25%|██▍ | 71/288 [00:21<01:13, 2.95it/s]
1162
+
1163
  25%|██▌ | 72/288 [00:22<01:16, 2.84it/s]
1164
+
1165
  25%|██▌ | 73/288 [00:22<01:14, 2.89it/s]
1166
+
1167
  26%|██▌ | 74/288 [00:22<01:14, 2.88it/s]
1168
+
1169
  26%|██▌ | 75/288 [00:23<01:14, 2.87it/s]
1170
+
1171
  26%|██▋ | 76/288 [00:23<01:10, 2.99it/s]
1172
+
1173
  27%|██▋ | 77/288 [00:23<01:10, 2.98it/s]
1174
+
1175
  27%|██▋ | 78/288 [00:24<01:09, 3.04it/s]
1176
+
1177
  27%|██▋ | 79/288 [00:24<01:07, 3.11it/s]
1178
+
1179
  28%|██▊ | 80/288 [00:24<01:05, 3.20it/s]
1180
+
1181
  28%|██▊ | 81/288 [00:25<01:07, 3.06it/s]
1182
+
1183
  28%|██▊ | 82/288 [00:25<01:05, 3.13it/s]
1184
+
1185
  29%|██▉ | 83/288 [00:25<01:05, 3.13it/s]
1186
+
1187
  29%|██▉ | 84/288 [00:26<01:00, 3.39it/s]
1188
+
1189
  30%|██▉ | 85/288 [00:26<00:58, 3.44it/s]
1190
+
1191
  30%|██▉ | 86/288 [00:26<01:02, 3.24it/s]
1192
+
1193
  30%|███ | 87/288 [00:27<01:07, 2.99it/s]
1194
+
1195
  31%|███ | 88/288 [00:27<01:06, 2.99it/s]
1196
+
1197
  31%|███ | 89/288 [00:27<01:04, 3.09it/s]
1198
+
1199
  31%|███▏ | 90/288 [00:28<01:02, 3.17it/s]
1200
+
1201
  32%|███▏ | 91/288 [00:28<01:03, 3.12it/s]
1202
+
1203
  32%|███▏ | 92/288 [00:28<01:02, 3.15it/s]
1204
+
1205
  32%|███▏ | 93/288 [00:29<01:03, 3.06it/s]
1206
+
1207
  33%|███▎ | 94/288 [00:29<01:02, 3.09it/s]
1208
+
1209
  33%|███▎ | 95/288 [00:29<01:02, 3.08it/s]
1210
+
1211
  33%|███▎ | 96/288 [00:29<00:58, 3.26it/s]
1212
+
1213
  34%|███▎ | 97/288 [00:30<00:56, 3.36it/s]
1214
+
1215
  34%|███▍ | 98/288 [00:30<00:58, 3.24it/s]
1216
+
1217
  34%|███▍ | 99/288 [00:30<01:00, 3.11it/s]
1218
+
1219
  35%|███▍ | 100/288 [00:31<01:05, 2.87it/s]
1220
+
1221
  35%|███▌ | 101/288 [00:31<01:03, 2.97it/s]
1222
+
1223
  35%|███▌ | 102/288 [00:31<00:59, 3.11it/s]
1224
+
1225
  36%|███▌ | 103/288 [00:32<00:58, 3.17it/s]
1226
+
1227
  36%|███▌ | 104/288 [00:32<00:58, 3.13it/s]
1228
+
1229
  36%|███▋ | 105/288 [00:32<00:55, 3.31it/s]
1230
+
1231
  37%|███▋ | 106/288 [00:33<00:57, 3.16it/s]
1232
+
1233
  37%|███▋ | 107/288 [00:33<00:57, 3.15it/s]
1234
+
1235
  38%|███▊ | 108/288 [00:33<00:57, 3.13it/s]
1236
+
1237
  38%|███▊ | 109/288 [00:34<00:57, 3.11it/s]
1238
+
1239
  38%|███▊ | 110/288 [00:34<00:56, 3.15it/s]
1240
+
1241
  39%|███▊ | 111/288 [00:34<01:02, 2.82it/s]
1242
+
1243
  39%|███▉ | 112/288 [00:35<01:01, 2.85it/s]
1244
+
1245
  39%|███▉ | 113/288 [00:35<00:59, 2.93it/s]
1246
+
1247
  40%|███▉ | 114/288 [00:35<00:58, 2.96it/s]
1248
+
1249
  40%|███▉ | 115/288 [00:36<00:56, 3.06it/s]
1250
+
1251
  40%|████ | 116/288 [00:36<00:56, 3.02it/s]
1252
+
1253
  41%|████ | 117/288 [00:36<00:55, 3.06it/s]
1254
+
1255
  41%|████ | 118/288 [00:37<00:52, 3.23it/s]
1256
+
1257
  41%|████▏ | 119/288 [00:37<00:53, 3.13it/s]
1258
+
1259
  42%|████▏ | 120/288 [00:37<01:00, 2.78it/s]
1260
+
1261
  42%|████▏ | 121/288 [00:38<00:56, 2.94it/s]
1262
+
1263
  42%|████▏ | 122/288 [00:38<00:51, 3.20it/s]
1264
+
1265
  43%|████▎ | 123/288 [00:38<00:54, 3.05it/s]
1266
+
1267
  43%|████▎ | 124/288 [00:39<00:53, 3.07it/s]
1268
+
1269
  43%|████▎ | 125/288 [00:39<00:52, 3.10it/s]
1270
+
1271
  44%|████▍ | 126/288 [00:39<00:54, 2.99it/s]
1272
+
1273
  44%|████▍ | 127/288 [00:40<00:52, 3.07it/s]
1274
+
1275
  44%|████▍ | 128/288 [00:40<00:51, 3.08it/s]
1276
+
1277
  45%|████▍ | 129/288 [00:41<01:05, 2.41it/s]
1278
+
1279
  45%|████▌ | 130/288 [00:41<01:01, 2.56it/s]
1280
+
1281
  45%|████▌ | 131/288 [00:41<00:57, 2.75it/s]
1282
+
1283
  46%|████▌ | 132/288 [00:42<00:55, 2.83it/s]
1284
+
1285
  46%|████▌ | 133/288 [00:42<00:55, 2.78it/s]
1286
+
1287
  47%|████▋ | 134/288 [00:42<00:55, 2.77it/s]
1288
+
1289
  47%|████▋ | 135/288 [00:43<00:53, 2.85it/s]
1290
+
1291
  47%|████▋ | 136/288 [00:43<00:51, 2.93it/s]
1292
+
1293
  48%|████▊ | 137/288 [00:43<01:01, 2.47it/s]
1294
+
1295
  48%|████▊ | 138/288 [00:44<00:58, 2.57it/s]
1296
+
1297
  48%|████▊ | 139/288 [00:44<00:55, 2.66it/s]
1298
+
1299
  49%|████▊ | 140/288 [00:45<00:53, 2.74it/s]
1300
+
1301
  49%|████▉ | 141/288 [00:45<00:53, 2.74it/s]
1302
+
1303
  49%|████▉ | 142/288 [00:45<00:50, 2.87it/s]
1304
+
1305
  50%|████▉ | 143/288 [00:46<00:50, 2.87it/s]
1306
+
1307
  50%|█████ | 144/288 [00:46<00:50, 2.85it/s]
1308
+
1309
  50%|█████ | 145/288 [00:46<00:49, 2.88it/s]
1310
+
1311
  51%|█████ | 146/288 [00:47<00:47, 3.00it/s]
1312
+
1313
  51%|█████ | 147/288 [00:47<00:45, 3.10it/s]
1314
+
1315
  51%|█████▏ | 148/288 [00:47<00:45, 3.08it/s]
1316
+
1317
  52%|█████▏ | 149/288 [00:47<00:44, 3.10it/s]
1318
+
1319
  52%|█████▏ | 150/288 [00:48<00:44, 3.13it/s]
1320
+
1321
  52%|█████▏ | 151/288 [00:48<00:49, 2.77it/s]
1322
+
1323
  53%|█████▎ | 152/288 [00:49<00:48, 2.82it/s]
1324
+
1325
  53%|█████▎ | 153/288 [00:49<00:45, 2.99it/s]
1326
+
1327
  53%|█████▎ | 154/288 [00:49<00:45, 2.93it/s]
1328
+
1329
  54%|█████▍ | 155/288 [00:50<00:44, 2.96it/s]
1330
+
1331
  54%|█████▍ | 156/288 [00:50<00:43, 3.03it/s]
1332
+
1333
  55%|███���█▍ | 157/288 [00:50<00:43, 3.03it/s]
1334
+
1335
  55%|█████▍ | 158/288 [00:51<00:50, 2.58it/s]
1336
+
1337
  55%|█████▌ | 159/288 [00:51<00:47, 2.69it/s]
1338
+
1339
  56%|█████▌ | 160/288 [00:51<00:44, 2.88it/s]
1340
+
1341
  56%|█████▌ | 161/288 [00:52<00:41, 3.02it/s]
1342
+
1343
  56%|█████▋ | 162/288 [00:52<00:40, 3.14it/s]
1344
+
1345
  57%|█████▋ | 163/288 [00:52<00:40, 3.09it/s]
1346
+
1347
  57%|█████▋ | 164/288 [00:53<00:45, 2.75it/s]
1348
+
1349
  57%|█████▋ | 165/288 [00:53<00:43, 2.83it/s]
1350
+
1351
  58%|█████▊ | 166/288 [00:53<00:42, 2.87it/s]
1352
+
1353
  58%|█████▊ | 167/288 [00:54<00:40, 2.99it/s]
1354
+
1355
  58%|█████▊ | 168/288 [00:54<00:40, 2.93it/s]
1356
+
1357
  59%|█████▊ | 169/288 [00:54<00:40, 2.94it/s]
1358
+
1359
  59%|█████▉ | 170/288 [00:55<00:45, 2.57it/s]
1360
+
1361
  59%|█████▉ | 171/288 [00:55<00:43, 2.68it/s]
1362
+
1363
  60%|█████▉ | 172/288 [00:56<00:41, 2.79it/s]
1364
+
1365
  60%|██████ | 173/288 [00:56<00:40, 2.83it/s]
1366
+
1367
  60%|██████ | 174/288 [00:56<00:39, 2.89it/s]
1368
+
1369
  61%|██████ | 175/288 [00:57<00:37, 3.01it/s]
1370
+
1371
  61%|██████ | 176/288 [00:57<00:51, 2.17it/s]
1372
+
1373
  61%|██████▏ | 177/288 [00:58<00:47, 2.36it/s]
1374
+
1375
  62%|██████▏ | 178/288 [00:58<00:43, 2.50it/s]
1376
+
1377
  62%|██████▏ | 179/288 [00:58<00:40, 2.67it/s]
1378
+
1379
  62%|██████▎ | 180/288 [00:59<00:39, 2.77it/s]
1380
+
1381
  63%|██████▎ | 181/288 [00:59<00:39, 2.69it/s]
1382
+
1383
  63%|██████▎ | 182/288 [00:59<00:39, 2.71it/s]
1384
+
1385
  64%|██████▎ | 183/288 [01:00<00:37, 2.82it/s]
1386
+
1387
  64%|██████▍ | 184/288 [01:00<00:36, 2.86it/s]
1388
+
1389
  64%|██████▍ | 185/288 [01:00<00:35, 2.92it/s]
1390
+
1391
  65%|██████▍ | 186/288 [01:01<00:38, 2.62it/s]
1392
+
1393
  65%|██████▍ | 187/288 [01:01<00:38, 2.63it/s]
1394
+
1395
  65%|██████▌ | 188/288 [01:02<00:36, 2.74it/s]
1396
+
1397
  66%|██████▌ | 189/288 [01:02<00:34, 2.90it/s]
1398
+
1399
  66%|██████▌ | 190/288 [01:02<00:34, 2.81it/s]
1400
+
1401
  66%|██████▋ | 191/288 [01:03<00:39, 2.43it/s]
1402
+
1403
  67%|██████▋ | 192/288 [01:03<00:36, 2.65it/s]
1404
+
1405
  67%|██████▋ | 193/288 [01:03<00:34, 2.76it/s]
1406
+
1407
  67%|██████▋ | 194/288 [01:04<00:34, 2.72it/s]
1408
+
1409
  68%|██████▊ | 195/288 [01:04<00:33, 2.80it/s]
1410
+
1411
  68%|██████▊ | 196/288 [01:05<00:39, 2.31it/s]
1412
+
1413
  68%|██████▊ | 197/288 [01:05<00:36, 2.47it/s]
1414
+
1415
  69%|██████▉ | 198/288 [01:05<00:33, 2.72it/s]
1416
+
1417
  69%|██████▉ | 199/288 [01:06<00:31, 2.82it/s]
1418
+
1419
  69%|██████▉ | 200/288 [01:06<00:29, 3.00it/s]
1420
+
1421
  70%|██████▉ | 201/288 [01:06<00:35, 2.46it/s]
1422
+
1423
  70%|███████ | 202/288 [01:07<00:32, 2.62it/s]
1424
+
1425
  70%|███████ | 203/288 [01:07<00:31, 2.73it/s]
1426
+
1427
  71%|███████ | 204/288 [01:07<00:29, 2.82it/s]
1428
+
1429
  71%|███████ | 205/288 [01:08<00:28, 2.95it/s]
1430
+
1431
  72%|███████▏ | 206/288 [01:08<00:33, 2.42it/s]
1432
+
1433
  72%|███████▏ | 207/288 [01:09<00:32, 2.50it/s]
1434
+
1435
  72%|███████▏ | 208/288 [01:09<00:31, 2.57it/s]
1436
+
1437
  73%|███████▎ | 209/288 [01:09<00:30, 2.61it/s]
1438
+
1439
  73%|███████▎ | 210/288 [01:10<00:27, 2.87it/s]
1440
+
1441
  73%|███████▎ | 211/288 [01:10<00:36, 2.14it/s]
1442
+
1443
  74%|███████▎ | 212/288 [01:11<00:32, 2.36it/s]
1444
+
1445
  74%|███████▍ | 213/288 [01:11<00:31, 2.38it/s]
1446
+
1447
  74%|███████▍ | 214/288 [01:12<00:29, 2.54it/s]
1448
+
1449
  75%|███████▍ | 215/288 [01:12<00:31, 2.30it/s]
1450
+
1451
  75%|███████▌ | 216/288 [01:12<00:29, 2.45it/s]
1452
+
1453
  75%|███████▌ | 217/288 [01:13<00:26, 2.67it/s]
1454
+
1455
  76%|███████▌ | 218/288 [01:13<00:25, 2.72it/s]
1456
+
1457
  76%|███████▌ | 219/288 [01:14<00:29, 2.32it/s]
1458
+
1459
  76%|███████▋ | 220/288 [01:14<00:28, 2.37it/s]
1460
+
1461
  77%|███████▋ | 221/288 [01:14<00:26, 2.49it/s]
1462
+
1463
  77%|███████▋ | 222/288 [01:15<00:26, 2.54it/s]
1464
+
1465
  77%|███████▋ | 223/288 [01:15<00:28, 2.30it/s]
1466
+
1467
  78%|███████▊ | 224/288 [01:16<00:26, 2.45it/s]
1468
+
1469
  78%|███████▊ | 225/288 [01:16<00:24, 2.61it/s]
1470
+
1471
  78%|███████▊ | 226/288 [01:16<00:22, 2.72it/s]
1472
+
1473
  79%|███████▉ | 227/288 [01:17<00:27, 2.25it/s]
1474
+
1475
  79%|███████▉ | 228/288 [01:17<00:24, 2.40it/s]
1476
+
1477
  80%|███████▉ | 229/288 [01:18<00:23, 2.54it/s]
1478
+
1479
  80%|███████▉ | 230/288 [01:18<00:21, 2.66it/s]
1480
+
1481
  80%|████████ | 231/288 [01:19<00:25, 2.23it/s]
1482
+
1483
  81%|████████ | 232/288 [01:19<00:22, 2.44it/s]
1484
+
1485
  81%|████████ | 233/288 [01:19<00:21, 2.53it/s]
1486
+
1487
  81%|████████▏ | 234/288 [01:20<00:19, 2.71it/s]
1488
+
1489
  82%|████████▏ | 235/288 [01:20<00:25, 2.12it/s]
1490
+
1491
  82%|████████▏ | 236/288 [01:21<00:22, 2.28it/s]
1492
+
1493
  82%|████████▏ | 237/288 [01:21<00:21, 2.41it/s]
1494
+
1495
  83%|████████▎ | 238/288 [01:21<00:20, 2.50it/s]
1496
+
1497
  83%|████████▎ | 239/288 [01:22<00:24, 2.00it/s]
1498
+
1499
  83%|████████▎ | 240/288 [01:22<00:21, 2.27it/s]
1500
+
1501
  84%|████████▎ | 241/288 [01:23<00:19, 2.42it/s]
1502
+
1503
  84%|████████▍ | 242/288 [01:23<00:18, 2.54it/s]
1504
+
1505
  84%|████████▍ | 243/288 [01:24<00:21, 2.06it/s]
1506
+
1507
  85%|████████▍ | 244/288 [01:24<00:19, 2.24it/s]
1508
+
1509
  85%|████████▌ | 245/288 [01:25<00:17, 2.39it/s]
1510
+
1511
  85%|████████▌ | 246/288 [01:25<00:17, 2.45it/s]
1512
+
1513
  86%|████████▌ | 247/288 [01:26<00:20, 2.02it/s]
1514
+
1515
  86%|████████▌ | 248/288 [01:26<00:17, 2.25it/s]
1516
+
1517
  86%|████████▋ | 249/288 [01:26<00:16, 2.37it/s]
1518
+
1519
  87%|████████▋ | 250/288 [01:27<00:15, 2.45it/s]
1520
+
1521
  87%|████████▋ | 251/288 [01:27<00:19, 1.93it/s]
1522
+
1523
  88%|████████▊ | 252/288 [01:28<00:17, 2.10it/s]
1524
+
1525
  88%|████████▊ | 253/288 [01:28<00:14, 2.34it/s]
1526
+
1527
  88%|████████▊ | 254/288 [01:29<00:15, 2.19it/s]
1528
+
1529
  89%|████████▊ | 255/288 [01:29<00:13, 2.44it/s]
1530
+
1531
  89%|████████▉ | 256/288 [01:29<00:12, 2.56it/s]
1532
+
1533
  89%|████████▉ | 257/288 [01:30<00:14, 2.19it/s]
1534
+
1535
  90%|████████▉ | 258/288 [01:30<00:13, 2.30it/s]
1536
+
1537
  90%|████████▉ | 259/288 [01:31<00:12, 2.39it/s]
1538
+
1539
  90%|█████████ | 260/288 [01:31<00:13, 2.08it/s]
1540
+
1541
  91%|█████████ | 261/288 [01:32<00:11, 2.26it/s]
1542
+
1543
  91%|█████████ | 262/288 [01:32<00:10, 2.41it/s]
1544
+
1545
  91%|█████████▏| 263/288 [01:33<00:11, 2.10it/s]
1546
+
1547
  92%|█████████▏| 264/288 [01:33<00:10, 2.26it/s]
1548
+
1549
  92%|█████████▏| 265/288 [01:33<00:09, 2.39it/s]
1550
+
1551
  92%|█████████▏| 266/288 [01:34<00:10, 2.08it/s]
1552
+
1553
  93%|█████████▎| 267/288 [01:34<00:09, 2.20it/s]
1554
+
1555
  93%|█████████▎| 268/288 [01:35<00:08, 2.38it/s]
1556
+
1557
  93%|█████████▎| 269/288 [01:35<00:09, 2.00it/s]
1558
+
1559
  94%|█████████▍| 270/288 [01:36<00:08, 2.11it/s]
1560
+
1561
  94%|█████████▍| 271/288 [01:36<00:07, 2.28it/s]
1562
+
1563
  94%|█████████▍| 272/288 [01:37<00:08, 1.85it/s]
1564
+
1565
  95%|█████████▍| 273/288 [01:37<00:07, 2.08it/s]
1566
+
1567
  95%|█████████▌| 274/288 [01:38<00:06, 2.28it/s]
1568
+
1569
  95%|█████████▌| 275/288 [01:38<00:06, 1.89it/s]
1570
+
1571
  96%|█████████▌| 276/288 [01:39<00:05, 2.12it/s]
1572
+
1573
  96%|█████████▌| 277/288 [01:39<00:04, 2.29it/s]
1574
+
1575
  97%|█████████▋| 278/288 [01:40<00:05, 1.78it/s]
1576
+
1577
  97%|█████████▋| 279/288 [01:40<00:04, 2.05it/s]
1578
+
1579
  97%|█████████▋| 280/288 [01:41<00:03, 2.24it/s]
1580
+
1581
  98%|█████████▊| 281/288 [01:41<00:03, 1.83it/s]
1582
+
1583
  98%|█████████▊| 282/288 [01:42<00:02, 2.06it/s]
1584
+
1585
  98%|█████████▊| 283/288 [01:42<00:02, 2.17it/s]
1586
+
1587
  99%|█████████▊| 284/288 [01:43<00:02, 1.80it/s]
1588
+
1589
  99%|█████████▉| 285/288 [01:43<00:01, 2.05it/s]
1590
+
1591
  99%|█████████▉| 286/288 [01:44<00:00, 2.26it/s]
1592
+
1593
+
1594
 
1595
+
1596
 
1597
  1%|▏ | 500/37300 [15:13<14:56:05, 1.46s/it]
1598
+
1599
+
1600
  Saving model checkpoint to ./checkpoint-500
1601
+ Configuration saved in ./checkpoint-500/config.json
1602
+ Model weights saved in ./checkpoint-500/pytorch_model.bin
1603
+ Configuration saved in ./checkpoint-500/preprocessor_config.json
1604
+ Configuration saved in ./preprocessor_config.json
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:572879a364a49fa364fbb6c4c85b5ca6ca71adfa5d735fdd625948ca4a5d6f55
3
+ size 1276909233
run.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python run_speech_recognition_ctc.py \
2
+ --dataset_name="common_voice" \
3
+ --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
4
+ --dataset_config_name="zh-HK" \
5
+ --output_dir="./" \
6
+ --overwrite_output_dir \
7
+ --num_train_epochs="100" \
8
+ --per_device_train_batch_size="8" \
9
+ --per_device_eval_batch_size="8" \
10
+ --gradient_accumulation_steps="4" \
11
+ --learning_rate="1e-4" \
12
+ --warmup_steps="2000" \
13
+ --length_column_name="input_length" \
14
+ --max_duration_in_seconds="7" \
15
+ --max_eval_samples="3000" \
16
+ --evaluation_strategy="steps" \
17
+ --text_column_name="sentence" \
18
+ --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – ! - : – 。 》 , ) , ? ; ~ ~ … ︰ , ( 」 ‧ 《 ﹔ 、 — / , 「 ﹖ · \
19
+ --save_steps="500" \
20
+ --eval_steps="500" \
21
+ --logging_steps="100" \
22
+ --layerdrop="0.0" \
23
+ --activation_dropout="0.1" \
24
+ --save_total_limit="3" \
25
+ --freeze_feature_encoder \
26
+ --feat_proj_dropout="0.0" \
27
+ --mask_time_prob="0.75" \
28
+ --mask_time_length="10" \
29
+ --mask_feature_prob="0.25" \
30
+ --mask_feature_length="64" \
31
+ --gradient_checkpointing \
32
+ --use_auth_token \
33
+ --fp16 \
34
+ --group_by_length \
35
+ --do_train --do_eval \
36
+ --report_to="tensorboard" \
37
+ --push_to_hub
run_speech_recognition_ctc.py ADDED
@@ -0,0 +1,829 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and limitations under the License.
15
+
16
+ """ Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition"""
17
+
18
+ import functools
19
+ import json
20
+ import logging
21
+ import os
22
+ import re
23
+ import sys
24
+ import warnings
25
+ from dataclasses import dataclass, field
26
+ from typing import Dict, List, Optional, Union
27
+
28
+ import datasets
29
+ import numpy as np
30
+ import torch
31
+ from datasets import DatasetDict, load_dataset, load_metric
32
+
33
+ import transformers
34
+ from transformers import (
35
+ AutoConfig,
36
+ AutoFeatureExtractor,
37
+ AutoModelForCTC,
38
+ AutoProcessor,
39
+ AutoTokenizer,
40
+ HfArgumentParser,
41
+ Trainer,
42
+ TrainingArguments,
43
+ Wav2Vec2Processor,
44
+ set_seed,
45
+ )
46
+ from transformers.trainer_utils import get_last_checkpoint, is_main_process
47
+ from transformers.utils import check_min_version
48
+ from transformers.utils.versions import require_version
49
+
50
+
51
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
check_min_version("4.17.0.dev0")

# Fail early when the installed `datasets` is older than 1.13.3. The hint points at the
# requirements file of the speech-recognition example this script belongs to (it
# previously named the text-classification example).
require_version(
    "datasets>=1.13.3",
    "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt",
)


# Module-level logger; its level is configured later, once the arguments are parsed.
logger = logging.getLogger(__name__)
61
+
62
+
63
def list_field(default=None, metadata=None):
    """
    Build a dataclass field whose default value is produced by a factory.

    Args:
        default: Default value of the field, typically a list or ``None``.
        metadata: Optional metadata mapping forwarded to ``dataclasses.field``.

    Returns:
        The ``dataclasses.field`` object to assign inside a dataclass body.
    """

    def make_default():
        # Give every instance its own shallow copy of a list default. Returning the
        # very same list object each time would let an in-place edit made through
        # one instance show up in every other instance.
        return list(default) if isinstance(default, list) else default

    return field(default_factory=make_default, metadata=metadata)
65
+
66
+
67
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.

    Every field carries a ``help`` entry in its metadata. Adjacent string literals in
    those help texts are concatenated verbatim, so each fragment must carry its own
    separating space.
    """

    # --- checkpoint / tokenizer location -------------------------------------------
    model_name_or_path: str = field(
        metadata={
            "help": "Path to pretrained model or model identifier from huggingface.co/models"
        }
    )
    tokenizer_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"
        },
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={
            "help": "Where do you want to store the pretrained models downloaded from huggingface.co"
        },
    )
    freeze_feature_encoder: bool = field(
        default=True,
        metadata={"help": "Whether to freeze the feature encoder layers of the model."},
    )

    # --- dropout settings ----------------------------------------------------------
    attention_dropout: float = field(
        default=0.0,
        metadata={"help": "The dropout ratio for the attention probabilities."},
    )
    activation_dropout: float = field(
        default=0.0,
        metadata={
            "help": "The dropout ratio for activations inside the fully connected layer."
        },
    )
    feat_proj_dropout: float = field(
        default=0.0, metadata={"help": "The dropout ratio for the projected features."}
    )
    hidden_dropout: float = field(
        default=0.0,
        metadata={
            "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
        },
    )
    final_dropout: float = field(
        default=0.0,
        metadata={"help": "The dropout probability for the final projection layer."},
    )

    # --- masking settings (time axis, then feature axis) ---------------------------
    mask_time_prob: float = field(
        default=0.05,
        metadata={
            # Trailing spaces added: the fragments used to join as "vectorspan" and
            # "featurevectors".
            "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector "
            "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature "
            "vectors will be masked along the time axis."
        },
    )
    mask_time_length: int = field(
        default=10,
        metadata={"help": "Length of vector span to mask along the time axis."},
    )
    mask_feature_prob: float = field(
        default=0.0,
        metadata={
            # Same missing-space fix as above; the last sentence also named the time
            # axis although this option describes masking along the feature axis.
            "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector "
            "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the feature axis."
        },
    )
    mask_feature_length: int = field(
        default=10,
        metadata={"help": "Length of vector span to mask along the feature axis."},
    )

    # --- remaining model options ---------------------------------------------------
    layerdrop: float = field(
        default=0.0, metadata={"help": "The LayerDrop probability."}
    )
    ctc_loss_reduction: Optional[str] = field(
        default="mean",
        metadata={
            "help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."
        },
    )
149
+
150
+
151
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.
    """

    # --- dataset selection ---------------------------------------------------------
    dataset_name: str = field(
        metadata={
            # Previously a copy of the `dataset_config_name` help text.
            "help": "The name of the dataset to use (via the datasets library)."
        }
    )
    dataset_config_name: str = field(
        default=None,
        metadata={
            "help": "The configuration name of the dataset to use (via the datasets library)."
        },
    )
    train_split_name: str = field(
        default="train+validation",
        metadata={
            # The help text now states the default that is actually set above.
            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train+validation'"
        },
    )
    eval_split_name: str = field(
        default="test",
        metadata={
            # Previously described this as the *training* split.
            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
        },
    )
    audio_column_name: str = field(
        default="audio",
        metadata={
            "help": "The name of the dataset column containing the audio data. Defaults to 'audio'"
        },
    )
    text_column_name: str = field(
        default="text",
        metadata={
            "help": "The name of the dataset column containing the text data. Defaults to 'text'"
        },
    )

    # --- preprocessing controls ----------------------------------------------------
    overwrite_cache: bool = field(
        default=False,
        metadata={"help": "Overwrite the cached preprocessed datasets or not."},
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
            "value if set."
        },
    )
    chars_to_ignore: Optional[List[str]] = list_field(
        default=None,
        metadata={"help": "A list of characters to remove from the transcripts."},
    )
    eval_metrics: List[str] = list_field(
        default=["wer", "cer"],
        metadata={
            "help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"
        },
    )
    max_duration_in_seconds: float = field(
        default=20.0,
        metadata={
            "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
        },
    )
    min_duration_in_seconds: float = field(
        default=0.0,
        metadata={
            "help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"
        },
    )
    preprocessing_only: bool = field(
        default=False,
        metadata={
            "help": "Whether to only do data preprocessing and skip training. "
            "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
            "so that the cached datasets can consequently be loaded in distributed training"
        },
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            # Trailing space added: the fragments used to join as "running:obj:".
            "help": "If :obj:`True`, will use the token generated when running "
            ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
        },
    )

    # --- tokenizer special tokens --------------------------------------------------
    unk_token: str = field(
        default="[UNK]", metadata={"help": "The unk token for the tokenizer"},
    )
    pad_token: str = field(
        default="[PAD]", metadata={"help": "The padding token for the tokenizer"},
    )
    word_delimiter_token: str = field(
        default="|", metadata={"help": "The word delimiter token for the tokenizer"},
    )
    phoneme_language: Optional[str] = field(
        default=None,
        metadata={
            # Wording fix: the text used to read "should be used be passed".
            "help": "The target language that should be"
            " passed to the tokenizer for tokenization. Note that"
            " this is only relevant if the model classifies the"
            " input audio to a sequence of phoneme sequences."
        },
    )
274
+
275
+
276
@dataclass
class DataCollatorCTCWithPadding:
    """
    Collate individual examples into one padded batch for CTC training.

    Audio inputs and label ids are padded in two separate calls because they have
    different lengths and need different padding methods.

    Args:
        processor (:class:`~transformers.AutoProcessor`):
            The processor whose ``pad`` method is used for both the inputs and, inside
            ``as_target_processor()``, the labels.
        padding (:obj:`bool` or :obj:`str`, `optional`, defaults to :obj:`"longest"`):
            Padding strategy forwarded unchanged to ``processor.pad`` for inputs and labels.
        pad_to_multiple_of (:obj:`int`, `optional`):
            Forwarded to ``processor.pad`` when padding the ``input_values``.
        pad_to_multiple_of_labels (:obj:`int`, `optional`):
            Forwarded to ``processor.pad`` when padding the labels.
    """

    processor: AutoProcessor
    padding: Union[bool, str] = "longest"
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(
        self, features: List[Dict[str, Union[List[int], torch.Tensor]]]
    ) -> Dict[str, torch.Tensor]:
        # Separate the audio inputs from the label ids; each side is padded on its own.
        audio_examples = []
        label_examples = []
        for example in features:
            audio_examples.append({"input_values": example["input_values"]})
        for example in features:
            label_examples.append({"input_ids": example["labels"]})

        batch = self.processor.pad(
            audio_examples,
            padding=self.padding,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Labels are padded while the processor is switched to its target side.
        with self.processor.as_target_processor():
            padded_labels = self.processor.pad(
                label_examples,
                padding=self.padding,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # Positions whose attention mask is not 1 are padding: overwrite them with
        # -100 so they are ignored by the loss.
        label_ids = padded_labels["input_ids"]
        is_padding = padded_labels.attention_mask.ne(1)
        batch["labels"] = label_ids.masked_fill(is_padding, -100)

        return batch
340
+
341
+
342
def create_vocabulary_from_data(
    datasets: "DatasetDict",
    word_delimiter_token: Optional[str] = None,
    unk_token: Optional[str] = None,
    pad_token: Optional[str] = None,
):
    """
    Build a character-level vocabulary from the ``target_text`` column of every split.

    Args:
        datasets: Mapping of split name to dataset. It must contain a ``"train"``
            split (its column names are the ones removed by the ``map`` call) and
            every split must provide a ``target_text`` column.
        word_delimiter_token: If not ``None``, this token takes over the id of the
            space character and the space entry is removed.
        unk_token: If not ``None``, appended to the vocabulary with the next free id.
        pad_token: If not ``None``, appended to the vocabulary with the next free id.

    Returns:
        A dict mapping each token to its integer id. Characters receive ids in
        sorted order; the unk and pad tokens come last.

    Raises:
        KeyError: If ``word_delimiter_token`` is given but no space character occurs
            in the data.
    """

    # Given training and test labels create vocabulary
    def extract_all_chars(batch):
        all_text = " ".join(batch["target_text"])
        vocab = list(set(all_text))
        return {"vocab": [vocab], "all_text": [all_text]}

    # batch_size=-1 hands each whole split to `extract_all_chars` in a single call,
    # so every split ends up with exactly one "vocab" row.
    vocabs = datasets.map(
        extract_all_chars,
        batched=True,
        batch_size=-1,
        keep_in_memory=True,
        remove_columns=datasets["train"].column_names,
    )

    # Take the union of all unique characters across the splits. An explicit loop is
    # used instead of a pairwise `functools.reduce`, which only worked for exactly
    # two splits: with one split it returned the dataset itself, and with three or
    # more it tried to index an intermediate `set` with ["vocab"].
    vocab_set = set()
    for split_vocab in vocabs.values():
        vocab_set.update(split_vocab["vocab"][0])

    vocab_dict = {char: index for index, char in enumerate(sorted(vocab_set))}

    # replace white space with delimiter token
    if word_delimiter_token is not None:
        vocab_dict[word_delimiter_token] = vocab_dict[" "]
        del vocab_dict[" "]

    # add unk and pad token
    if unk_token is not None:
        vocab_dict[unk_token] = len(vocab_dict)

    if pad_token is not None:
        vocab_dict[pad_token] = len(vocab_dict)

    return vocab_dict
383
+
384
+
385
+ def main():
386
+ # See all possible arguments in src/transformers/training_args.py
387
+ # or by passing the --help flag to this script.
388
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
389
+
390
+ parser = HfArgumentParser(
391
+ (ModelArguments, DataTrainingArguments, TrainingArguments)
392
+ )
393
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
394
+ # If we pass only one argument to the script and it's the path to a json file,
395
+ # let's parse it to get our arguments.
396
+ model_args, data_args, training_args = parser.parse_json_file(
397
+ json_file=os.path.abspath(sys.argv[1])
398
+ )
399
+ else:
400
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
401
+
402
+ # Detecting last checkpoint.
403
+ last_checkpoint = None
404
+ if (
405
+ os.path.isdir(training_args.output_dir)
406
+ and training_args.do_train
407
+ and not training_args.overwrite_output_dir
408
+ ):
409
+ last_checkpoint = get_last_checkpoint(training_args.output_dir)
410
+ if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
411
+ raise ValueError(
412
+ f"Output directory ({training_args.output_dir}) already exists and is not empty. "
413
+ "Use --overwrite_output_dir to overcome."
414
+ )
415
+ elif last_checkpoint is not None:
416
+ logger.info(
417
+ f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
418
+ "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
419
+ )
420
+
421
+ # Setup logging
422
+ logging.basicConfig(
423
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
424
+ datefmt="%m/%d/%Y %H:%M:%S",
425
+ handlers=[logging.StreamHandler(sys.stdout)],
426
+ )
427
+ logger.setLevel(
428
+ logging.INFO if is_main_process(training_args.local_rank) else logging.WARN
429
+ )
430
+
431
+ # Log on each process the small summary:
432
+ logger.warning(
433
+ f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
434
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
435
+ )
436
+ # Set the verbosity to info of the Transformers logger (on main process only):
437
+ if is_main_process(training_args.local_rank):
438
+ transformers.utils.logging.set_verbosity_info()
439
+ logger.info("Training/evaluation parameters %s", training_args)
440
+
441
+ # Set seed before initializing model.
442
+ set_seed(training_args.seed)
443
+
444
+ # 1. First, let's load the dataset
445
+ raw_datasets = DatasetDict()
446
+
447
+ if training_args.do_train:
448
+ raw_datasets["train"] = load_dataset(
449
+ data_args.dataset_name,
450
+ data_args.dataset_config_name,
451
+ split=data_args.train_split_name,
452
+ use_auth_token=data_args.use_auth_token,
453
+ )
454
+
455
+ if data_args.audio_column_name not in raw_datasets["train"].column_names:
456
+ raise ValueError(
457
+ f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
458
+ "Make sure to set `--audio_column_name` to the correct audio column - one of "
459
+ f"{', '.join(raw_datasets['train'].column_names)}."
460
+ )
461
+
462
+ if data_args.text_column_name not in raw_datasets["train"].column_names:
463
+ raise ValueError(
464
+ f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
465
+ "Make sure to set `--text_column_name` to the correct text column - one of "
466
+ f"{', '.join(raw_datasets['train'].column_names)}."
467
+ )
468
+
469
+ if data_args.max_train_samples is not None:
470
+ raw_datasets["train"] = raw_datasets["train"].select(
471
+ range(data_args.max_train_samples)
472
+ )
473
+
474
+ if training_args.do_eval:
475
+ raw_datasets["eval"] = load_dataset(
476
+ data_args.dataset_name,
477
+ data_args.dataset_config_name,
478
+ split=data_args.eval_split_name,
479
+ use_auth_token=data_args.use_auth_token,
480
+ )
481
+
482
+ if data_args.max_eval_samples is not None:
483
+ raw_datasets["eval"] = raw_datasets["eval"].select(
484
+ range(data_args.max_eval_samples)
485
+ )
486
+
487
+ # 2. We remove some special characters from the datasets
488
+ # that make training complicated and do not help in transcribing the speech
489
+ # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
490
+ # that could be easily picked up by the model
491
+ chars_to_ignore_regex = (
492
+ f'[{"".join(data_args.chars_to_ignore)}]'
493
+ if data_args.chars_to_ignore is not None
494
+ else None
495
+ )
496
+ text_column_name = data_args.text_column_name
497
+
498
+ def remove_special_characters(batch):
499
+ if chars_to_ignore_regex is not None:
500
+ batch["target_text"] = (
501
+ re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
502
+ )
503
+ else:
504
+ batch["target_text"] = batch[text_column_name].lower() + " "
505
+ return batch
506
+
507
+ with training_args.main_process_first(
508
+ desc="dataset map special characters removal"
509
+ ):
510
+ raw_datasets = raw_datasets.map(
511
+ remove_special_characters,
512
+ remove_columns=[text_column_name],
513
+ desc="remove special characters from datasets",
514
+ )
515
+
516
+ # save special tokens for tokenizer
517
+ word_delimiter_token = data_args.word_delimiter_token
518
+ unk_token = data_args.unk_token
519
+ pad_token = data_args.pad_token
520
+
521
+ # 3. Next, let's load the config as we might need it to create
522
+ # the tokenizer
523
+ # load config
524
+ config = AutoConfig.from_pretrained(
525
+ model_args.model_name_or_path,
526
+ cache_dir=model_args.cache_dir,
527
+ use_auth_token=data_args.use_auth_token,
528
+ )
529
+
530
+ # 4. Next, if no tokenizer file is defined,
531
+ # we create the vocabulary of the model by extracting all unique characters from
532
+ # the training and evaluation datasets
533
+ # We need to make sure that only first rank saves vocabulary
534
+ # make sure all processes wait until vocab is created
535
+ tokenizer_name_or_path = model_args.tokenizer_name_or_path
536
+ tokenizer_kwargs = {}
537
+ if tokenizer_name_or_path is None:
538
+ # save vocab in training output dir
539
+ tokenizer_name_or_path = training_args.output_dir
540
+
541
+ vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json")
542
+
543
+ with training_args.main_process_first():
544
+ if training_args.overwrite_output_dir and os.path.isfile(vocab_file):
545
+ os.remove(vocab_file)
546
+
547
+ with training_args.main_process_first(desc="dataset map vocabulary creation"):
548
+ if not os.path.isfile(vocab_file):
549
+ os.makedirs(tokenizer_name_or_path, exist_ok=True)
550
+ vocab_dict = create_vocabulary_from_data(
551
+ raw_datasets,
552
+ word_delimiter_token=word_delimiter_token,
553
+ unk_token=unk_token,
554
+ pad_token=pad_token,
555
+ )
556
+
557
+ # save vocab dict to be loaded into tokenizer
558
+ with open(vocab_file, "w") as file:
559
+ json.dump(vocab_dict, file)
560
+
561
+ # if tokenizer has just been created
562
+ # it is defined by `tokenizer_class` if present in config else by `model_type`
563
+ tokenizer_kwargs = {
564
+ "config": config if config.tokenizer_class is not None else None,
565
+ "tokenizer_type": config.model_type
566
+ if config.tokenizer_class is None
567
+ else None,
568
+ "unk_token": unk_token,
569
+ "pad_token": pad_token,
570
+ "word_delimiter_token": word_delimiter_token,
571
+ }
572
+
573
+ # 5. Now we can instantiate the feature extractor, tokenizer and model
574
+ # Note for distributed training, the .from_pretrained methods guarantee that only
575
+ # one local process can concurrently download model & vocab.
576
+
577
+ # load feature_extractor and tokenizer
578
+ tokenizer = AutoTokenizer.from_pretrained(
579
+ tokenizer_name_or_path,
580
+ use_auth_token=data_args.use_auth_token,
581
+ **tokenizer_kwargs,
582
+ )
583
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
584
+ model_args.model_name_or_path,
585
+ cache_dir=model_args.cache_dir,
586
+ use_auth_token=data_args.use_auth_token,
587
+ )
588
+
589
+ # adapt config
590
+ config.update(
591
+ {
592
+ "feat_proj_dropout": model_args.feat_proj_dropout,
593
+ "attention_dropout": model_args.attention_dropout,
594
+ "hidden_dropout": model_args.hidden_dropout,
595
+ "final_dropout": model_args.final_dropout,
596
+ "mask_time_prob": model_args.mask_time_prob,
597
+ "mask_time_length": model_args.mask_time_length,
598
+ "mask_feature_prob": model_args.mask_feature_prob,
599
+ "mask_feature_length": model_args.mask_feature_length,
600
+ "gradient_checkpointing": training_args.gradient_checkpointing,
601
+ "layerdrop": model_args.layerdrop,
602
+ "ctc_loss_reduction": model_args.ctc_loss_reduction,
603
+ "pad_token_id": tokenizer.pad_token_id,
604
+ "vocab_size": len(tokenizer),
605
+ "activation_dropout": model_args.activation_dropout,
606
+ }
607
+ )
608
+
609
+ # create model
610
+ model = AutoModelForCTC.from_pretrained(
611
+ model_args.model_name_or_path,
612
+ cache_dir=model_args.cache_dir,
613
+ config=config,
614
+ use_auth_token=data_args.use_auth_token,
615
+ )
616
+
617
+ # freeze encoder
618
+ if model_args.freeze_feature_encoder:
619
+ model.freeze_feature_encoder()
620
+
621
+ # 6. Now we preprocess the datasets including loading the audio, resampling and normalization
622
+ # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
623
+ # so that we just need to set the correct target sampling rate and normalize the input
624
+ # via the `feature_extractor`
625
+
626
+ # make sure that dataset decodes audio with correct sampling rate
627
+ dataset_sampling_rate = (
628
+ next(iter(raw_datasets.values()))
629
+ .features[data_args.audio_column_name]
630
+ .sampling_rate
631
+ )
632
+ if dataset_sampling_rate != feature_extractor.sampling_rate:
633
+ raw_datasets = raw_datasets.cast_column(
634
+ data_args.audio_column_name,
635
+ datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate),
636
+ )
637
+
638
+ # derive max & min input length for sample rate & max duration
639
+ max_input_length = (
640
+ data_args.max_duration_in_seconds * feature_extractor.sampling_rate
641
+ )
642
+ min_input_length = (
643
+ data_args.min_duration_in_seconds * feature_extractor.sampling_rate
644
+ )
645
+ audio_column_name = data_args.audio_column_name
646
+ num_workers = data_args.preprocessing_num_workers
647
+
648
+ # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification
649
+ phoneme_language = data_args.phoneme_language
650
+
651
+ # Preprocessing the datasets.
652
+ # We need to read the audio files as arrays and tokenize the targets.
653
+ def prepare_dataset(batch):
654
+ # load audio
655
+ sample = batch[audio_column_name]
656
+
657
+ inputs = feature_extractor(
658
+ sample["array"], sampling_rate=sample["sampling_rate"]
659
+ )
660
+ batch["input_values"] = inputs.input_values[0]
661
+ batch["input_length"] = len(batch["input_values"])
662
+
663
+ # encode targets
664
+ additional_kwargs = {}
665
+ if phoneme_language is not None:
666
+ additional_kwargs["phonemizer_lang"] = phoneme_language
667
+
668
+ batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids
669
+ return batch
670
+
671
+ with training_args.main_process_first(desc="dataset map preprocessing"):
672
+ vectorized_datasets = raw_datasets.map(
673
+ prepare_dataset,
674
+ remove_columns=next(iter(raw_datasets.values())).column_names,
675
+ num_proc=num_workers,
676
+ desc="preprocess datasets",
677
+ )
678
+
679
+ def is_audio_in_length_range(length):
680
+ return length > min_input_length and length < max_input_length
681
+
682
+     # keep only data whose length lies strictly between min_input_length and max_input_length
683
+ vectorized_datasets = vectorized_datasets.filter(
684
+ is_audio_in_length_range,
685
+ num_proc=num_workers,
686
+ input_columns=["input_length"],
687
+ )
688
+
689
+ # 7. Next, we can prepare the training.
690
+ # Let's use word error rate (WER) as our evaluation metric,
691
+ # instantiate a data collator and the trainer
692
+
693
+ # Define evaluation metrics during training, *i.e.* word error rate, character error rate
694
+ eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
695
+
696
+ # for large datasets it is advised to run the preprocessing on a
697
+     # single machine first with ``args.preprocessing_only`` since there will most likely
698
+ # be a timeout when running the script in distributed mode.
699
+ # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
700
+ # cached dataset
701
+ if data_args.preprocessing_only:
702
+ logger.info(
703
+ f"Data preprocessing finished. Files cached at {vectorized_datasets.cache_files}"
704
+ )
705
+ return
706
+
707
+ def compute_metrics(pred):
708
+ pred_logits = pred.predictions
709
+ pred_ids = np.argmax(pred_logits, axis=-1)
710
+
711
+ pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
712
+
713
+ pred_str = tokenizer.batch_decode(pred_ids)
714
+ # we do not want to group tokens when computing the metrics
715
+ label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)
716
+
717
+ metrics = {
718
+ k: v.compute(predictions=pred_str, references=label_str)
719
+ for k, v in eval_metrics.items()
720
+ }
721
+
722
+ return metrics
723
+
724
+ # Now save everything to be able to create a single processor later
725
+ if is_main_process(training_args.local_rank):
726
+ # save feature extractor, tokenizer and config
727
+ feature_extractor.save_pretrained(training_args.output_dir)
728
+ tokenizer.save_pretrained(training_args.output_dir)
729
+ config.save_pretrained(training_args.output_dir)
730
+
731
+ try:
732
+ processor = AutoProcessor.from_pretrained(training_args.output_dir)
733
+ except (OSError, KeyError):
734
+ warnings.warn(
735
+ "Loading a processor from a feature extractor config that does not"
736
+ " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
737
+ " attribute to your `preprocessor_config.json` file to suppress this warning: "
738
+ " `'processor_class': 'Wav2Vec2Processor'`",
739
+ FutureWarning,
740
+ )
741
+ processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)
742
+
743
+ # Instantiate custom data collator
744
+ data_collator = DataCollatorCTCWithPadding(processor=processor)
745
+
746
+ # Initialize Trainer
747
+ trainer = Trainer(
748
+ model=model,
749
+ data_collator=data_collator,
750
+ args=training_args,
751
+ compute_metrics=compute_metrics,
752
+ train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
753
+ eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
754
+ tokenizer=feature_extractor,
755
+ )
756
+
757
+ # 8. Finally, we can start training
758
+
759
+ # Training
760
+ if training_args.do_train:
761
+
762
+         # use the last checkpoint if it exists
763
+ if last_checkpoint is not None:
764
+ checkpoint = last_checkpoint
765
+ elif os.path.isdir(model_args.model_name_or_path):
766
+ checkpoint = model_args.model_name_or_path
767
+ else:
768
+ checkpoint = None
769
+
770
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
771
+ trainer.save_model()
772
+
773
+ metrics = train_result.metrics
774
+ max_train_samples = (
775
+ data_args.max_train_samples
776
+ if data_args.max_train_samples is not None
777
+ else len(vectorized_datasets["train"])
778
+ )
779
+ metrics["train_samples"] = min(
780
+ max_train_samples, len(vectorized_datasets["train"])
781
+ )
782
+
783
+ trainer.log_metrics("train", metrics)
784
+ trainer.save_metrics("train", metrics)
785
+ trainer.save_state()
786
+
787
+ # Evaluation
788
+ results = {}
789
+ if training_args.do_eval:
790
+ logger.info("*** Evaluate ***")
791
+ metrics = trainer.evaluate()
792
+ max_eval_samples = (
793
+ data_args.max_eval_samples
794
+ if data_args.max_eval_samples is not None
795
+ else len(vectorized_datasets["eval"])
796
+ )
797
+ metrics["eval_samples"] = min(
798
+ max_eval_samples, len(vectorized_datasets["eval"])
799
+ )
800
+
801
+ trainer.log_metrics("eval", metrics)
802
+ trainer.save_metrics("eval", metrics)
803
+
804
+ # Write model card and (optionally) push to hub
805
+ config_name = (
806
+ data_args.dataset_config_name
807
+ if data_args.dataset_config_name is not None
808
+ else "na"
809
+ )
810
+ kwargs = {
811
+ "finetuned_from": model_args.model_name_or_path,
812
+ "tasks": "speech-recognition",
813
+ "tags": ["automatic-speech-recognition", data_args.dataset_name],
814
+ "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
815
+ "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
816
+ }
817
+ if "common_voice" in data_args.dataset_name:
818
+ kwargs["language"] = config_name
819
+
820
+ if training_args.push_to_hub:
821
+ trainer.push_to_hub(**kwargs)
822
+ else:
823
+ trainer.create_model_card(**kwargs)
824
+
825
+ return results
826
+
827
+
828
+ if __name__ == "__main__":
829
+ main()
runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/1644165171.7227242/events.out.tfevents.1644165171.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c98151adf5ee89a0d86283e74254aa6f1e4356e6d7ca722e0529d9870e9c55e6
3
+ size 4564
runs/Feb06_16-31-57_job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c/events.out.tfevents.1644165171.job-cb7cc850-8327-4ab0-bdf4-0ebe63e2788c ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d4a1bc2c2de2a345205696c1d43038bf6abda4645555b39a2341e729306471
3
+ size 5468
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472e2ebcb99d59b6b693f009ff1df20cb7c55629d4fab148f61d3dc117b7c960
3
+ size 2991
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"a": 1, "b": 2, "c": 3, "d": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "l": 10, "m": 11, "n": 12, "p": 13, "q": 14, "r": 15, "s": 16, "t": 17, "u": 18, "v": 19, "w": 20, "x": 21, "y": 22, "~": 23, "⋯": 24, "⠀": 25, "⻣": 26, "ㄧ": 27, "㗎": 28, "㩒": 29, "㩿": 30, "㪐": 31, "䒏": 32, "䒐": 33, "䰧": 34, "一": 35, "丁": 36, "七": 37, "丈": 38, "三": 39, "上": 40, "下": 41, "不": 42, "丑": 43, "且": 44, "丕": 45, "世": 46, "丘": 47, "丙": 48, "丟": 49, "両": 50, "並": 51, "丫": 52, "中": 53, "丰": 54, "串": 55, "丶": 56, "丸": 57, "丹": 58, "主": 59, "丼": 60, "乃": 61, "久": 62, "义": 63, "之": 64, "乍": 65, "乎": 66, "乏": 67, "乒": 68, "乓": 69, "乖": 70, "乘": 71, "乙": 72, "乜": 73, "九": 74, "乞": 75, "也": 76, "乳": 77, "乸": 78, "乾": 79, "亂": 80, "了": 81, "予": 82, "事": 83, "二": 84, "于": 85, "互": 86, "五": 87, "井": 88, "些": 89, "亞": 90, "亡": 91, "亢": 92, "交": 93, "亦": 94, "亨": 95, "享": 96, "京": 97, "亭": 98, "亮": 99, "人": 100, "什": 101, "仁": 102, "仆": 103, "仇": 104, "今": 105, "介": 106, "仍": 107, "仔": 108, "仕": 109, "他": 110, "仗": 111, "付": 112, "仙": 113, "仞": 114, "代": 115, "令": 116, "以": 117, "仰": 118, "仲": 119, "件": 120, "任": 121, "份": 122, "仿": 123, "企": 124, "伊": 125, "伏": 126, "伐": 127, "休": 128, "伙": 129, "伯": 130, "估": 131, "伴": 132, "伶": 133, "伸": 134, "似": 135, "伽": 136, "佃": 137, "但": 138, "佈": 139, "位": 140, "低": 141, "住": 142, "佐": 143, "佑": 144, "佔": 145, "何": 146, "佗": 147, "余": 148, "佚": 149, "佛": 150, "作": 151, "你": 152, "佢": 153, "佣": 154, "佩": 155, "佬": 156, "佳": 157, "併": 158, "佻": 159, "使": 160, "侄": 161, "來": 162, "例": 163, "侍": 164, "供": 165, "依": 166, "侮": 167, "侯": 168, "侵": 169, "侶": 170, "便": 171, "係": 172, "促": 173, "俄": 174, "俊": 175, "俎": 176, "俏": 177, "俐": 178, "俗": 179, "俚": 180, "保": 181, "俠": 182, "信": 183, "俬": 184, "修": 185, "俸": 186, "俾": 187, "倉": 188, "個": 189, "倍": 190, "們": 191, "倒": 192, "候": 193, "倚": 194, "借": 195, "倦": 196, "倫": 197, "值": 198, "假": 199, "偈": 200, "偉": 201, "偏": 202, "偕": 203, "做": 204, "停": 205, "健": 206, "側": 207, "偶": 208, "偷": 209, "偽": 210, "傅": 
211, "傍": 212, "傑": 213, "傘": 214, "備": 215, "傢": 216, "催": 217, "傭": 218, "傲": 219, "傳": 220, "債": 221, "傷": 222, "傻": 223, "傾": 224, "僅": 225, "像": 226, "僑": 227, "僕": 228, "僭": 229, "僱": 230, "價": 231, "僻": 232, "儀": 233, "億": 234, "儈": 235, "儍": 236, "儒": 237, "儘": 238, "優": 239, "儲": 240, "允": 241, "元": 242, "兄": 243, "充": 244, "兆": 245, "兇": 246, "先": 247, "光": 248, "克": 249, "兌": 250, "免": 251, "兒": 252, "兔": 253, "兜": 254, "入": 255, "內": 256, "全": 257, "兩": 258, "八": 259, "公": 260, "六": 261, "兮": 262, "共": 263, "兵": 264, "其": 265, "具": 266, "典": 267, "兼": 268, "内": 269, "冇": 270, "冊": 271, "再": 272, "冒": 273, "冕": 274, "冗": 275, "冚": 276, "冠": 277, "冤": 278, "冧": 279, "冬": 280, "冰": 281, "冷": 282, "准": 283, "凈": 284, "凌": 285, "凍": 286, "凝": 287, "凡": 288, "凰": 289, "凱": 290, "凳": 291, "凶": 292, "凹": 293, "出": 294, "函": 295, "刀": 296, "刁": 297, "刃": 298, "分": 299, "切": 300, "刑": 301, "划": 302, "列": 303, "初": 304, "判": 305, "別": 306, "刨": 307, "利": 308, "刮": 309, "到": 310, "制": 311, "刷": 312, "券": 313, "刺": 314, "刻": 315, "則": 316, "前": 317, "剎": 318, "剔": 319, "剛": 320, "剝": 321, "剩": 322, "剪": 323, "副": 324, "割": 325, "創": 326, "剷": 327, "劃": 328, "劇": 329, "劉": 330, "劊": 331, "劍": 332, "劑": 333, "劖": 334, "力": 335, "功": 336, "加": 337, "劣": 338, "助": 339, "努": 340, "勁": 341, "勃": 342, "勇": 343, "勉": 344, "勒": 345, "動": 346, "勘": 347, "務": 348, "勝": 349, "勞": 350, "勢": 351, "勤": 352, "勳": 353, "勵": 354, "勸": 355, "勻": 356, "勾": 357, "勿": 358, "包": 359, "匈": 360, "化": 361, "北": 362, "匙": 363, "匡": 364, "匯": 365, "匹": 366, "匿": 367, "區": 368, "十": 369, "千": 370, "升": 371, "午": 372, "半": 373, "卑": 374, "卒": 375, "卓": 376, "協": 377, "南": 378, "博": 379, "卜": 380, "卡": 381, "卦": 382, "卧": 383, "印": 384, "危": 385, "即": 386, "卵": 387, "卷": 388, "卸": 389, "卻": 390, "卿": 391, "厄": 392, "厘": 393, "厚": 394, "原": 395, "厥": 396, "厭": 397, "厲": 398, "厴": 399, "去": 400, "參": 401, "又": 402, "叉": 403, "及": 404, "友": 405, "反": 406, "叔": 407, "取": 408, "受": 409, "叛": 410, "叢": 
411, "口": 412, "古": 413, "句": 414, "另": 415, "叨": 416, "只": 417, "叫": 418, "召": 419, "叭": 420, "叮": 421, "可": 422, "台": 423, "史": 424, "右": 425, "司": 426, "叻": 427, "吃": 428, "各": 429, "合": 430, "吉": 431, "吊": 432, "吋": 433, "同": 434, "名": 435, "后": 436, "吐": 437, "向": 438, "���": 439, "吖": 440, "君": 441, "吝": 442, "吞": 443, "吟": 444, "吠": 445, "否": 446, "吧": 447, "吩": 448, "含": 449, "吱": 450, "吳": 451, "吵": 452, "吶": 453, "吸": 454, "吹": 455, "吻": 456, "吼": 457, "吽": 458, "吾": 459, "呀": 460, "呂": 461, "呃": 462, "呆": 463, "呈": 464, "告": 465, "呎": 466, "呔": 467, "呢": 468, "周": 469, "呱": 470, "味": 471, "呷": 472, "呻": 473, "呼": 474, "命": 475, "咀": 476, "咁": 477, "咄": 478, "咇": 479, "咋": 480, "和": 481, "咐": 482, "咕": 483, "咖": 484, "咗": 485, "咦": 486, "咧": 487, "咩": 488, "咪": 489, "咬": 490, "咯": 491, "咳": 492, "咸": 493, "咽": 494, "咿": 495, "哀": 496, "品": 497, "哂": 498, "哄": 499, "哇": 500, "哈": 501, "哉": 502, "哋": 503, "响": 504, "哎": 505, "員": 506, "哣": 507, "哥": 508, "哦": 509, "哨": 510, "哩": 511, "哪": 512, "哭": 513, "哲": 514, "哺": 515, "哼": 516, "唇": 517, "唈": 518, "唉": 519, "唎": 520, "唏": 521, "唐": 522, "唔": 523, "唞": 524, "唥": 525, "唧": 526, "唪": 527, "售": 528, "唯": 529, "唱": 530, "唸": 531, "啄": 532, "啅": 533, "商": 534, "啊": 535, "啋": 536, "問": 537, "啕": 538, "啖": 539, "啜": 540, "啞": 541, "啟": 542, "啡": 543, "啤": 544, "啦": 545, "啩": 546, "啪": 547, "啫": 548, "啱": 549, "啲": 550, "啵": 551, "喀": 552, "喂": 553, "喃": 554, "善": 555, "喇": 556, "喉": 557, "喊": 558, "喎": 559, "喐": 560, "喔": 561, "喙": 562, "喚": 563, "喜": 564, "喝": 565, "喪": 566, "喫": 567, "喬": 568, "單": 569, "喱": 570, "喳": 571, "喺": 572, "喻": 573, "喼": 574, "嗅": 575, "嗇": 576, "嗌": 577, "嗎": 578, "嗒": 579, "嗚": 580, "嗜": 581, "嗡": 582, "嗤": 583, "嗦": 584, "嗰": 585, "嗱": 586, "嗲": 587, "嗶": 588, "嗷": 589, "嗽": 590, "嘅": 591, "嘆": 592, "嘈": 593, "嘉": 594, "嘔": 595, "嘗": 596, "嘛": 597, "嘜": 598, "嘞": 599, "嘟": 600, "嘢": 601, "嘥": 602, "嘩": 603, "嘲": 604, "嘴": 605, "嘸": 606, "噁": 607, "噃": 608, "噄": 609, "噉": 610, 
"噌": 611, "噎": 612, "噏": 613, "噓": 614, "噚": 615, "噤": 616, "器": 617, "噪": 618, "噬": 619, "噴": 620, "噶": 621, "噹": 622, "嚇": 623, "嚎": 624, "嚐": 625, "嚕": 626, "嚟": 627, "嚡": 628, "嚢": 629, "嚥": 630, "嚨": 631, "嚴": 632, "嚷": 633, "嚼": 634, "嚿": 635, "囉": 636, "囊": 637, "囌": 638, "囍": 639, "囑": 640, "囚": 641, "四": 642, "囝": 643, "回": 644, "因": 645, "囡": 646, "囪": 647, "困": 648, "固": 649, "圃": 650, "圈": 651, "國": 652, "圍": 653, "圑": 654, "園": 655, "圓": 656, "圖": 657, "團": 658, "土": 659, "在": 660, "圭": 661, "地": 662, "圳": 663, "圾": 664, "址": 665, "均": 666, "坊": 667, "坎": 668, "坐": 669, "坑": 670, "坡": 671, "坤": 672, "坦": 673, "坪": 674, "坭": 675, "坳": 676, "垂": 677, "垃": 678, "型": 679, "垢": 680, "埃": 681, "埋": 682, "城": 683, "埔": 684, "埗": 685, "埞": 686, "域": 687, "埠": 688, "埲": 689, "執": 690, "培": 691, "基": 692, "堂": 693, "堅": 694, "堆": 695, "堡": 696, "堤": 697, "堪": 698, "報": 699, "場": 700, "堵": 701, "塊": 702, "塑": 703, "塔": 704, "塗": 705, "塘": 706, "塞": 707, "塢": 708, "填": 709, "塱": 710, "塵": 711, "塾": 712, "境": 713, "墅": 714, "墊": 715, "墓": 716, "墜": 717, "增": 718, "墟": 719, "墨": 720, "墩": 721, "墮": 722, "墳": 723, "壁": 724, "壆": 725, "壇": 726, "壓": 727, "壘": 728, "壞": 729, "壟": 730, "壩": 731, "士": 732, "壯": 733, "壹": 734, "壺": 735, "壽": 736, "夏": 737, "夕": 738, "外": 739, "多": 740, "夜": 741, "夠": 742, "夢": 743, "夥": 744, "大": 745, "天": 746, "太": 747, "夫": 748, "央": 749, "失": 750, "夷": 751, "夾": 752, "奀": 753, "奄": 754, "奇": 755, "奈": 756, "奉": 757, "奏": 758, "契": 759, "奔": 760, "奕": 761, "套": 762, "奚": 763, "奧": 764, "奪": 765, "奮": 766, "女": 767, "奴": 768, "奶": 769, "奸": 770, "她": 771, "好": 772, "如": 773, "妄": 774, "妒": 775, "妓": 776, "妙": 777, "妝": 778, "妥": 779, "妨": 780, "妳": 781, "妹": 782, "妻": 783, "姆": 784, "姊": 785, "始": 786, "姐": 787, "姑": 788, "姓": 789, "委": 790, "姣": 791, "姦": 792, "姨": 793, "姬": 794, "姻": 795, "姿": 796, "威": 797, "娃": 798, "娘": 799, "娛": 800, "娜": 801, "娥": 802, "娶": 803, "婆": 804, "婚": 805, "婦": 806, "媒": 807, "媽": 808, "媾": 809, "嫁": 810, 
"嫂": 811, "嫉": 812, "嫌": 813, "嫩": 814, "嫪": 815, "嫲": 816, "嫻": 817, "嬉": 818, "嬲": 819, "嬸": 820, "子": 821, "孔": 822, "孖": 823, "字": 824, "存": 825, "孚": 826, "孝": 827, "孟": 828, "季": 829, "孤": 830, "孥": 831, "孩": 832, "孫": 833, "孭": 834, "孰": 835, "孱": 836, "學": 837, "孽": 838, "它": 839, "宅": 840, "宇": 841, "守": 842, "安": 843, "宋": 844, "完": 845, "宏": 846, "宗": 847, "官": 848, "宙": 849, "定": 850, "宛": 851, "宜": 852, "客": 853, "宣": 854, "室": 855, "宮": 856, "宰": 857, "害": 858, "宴": 859, "宵": 860, "家": 861, "宸": 862, "容": 863, "宿": 864, "寂": 865, "寃": 866, "寄": 867, "寅": 868, "密": 869, "寇": 870, "富": 871, "寒": 872, "寓": 873, "寞": 874, "察": 875, "寡": 876, "寢": 877, "實": 878, "寧": 879, "寨": 880, "審": 881, "寫": 882, "寬": 883, "寮": 884, "寶": 885, "寸": 886, "寺": 887, "封": 888, "射": 889, "將": 890, "專": 891, "尊": 892, "尋": 893, "對": 894, "導": 895, "小": 896, "少": 897, "尖": 898, "尚": 899, "尤": 900, "尬": 901, "就": 902, "尷": 903, "尺": 904, "尼": 905, "尾": 906, "尿": 907, "局": 908, "屁": 909, "居": 910, "屆": 911, "屈": 912, "屋": 913, "屌": 914, "屍": 915, "屎": 916, "屏": 917, "屑": 918, "展": 919, "屙": 920, "屠": 921, "層": 922, "履": 923, "屬": 924, "屯": 925, "山": 926, "屹": 927, "岀": 928, "岡": 929, "岩": 930, "岬": 931, "岳": 932, "岸": 933, "峒": 934, "峯": 935, "峰": 936, "島": 937, "峻": 938, "峽": 939, "崆": 940, "崇": 941, "崗": 942, "崙": 943, "崧": 944, "崩": 945, "嵌": 946, "嶄": 947, "嶙": 948, "嶺": 949, "嶼": 950, "巉": 951, "巒": 952, "川": 953, "州": 954, "巡": 955, "巢": 956, "工": 957, "左": 958, "巧": 959, "巨": 960, "巫": 961, "差": 962, "己": 963, "已": 964, "巴": 965, "巷": 966, "巾": 967, "市": 968, "布": 969, "帆": 970, "希": 971, "帖": 972, "帚": 973, "帝": 974, "帥": 975, "師": 976, "席": 977, "帳": 978, "帶": 979, "常": 980, "帽": 981, "幅": 982, "幕": 983, "幡": 984, "幢": 985, "幣": 986, "幫": 987, "干": 988, "平": 989, "年": 990, "幸": 991, "幹": 992, "幻": 993, "幼": 994, "幽": 995, "幾": 996, "庇": 997, "床": 998, "序": 999, "底": 1000, "店": 1001, "庚": 1002, "府": 1003, "度": 1004, "座": 1005, "庫": 1006, "庭": 1007, "庵": 1008, "庶": 1009, 
"康": 1010, "庸": 1011, "廁": 1012, "廂": 1013, "廈": 1014, "廉": 1015, "廊": 1016, "廖": 1017, "廚": 1018, "廟": 1019, "廠": 1020, "廢": 1021, "廣": 1022, "廬": 1023, "廳": 1024, "延": 1025, "廷": 1026, "建": 1027, "廿": 1028, "弄": 1029, "弊": 1030, "弍": 1031, "式": 1032, "弓": 1033, "引": 1034, "弟": 1035, "弱": 1036, "張": 1037, "強": 1038, "弸": 1039, "强": 1040, "弼": 1041, "彈": 1042, "彌": 1043, "彎": 1044, "彗": 1045, "彙": 1046, "形": 1047, "彤": 1048, "彥": 1049, "彩": 1050, "彪": 1051, "彭": 1052, "影": 1053, "彷": 1054, "役": 1055, "彼": 1056, "彿": 1057, "往": 1058, "征": 1059, "待": 1060, "徇": 1061, "很": 1062, "徊": 1063, "律": 1064, "後": 1065, "徐": 1066, "徑": 1067, "徒": 1068, "得": 1069, "徘": 1070, "從": 1071, "御": 1072, "復": 1073, "循": 1074, "微": 1075, "徵": 1076, "德": 1077, "徹": 1078, "徽": 1079, "心": 1080, "必": 1081, "忌": 1082, "忍": 1083, "志": 1084, "忘": 1085, "忙": 1086, "忠": 1087, "快": 1088, "念": 1089, "忽": 1090, "忿": 1091, "怎": 1092, "怒": 1093, "怕": 1094, "思": 1095, "怡": 1096, "急": 1097, "怦": 1098, "性": 1099, "怨": 1100, "怪": 1101, "怯": 1102, "恃": 1103, "恆": 1104, "恐": 1105, "恒": 1106, "恕": 1107, "恙": 1108, "恢": 1109, "恤": 1110, "恥": 1111, "恨": 1112, "恩": 1113, "恭": 1114, "息": 1115, "恰": 1116, "悅": 1117, "悉": 1118, "悒": 1119, "悔": 1120, "悖": 1121, "悗": 1122, "悟": 1123, "悠": 1124, "患": 1125, "您": 1126, "悲": 1127, "悶": 1128, "情": 1129, "惇": 1130, "惑": 1131, "惘": 1132, "惜": 1133, "惟": 1134, "惠": 1135, "惡": 1136, "惦": 1137, "惰": 1138, "惱": 1139, "想": 1140, "惶": 1141, "惹": 1142, "愁": 1143, "愈": 1144, "愉": 1145, "意": 1146, "愚": 1147, "愛": 1148, "感": 1149, "愧": 1150, "慈": 1151, "態": 1152, "慌": 1153, "慎": 1154, "慕": 1155, "慘": 1156, "慚": 1157, "慢": 1158, "慣": 1159, "慤": 1160, "慧": 1161, "慨": 1162, "慮": 1163, "慰": 1164, "慳": 1165, "慶": 1166, "慷": 1167, "慾": 1168, "憂": 1169, "憎": 1170, "憐": 1171, "憑": 1172, "憚": 1173, "憤": 1174, "憧": 1175, "憩": 1176, "憫": 1177, "憬": 1178, "憶": 1179, "憾": 1180, "懂": 1181, "懇": 1182, "應": 1183, "懊": 1184, "懞": 1185, "懣": 1186, "懵": 1187, "懶": 1188, "懷": 1189, "懺": 1190, "懼": 
1191, "懿": 1192, "戀": 1193, "戇": 1194, "戊": 1195, "戎": 1196, "成": 1197, "我": 1198, "戒": 1199, "戕": 1200, "或": 1201, "戚": 1202, "戟": 1203, "戥": 1204, "截": 1205, "戰": 1206, "戲": 1207, "戴": 1208, "戶": 1209, "戽": 1210, "戾": 1211, "房": 1212, "所": 1213, "扂": 1214, "扇": 1215, "手": 1216, "才": 1217, "扎": 1218, "扑": 1219, "扒": 1220, "打": 1221, "托": 1222, "扣": 1223, "扭": 1224, "扮": 1225, "扯": 1226, "扶": 1227, "批": 1228, "扻": 1229, "扼": 1230, "找": 1231, "承": 1232, "技": 1233, "抄": 1234, "抆": 1235, "把": 1236, "抑": 1237, "抓": 1238, "投": 1239, "抖": 1240, "抗": 1241, "折": 1242, "抦": 1243, "抬": 1244, "抱": 1245, "抵": 1246, "抹": 1247, "押": 1248, "抽": 1249, "抿": 1250, "拂": 1251, "拃": 1252, "拆": 1253, "拉": 1254, "拋": 1255, "拌": 1256, "拍": 1257, "拎": 1258, "拐": 1259, "拒": 1260, "拓": 1261, "拔": 1262, "拖": 1263, "拗": 1264, "拘": 1265, "拙": 1266, "招": 1267, "拜": 1268, "括": 1269, "拮": 1270, "拯": 1271, "拱": 1272, "拳": 1273, "拼": 1274, "拾": 1275, "拿": 1276, "持": 1277, "指": 1278, "挈": 1279, "按": 1280, "挑": 1281, "挖": 1282, "挨": 1283, "挪": 1284, "挫": 1285, "振": 1286, "挺": 1287, "挽": 1288, "挾": 1289, "捉": 1290, "捋": 1291, "捌": 1292, "捐": 1293, "捕": 1294, "捨": 1295, "捩": 1296, "据": 1297, "捱": 1298, "捲": 1299, "捶": 1300, "捷": 1301, "捺": 1302, "捽": 1303, "掂": 1304, "掃": 1305, "掅": 1306, "授": 1307, "掉": 1308, "掌": 1309, "排": 1310, "掕": 1311, "掗": 1312, "掘": 1313, "掙": 1314, "掛": 1315, "掟": 1316, "掠": 1317, "採": 1318, "探": 1319, "掣": 1320, "接": 1321, "控": 1322, "推": 1323, "掩": 1324, "措": 1325, "揀": 1326, "揇": 1327, "揈": 1328, "揉": 1329, "提": 1330, "插": 1331, "揗": 1332, "揚": 1333, "換": 1334, "揞": 1335, "握": 1336, "揣": 1337, "揦": 1338, "揩": 1339, "揪": 1340, "揭": 1341, "揮": 1342, "揳": 1343, "援": 1344, "揸": 1345, "揼": 1346, "揾": 1347, "損": 1348, "搏": 1349, "搖": 1350, "搗": 1351, "搜": 1352, "搞": 1353, "搣": 1354, "搬": 1355, "搭": 1356, "搵": 1357, "搶": 1358, "搽": 1359, "摑": 1360, "摘": 1361, "摙": 1362, "摞": 1363, "摧": 1364, "摩": 1365, "摯": 1366, "摳": 1367, "摷": 1368, "摸": 1369, "摺": 1370, "撇": 1371, "撈": 1372, 
"撐": 1373, "撒": 1374, "撓": 1375, "撕": 1376, "撚": 1377, "撞": 1378, "撤": 1379, "撥": 1380, "撩": 1381, "撫": 1382, "播": 1383, "撮": 1384, "撲": 1385, "撳": 1386, "撻": 1387, "撼": 1388, "撿": 1389, "擁": 1390, "擂": 1391, "擅": 1392, "擇": 1393, "擊": 1394, "擋": 1395, "操": 1396, "擎": 1397, "擒": 1398, "擔": 1399, "擘": 1400, "據": 1401, "擤": 1402, "擦": 1403, "擬": 1404, "擰": 1405, "擲": 1406, "擴": 1407, "擸": 1408, "擺": 1409, "擾": 1410, "攀": 1411, "攋": 1412, "攏": 1413, "攔": 1414, "攘": 1415, "攝": 1416, "攞": 1417, "攣": 1418, "攤": 1419, "攪": 1420, "攬": 1421, "支": 1422, "攰": 1423, "收": 1424, "攸": 1425, "改": 1426, "攻": 1427, "放": 1428, "政": 1429, "故": 1430, "效": 1431, "敏": 1432, "救": 1433, "敗": 1434, "敘": 1435, "教": 1436, "敝": 1437, "敢": 1438, "散": 1439, "敦": 1440, "敬": 1441, "敲": 1442, "整": 1443, "敵": 1444, "敷": 1445, "數": 1446, "斂": 1447, "斃": 1448, "文": 1449, "斐": 1450, "斑": 1451, "斗": 1452, "料": 1453, "斜": 1454, "斟": 1455, "斤": 1456, "斧": 1457, "斬": 1458, "斯": 1459, "新": 1460, "斷": 1461, "方": 1462, "於": 1463, "施": 1464, "旁": 1465, "旅": 1466, "旋": 1467, "族": 1468, "旗": 1469, "既": 1470, "旣": 1471, "日": 1472, "旦": 1473, "旨": 1474, "早": 1475, "旬": 1476, "旭": 1477, "旳": 1478, "旺": 1479, "昂": 1480, "昃": 1481, "昆": 1482, "昇": 1483, "昌": 1484, "明": 1485, "昏": 1486, "昐": 1487, "易": 1488, "昔": 1489, "星": 1490, "映": 1491, "春": 1492, "昧": 1493, "昨": 1494, "昭": 1495, "是": 1496, "昺": 1497, "時": 1498, "晃": 1499, "晉": 1500, "晌": 1501, "晏": 1502, "晒": 1503, "晚": 1504, "晝": 1505, "晤": 1506, "晨": 1507, "普": 1508, "景": 1509, "晴": 1510, "晶": 1511, "智": 1512, "晾": 1513, "暇": 1514, "暈": 1515, "暉": 1516, "暑": 1517, "暖": 1518, "暗": 1519, "暢": 1520, "暨": 1521, "暫": 1522, "暮": 1523, "暴": 1524, "暸": 1525, "曆": 1526, "曉": 1527, "曖": 1528, "曜": 1529, "曬": 1530, "曱": 1531, "曲": 1532, "曳": 1533, "更": 1534, "書": 1535, "曹": 1536, "曼": 1537, "曾": 1538, "替": 1539, "最": 1540, "會": 1541, "月": 1542, "有": 1543, "朋": 1544, "服": 1545, "朕": 1546, "朗": 1547, "望": 1548, "朝": 1549, "期": 1550, "朦": 1551, "朧": 1552, "木": 1553, "未": 
1554, "末": 1555, "本": 1556, "札": 1557, "朱": 1558, "朴": 1559, "朵": 1560, "朽": 1561, "杆": 1562, "杉": 1563, "李": 1564, "杏": 1565, "材": 1566, "村": 1567, "杖": 1568, "杜": 1569, "杞": 1570, "束": 1571, "来": 1572, "杭": 1573, "杯": 1574, "杰": 1575, "東": 1576, "杷": 1577, "松": 1578, "板": 1579, "枇": 1580, "枉": 1581, "枕": 1582, "林": 1583, "枚": 1584, "果": 1585, "枝": 1586, "枯": 1587, "枱": 1588, "架": 1589, "柄": 1590, "柏": 1591, "某": 1592, "柑": 1593, "柒": 1594, "染": 1595, "柔": 1596, "柚": 1597, "柞": 1598, "查": 1599, "柯": 1600, "柱": 1601, "柳": 1602, "柴": 1603, "柵": 1604, "柺": 1605, "柿": 1606, "栗": 1607, "校": 1608, "栢": 1609, "核": 1610, "根": 1611, "格": 1612, "栽": 1613, "桂": 1614, "桃": 1615, "桅": 1616, "案": 1617, "桌": 1618, "桐": 1619, "桑": 1620, "桔": 1621, "桶": 1622, "桿": 1623, "梁": 1624, "梅": 1625, "梓": 1626, "梗": 1627, "梘": 1628, "條": 1629, "梧": 1630, "梨": 1631, "梯": 1632, "械": 1633, "梳": 1634, "梵": 1635, "棄": 1636, "棉": 1637, "棋": 1638, "棍": 1639, "棒": 1640, "棕": 1641, "棖": 1642, "棗": 1643, "棘": 1644, "棚": 1645, "棟": 1646, "棠": 1647, "棧": 1648, "森": 1649, "棲": 1650, "棺": 1651, "椅": 1652, "植": 1653, "椏": 1654, "椒": 1655, "椰": 1656, "楂": 1657, "楊": 1658, "楋": 1659, "楓": 1660, "楚": 1661, "楣": 1662, "業": 1663, "極": 1664, "概": 1665, "榆": 1666, "榕": 1667, "榚": 1668, "榛": 1669, "榜": 1670, "榨": 1671, "榮": 1672, "榴": 1673, "構": 1674, "槍": 1675, "槐": 1676, "槤": 1677, "槽": 1678, "樂": 1679, "樊": 1680, "樑": 1681, "樓": 1682, "標": 1683, "樞": 1684, "樟": 1685, "模": 1686, "樣": 1687, "樸": 1688, "樹": 1689, "樺": 1690, "樽": 1691, "橋": 1692, "橘": 1693, "橙": 1694, "機": 1695, "橡": 1696, "橢": 1697, "橫": 1698, "檀": 1699, "檔": 1700, "檢": 1701, "檬": 1702, "檯": 1703, "檳": 1704, "檸": 1705, "檻": 1706, "櫃": 1707, "櫈": 1708, "櫚": 1709, "櫸": 1710, "櫻": 1711, "欄": 1712, "權": 1713, "欖": 1714, "欠": 1715, "次": 1716, "欣": 1717, "欲": 1718, "欺": 1719, "欽": 1720, "款": 1721, "歇": 1722, "歉": 1723, "歌": 1724, "歎": 1725, "歐": 1726, "歛": 1727, "歡": 1728, "止": 1729, "正": 1730, "此": 1731, "步": 1732, "武": 1733, "歧": 1734, "歪": 1735, 
"歲": 1736, "歷": 1737, "歸": 1738, "歹": 1739, "死": 1740, "殄": 1741, "殆": 1742, "殊": 1743, "殖": 1744, "殘": 1745, "殮": 1746, "段": 1747, "殷": 1748, "殺": 1749, "殼": 1750, "殿": 1751, "毀": 1752, "毅": 1753, "毋": 1754, "母": 1755, "每": 1756, "毒": 1757, "毓": 1758, "比": 1759, "毛": 1760, "毡": 1761, "毫": 1762, "氏": 1763, "民": 1764, "氓": 1765, "氛": 1766, "氣": 1767, "氧": 1768, "氯": 1769, "水": 1770, "永": 1771, "氹": 1772, "汀": 1773, "汁": 1774, "求": 1775, "汕": 1776, "汗": 1777, "汝": 1778, "江": 1779, "池": 1780, "污": 1781, "汪": 1782, "汰": 1783, "汶": 1784, "決": 1785, "汽": 1786, "沃": 1787, "沈": 1788, "沉": 1789, "沐": 1790, "沒": 1791, "沖": 1792, "沙": 1793, "沛": 1794, "沫": 1795, "沮": 1796, "沱": 1797, "河": 1798, "油": 1799, "治": 1800, "沽": 1801, "沾": 1802, "沿": 1803, "況": 1804, "泄": 1805, "泉": 1806, "泊": 1807, "泌": 1808, "泓": 1809, "法": 1810, "泛": 1811, "泡": 1812, "波": 1813, "泥": 1814, "注": 1815, "泮": 1816, "泰": 1817, "泳": 1818, "洋": 1819, "洗": 1820, "洛": 1821, "洞": 1822, "津": 1823, "洪": 1824, "洱": 1825, "洲": 1826, "洶": 1827, "活": 1828, "洽": 1829, "派": 1830, "流": 1831, "浙": 1832, "浚": 1833, "浣": 1834, "浦": 1835, "浩": 1836, "浪": 1837, "浮": 1838, "浴": 1839, "海": 1840, "浸": 1841, "涂": 1842, "消": 1843, "涉": 1844, "涌": 1845, "涕": 1846, "涯": 1847, "液": 1848, "涷": 1849, "涼": 1850, "淋": 1851, "淒": 1852, "淘": 1853, "淚": 1854, "淡": 1855, "淥": 1856, "淨": 1857, "淩": 1858, "淪": 1859, "淫": 1860, "深": 1861, "混": 1862, "淸": 1863, "淺": 1864, "添": 1865, "清": 1866, "減": 1867, "渝": 1868, "渠": 1869, "渡": 1870, "渣": 1871, "渦": 1872, "温": 1873, "測": 1874, "渭": 1875, "港": 1876, "渴": 1877, "游": 1878, "渺": 1879, "渾": 1880, "湃": 1881, "湖": 1882, "湘": 1883, "湧": 1884, "湯": 1885, "溋": 1886, "源": 1887, "準": 1888, "溜": 1889, "溝": 1890, "溢": 1891, "溪": 1892, "溫": 1893, "溶": 1894, "滂": 1895, "滄": 1896, "滅": 1897, "滋": 1898, "滌": 1899, "滑": 1900, "滔": 1901, "滘": 1902, "滙": 1903, "滯": 1904, "滷": 1905, "滾": 1906, "滿": 1907, "漁": 1908, "漂": 1909, "漆": 1910, "漏": 1911, "漓": 1912, "演": 1913, "漠": 1914, "漢": 1915, "漫": 1916, "漬": 
1917, "漲": 1918, "漸": 1919, "漾": 1920, "漿": 1921, "潑": 1922, "潔": 1923, "潛": 1924, "潤": 1925, "潭": 1926, "潮": 1927, "潰": 1928, "潲": 1929, "潷": 1930, "潺": 1931, "澄": 1932, "澍": 1933, "澎": 1934, "澡": 1935, "澤": 1936, "澩": 1937, "澱": 1938, "澳": 1939, "激": 1940, "濃": 1941, "濕": 1942, "濛": 1943, "濟": 1944, "濠": 1945, "濤": 1946, "濫": 1947, "濱": 1948, "濾": 1949, "瀉": 1950, "瀚": 1951, "瀝": 1952, "瀟": 1953, "瀨": 1954, "瀾": 1955, "灑": 1956, "灘": 1957, "灣": 1958, "火": 1959, "灰": 1960, "灼": 1961, "災": 1962, "炆": 1963, "炊": 1964, "炎": 1965, "炒": 1966, "炕": 1967, "炙": 1968, "炭": 1969, "炮": 1970, "炳": 1971, "炸": 1972, "為": 1973, "烈": 1974, "烏": 1975, "烘": 1976, "烙": 1977, "烟": 1978, "烤": 1979, "烹": 1980, "焉": 1981, "焗": 1982, "焚": 1983, "無": 1984, "焦": 1985, "然": 1986, "煉": 1987, "煎": 1988, "煖": 1989, "煙": 1990, "煞": 1991, "煤": 1992, "照": 1993, "煨": 1994, "煩": 1995, "煮": 1996, "煲": 1997, "煽": 1998, "熄": 1999, "熊": 2000, "熒": 2001, "熔": 2002, "熙": 2003, "熟": 2004, "熬": 2005, "熱": 2006, "熾": 2007, "燃": 2008, "燈": 2009, "燉": 2010, "燒": 2011, "燕": 2012, "燜": 2013, "營": 2014, "燥": 2015, "燭": 2016, "燴": 2017, "燶": 2018, "爆": 2019, "爐": 2020, "爛": 2021, "爪": 2022, "爬": 2023, "爭": 2024, "爲": 2025, "爵": 2026, "父": 2027, "爸": 2028, "爹": 2029, "爺": 2030, "爽": 2031, "爾": 2032, "牀": 2033, "牆": 2034, "片": 2035, "版": 2036, "牌": 2037, "牘": 2038, "牙": 2039, "牛": 2040, "牡": 2041, "牢": 2042, "牧": 2043, "物": 2044, "牯": 2045, "牲": 2046, "特": 2047, "牽": 2048, "犀": 2049, "犧": 2050, "犬": 2051, "犯": 2052, "狀": 2053, "狂": 2054, "狄": 2055, "狐": 2056, "狗": 2057, "狠": 2058, "狡": 2059, "狩": 2060, "狸": 2061, "狹": 2062, "狼": 2063, "猄": 2064, "猛": 2065, "猜": 2066, "猴": 2067, "猶": 2068, "猾": 2069, "獄": 2070, "獅": 2071, "獎": 2072, "獠": 2073, "獨": 2074, "獲": 2075, "獵": 2076, "獸": 2077, "獻": 2078, "玄": 2079, "率": 2080, "玉": 2081, "王": 2082, "玟": 2083, "玩": 2084, "玫": 2085, "玻": 2086, "珀": 2087, "珊": 2088, "珍": 2089, "珏": 2090, "珒": 2091, "珠": 2092, "班": 2093, "現": 2094, "球": 2095, "理": 2096, "琉": 2097, "琛": 2098, 
"琦": 2099, "琳": 2100, "琴": 2101, "琵": 2102, "琶": 2103, "瑕": 2104, "瑙": 2105, "瑜": 2106, "瑞": 2107, "瑟": 2108, "瑤": 2109, "瑧": 2110, "瑪": 2111, "瑰": 2112, "璀": 2113, "璃": 2114, "璇": 2115, "璉": 2116, "璐": 2117, "璟": 2118, "璧": 2119, "璨": 2120, "環": 2121, "璵": 2122, "璽": 2123, "瓊": 2124, "瓏": 2125, "瓜": 2126, "瓦": 2127, "瓶": 2128, "甘": 2129, "甚": 2130, "甜": 2131, "生": 2132, "產": 2133, "甥": 2134, "用": 2135, "甩": 2136, "甫": 2137, "田": 2138, "由": 2139, "甲": 2140, "申": 2141, "甴": 2142, "男": 2143, "甸": 2144, "畀": 2145, "畋": 2146, "界": 2147, "畏": 2148, "畐": 2149, "畔": 2150, "留": 2151, "畜": 2152, "畢": 2153, "略": 2154, "番": 2155, "畫": 2156, "異": 2157, "當": 2158, "畿": 2159, "疆": 2160, "疇": 2161, "疊": 2162, "疏": 2163, "疑": 2164, "疤": 2165, "疫": 2166, "疲": 2167, "疵": 2168, "疹": 2169, "疼": 2170, "疾": 2171, "病": 2172, "症": 2173, "痕": 2174, "痛": 2175, "痢": 2176, "痰": 2177, "痱": 2178, "痴": 2179, "痺": 2180, "痾": 2181, "瘀": 2182, "瘁": 2183, "瘋": 2184, "瘓": 2185, "瘟": 2186, "瘡": 2187, "瘦": 2188, "療": 2189, "癆": 2190, "癌": 2191, "癡": 2192, "癢": 2193, "癩": 2194, "癮": 2195, "癱": 2196, "癲": 2197, "登": 2198, "發": 2199, "白": 2200, "百": 2201, "皂": 2202, "的": 2203, "皆": 2204, "皇": 2205, "皚": 2206, "皮": 2207, "皺": 2208, "盃": 2209, "盅": 2210, "盆": 2211, "盈": 2212, "益": 2213, "盏": 2214, "盒": 2215, "盔": 2216, "盛": 2217, "盜": 2218, "盞": 2219, "盟": 2220, "盡": 2221, "監": 2222, "盤": 2223, "盧": 2224, "盪": 2225, "目": 2226, "盲": 2227, "直": 2228, "相": 2229, "盼": 2230, "盾": 2231, "省": 2232, "眉": 2233, "看": 2234, "眞": 2235, "真": 2236, "眠": 2237, "眨": 2238, "眯": 2239, "眶": 2240, "眼": 2241, "眾": 2242, "着": 2243, "睄": 2244, "睇": 2245, "睏": 2246, "睛": 2247, "睜": 2248, "睡": 2249, "督": 2250, "睥": 2251, "睦": 2252, "睨": 2253, "睬": 2254, "睹": 2255, "瞅": 2256, "瞌": 2257, "瞓": 2258, "瞞": 2259, "瞬": 2260, "瞭": 2261, "矛": 2262, "知": 2263, "矩": 2264, "短": 2265, "矮": 2266, "石": 2267, "砂": 2268, "砌": 2269, "砍": 2270, "研": 2271, "砰": 2272, "砲": 2273, "破": 2274, "砵": 2275, "砸": 2276, "硤": 2277, "硬": 2278, "碇": 2279, "碉": 
2280, "碌": 2281, "碎": 2282, "碑": 2283, "碗": 2284, "碘": 2285, "碟": 2286, "碧": 2287, "碰": 2288, "確": 2289, "碼": 2290, "磅": 2291, "磐": 2292, "磚": 2293, "磡": 2294, "磨": 2295, "磯": 2296, "礎": 2297, "礙": 2298, "礦": 2299, "礫": 2300, "示": 2301, "社": 2302, "祈": 2303, "祐": 2304, "祖": 2305, "祝": 2306, "神": 2307, "祟": 2308, "祠": 2309, "祥": 2310, "票": 2311, "祭": 2312, "祿": 2313, "禁": 2314, "禍": 2315, "福": 2316, "禡": 2317, "禧": 2318, "禪": 2319, "禮": 2320, "禱": 2321, "禽": 2322, "禾": 2323, "秀": 2324, "私": 2325, "秅": 2326, "秉": 2327, "秋": 2328, "科": 2329, "秒": 2330, "秘": 2331, "租": 2332, "秤": 2333, "秦": 2334, "秧": 2335, "秩": 2336, "移": 2337, "稀": 2338, "稅": 2339, "稈": 2340, "程": 2341, "稍": 2342, "稔": 2343, "稚": 2344, "稠": 2345, "種": 2346, "稱": 2347, "稻": 2348, "稿": 2349, "穀": 2350, "穌": 2351, "積": 2352, "穎": 2353, "穗": 2354, "穢": 2355, "穩": 2356, "穫": 2357, "穴": 2358, "究": 2359, "空": 2360, "穿": 2361, "突": 2362, "窄": 2363, "窒": 2364, "窗": 2365, "窠": 2366, "窩": 2367, "窮": 2368, "窰": 2369, "窿": 2370, "竄": 2371, "竅": 2372, "竇": 2373, "竊": 2374, "立": 2375, "站": 2376, "竟": 2377, "章": 2378, "童": 2379, "端": 2380, "競": 2381, "竹": 2382, "笆": 2383, "笈": 2384, "笏": 2385, "笑": 2386, "笛": 2387, "笠": 2388, "符": 2389, "笨": 2390, "笪": 2391, "第": 2392, "筆": 2393, "等": 2394, "筋": 2395, "筍": 2396, "筏": 2397, "筒": 2398, "答": 2399, "策": 2400, "筲": 2401, "筵": 2402, "筷": 2403, "箋": 2404, "箍": 2405, "箕": 2406, "算": 2407, "管": 2408, "箭": 2409, "箱": 2410, "箴": 2411, "節": 2412, "範": 2413, "篇": 2414, "築": 2415, "篋": 2416, "篙": 2417, "篤": 2418, "篳": 2419, "簍": 2420, "簡": 2421, "簽": 2422, "簾": 2423, "簿": 2424, "籃": 2425, "籌": 2426, "籍": 2427, "籐": 2428, "籠": 2429, "籤": 2430, "籬": 2431, "籮": 2432, "籲": 2433, "米": 2434, "籽": 2435, "粉": 2436, "粒": 2437, "粗": 2438, "粟": 2439, "粥": 2440, "粳": 2441, "粵": 2442, "粹": 2443, "粼": 2444, "精": 2445, "粿": 2446, "糉": 2447, "糊": 2448, "糍": 2449, "糕": 2450, "糖": 2451, "糞": 2452, "糟": 2453, "糧": 2454, "糯": 2455, "糰": 2456, "糴": 2457, "系": 2458, "糾": 2459, "紀": 2460, "約": 2461, 
"紅": 2462, "納": 2463, "紐": 2464, "紓": 2465, "純": 2466, "紗": 2467, "紙": 2468, "級": 2469, "紛": 2470, "素": 2471, "索": 2472, "紥": 2473, "紫": 2474, "紮": 2475, "累": 2476, "細": 2477, "紳": 2478, "紹": 2479, "終": 2480, "組": 2481, "結": 2482, "絕": 2483, "絞": 2484, "絡": 2485, "給": 2486, "絨": 2487, "統": 2488, "絲": 2489, "絶": 2490, "綁": 2491, "經": 2492, "綜": 2493, "綠": 2494, "綫": 2495, "維": 2496, "網": 2497, "綿": 2498, "緊": 2499, "緒": 2500, "緘": 2501, "線": 2502, "緣": 2503, "編": 2504, "緩": 2505, "緬": 2506, "練": 2507, "緻": 2508, "縉": 2509, "縊": 2510, "縛": 2511, "縫": 2512, "縮": 2513, "縱": 2514, "縷": 2515, "總": 2516, "績": 2517, "繁": 2518, "織": 2519, "繞": 2520, "繩": 2521, "繫": 2522, "繳": 2523, "繼": 2524, "續": 2525, "纏": 2526, "纔": 2527, "纖": 2528, "纜": 2529, "缸": 2530, "缺": 2531, "缽": 2532, "罅": 2533, "罐": 2534, "罔": 2535, "罕": 2536, "罟": 2537, "罩": 2538, "罪": 2539, "置": 2540, "罰": 2541, "署": 2542, "罵": 2543, "罷": 2544, "羅": 2545, "羈": 2546, "羊": 2547, "羌": 2548, "美": 2549, "羞": 2550, "羣": 2551, "群": 2552, "義": 2553, "羲": 2554, "羹": 2555, "羽": 2556, "翁": 2557, "翅": 2558, "翌": 2559, "習": 2560, "翔": 2561, "翠": 2562, "翡": 2563, "翩": 2564, "翰": 2565, "翱": 2566, "翻": 2567, "翼": 2568, "耀": 2569, "老": 2570, "考": 2571, "者": 2572, "而": 2573, "耍": 2574, "耐": 2575, "耕": 2576, "耗": 2577, "耘": 2578, "耳": 2579, "耶": 2580, "耷": 2581, "聆": 2582, "聊": 2583, "聖": 2584, "聘": 2585, "聚": 2586, "聞": 2587, "聯": 2588, "聰": 2589, "聲": 2590, "聳": 2591, "聶": 2592, "職": 2593, "聽": 2594, "肅": 2595, "肆": 2596, "肇": 2597, "肉": 2598, "肋": 2599, "肌": 2600, "肓": 2601, "肖": 2602, "肘": 2603, "肚": 2604, "肛": 2605, "肝": 2606, "股": 2607, "肥": 2608, "肨": 2609, "肩": 2610, "肯": 2611, "育": 2612, "肴": 2613, "肺": 2614, "胃": 2615, "背": 2616, "胎": 2617, "胚": 2618, "胡": 2619, "胭": 2620, "胸": 2621, "胺": 2622, "能": 2623, "脂": 2624, "脅": 2625, "脆": 2626, "脈": 2627, "脊": 2628, "脫": 2629, "脷": 2630, "脹": 2631, "脾": 2632, "腋": 2633, "腍": 2634, "腎": 2635, "腐": 2636, "腔": 2637, "腕": 2638, "腥": 2639, "腦": 2640, "腩": 2641, "腫": 2642, "腰": 
2643, "腳": 2644, "腸": 2645, "腺": 2646, "腿": 2647, "膀": 2648, "膊": 2649, "膏": 2650, "膚": 2651, "膜": 2652, "膝": 2653, "膠": 2654, "膨": 2655, "膩": 2656, "膳": 2657, "膺": 2658, "膽": 2659, "臂": 2660, "臉": 2661, "臘": 2662, "臟": 2663, "臣": 2664, "臨": 2665, "自": 2666, "臭": 2667, "至": 2668, "致": 2669, "臺": 2670, "臻": 2671, "臼": 2672, "舂": 2673, "舅": 2674, "與": 2675, "興": 2676, "舉": 2677, "舊": 2678, "舌": 2679, "舍": 2680, "舐": 2681, "舒": 2682, "舔": 2683, "舖": 2684, "舞": 2685, "舟": 2686, "舢": 2687, "舨": 2688, "航": 2689, "般": 2690, "舶": 2691, "船": 2692, "艇": 2693, "艦": 2694, "良": 2695, "艱": 2696, "色": 2697, "艷": 2698, "芋": 2699, "芒": 2700, "芙": 2701, "芝": 2702, "芥": 2703, "芬": 2704, "芭": 2705, "芯": 2706, "花": 2707, "芳": 2708, "芹": 2709, "芽": 2710, "苑": 2711, "苔": 2712, "苗": 2713, "苟": 2714, "苣": 2715, "若": 2716, "苦": 2717, "英": 2718, "茂": 2719, "范": 2720, "茄": 2721, "茅": 2722, "茫": 2723, "茵": 2724, "茶": 2725, "茸": 2726, "荃": 2727, "草": 2728, "荊": 2729, "荒": 2730, "荔": 2731, "荷": 2732, "莆": 2733, "莉": 2734, "莊": 2735, "莎": 2736, "莓": 2737, "莞": 2738, "莫": 2739, "莽": 2740, "菁": 2741, "菇": 2742, "菊": 2743, "菌": 2744, "菓": 2745, "菜": 2746, "菠": 2747, "菩": 2748, "華": 2749, "菱": 2750, "菲": 2751, "菴": 2752, "萃": 2753, "萄": 2754, "萊": 2755, "萍": 2756, "萬": 2757, "萺": 2758, "落": 2759, "葉": 2760, "著": 2761, "葛": 2762, "葡": 2763, "董": 2764, "葫": 2765, "葬": 2766, "葳": 2767, "葵": 2768, "蒂": 2769, "蒙": 2770, "蒜": 2771, "蒡": 2772, "蒲": 2773, "蒸": 2774, "蒼": 2775, "蓀": 2776, "蓆": 2777, "蓉": 2778, "蓋": 2779, "蓓": 2780, "蓬": 2781, "蓮": 2782, "蓺": 2783, "蔓": 2784, "蔔": 2785, "蔗": 2786, "蔥": 2787, "蔫": 2788, "蔬": 2789, "蔭": 2790, "蔽": 2791, "蕃": 2792, "蕉": 2793, "蕎": 2794, "蕙": 2795, "蕩": 2796, "蕪": 2797, "蕭": 2798, "蕾": 2799, "薄": 2800, "薇": 2801, "薈": 2802, "薏": 2803, "薑": 2804, "薩": 2805, "薪": 2806, "薯": 2807, "薰": 2808, "藉": 2809, "藍": 2810, "藏": 2811, "藐": 2812, "藕": 2813, "藝": 2814, "藤": 2815, "藥": 2816, "藹": 2817, "蘅": 2818, "蘆": 2819, "蘇": 2820, "蘋": 2821, "蘑": 2822, "蘭": 2823, "蘸": 2824, 
"蘿": 2825, "虎": 2826, "虐": 2827, "虓": 2828, "處": 2829, "虛": 2830, "號": 2831, "虧": 2832, "虱": 2833, "虹": 2834, "蚊": 2835, "蚌": 2836, "蚝": 2837, "蚵": 2838, "蚺": 2839, "蛇": 2840, "蛋": 2841, "蛛": 2842, "蛟": 2843, "蛤": 2844, "蜂": 2845, "蜆": 2846, "蜊": 2847, "蜘": 2848, "蜜": 2849, "蜢": 2850, "蝕": 2851, "蝗": 2852, "蝦": 2853, "蝨": 2854, "蝴": 2855, "蝶": 2856, "蝸": 2857, "融": 2858, "螞": 2859, "螢": 2860, "螺": 2861, "蟀": 2862, "蟆": 2863, "蟋": 2864, "蟠": 2865, "蟬": 2866, "蟲": 2867, "蟹": 2868, "蟻": 2869, "蠅": 2870, "蠔": 2871, "蠟": 2872, "蠢": 2873, "蠱": 2874, "蠻": 2875, "血": 2876, "衆": 2877, "行": 2878, "衍": 2879, "術": 2880, "街": 2881, "衙": 2882, "衛": 2883, "衝": 2884, "衞": 2885, "衡": 2886, "衣": 2887, "表": 2888, "衫": 2889, "衰": 2890, "衲": 2891, "衷": 2892, "袁": 2893, "袋": 2894, "袖": 2895, "被": 2896, "裁": 2897, "裏": 2898, "裔": 2899, "裕": 2900, "裙": 2901, "補": 2902, "裝": 2903, "裡": 2904, "裴": 2905, "製": 2906, "複": 2907, "褒": 2908, "褦": 2909, "褪": 2910, "褲": 2911, "褸": 2912, "襟": 2913, "襪": 2914, "襯": 2915, "襲": 2916, "西": 2917, "要": 2918, "覆": 2919, "見": 2920, "規": 2921, "覓": 2922, "視": 2923, "親": 2924, "覲": 2925, "覺": 2926, "覽": 2927, "觀": 2928, "角": 2929, "解": 2930, "觸": 2931, "言": 2932, "訂": 2933, "計": 2934, "訊": 2935, "討": 2936, "訓": 2937, "訕": 2938, "託": 2939, "記": 2940, "訝": 2941, "訪": 2942, "設": 2943, "許": 2944, "訴": 2945, "診": 2946, "註": 2947, "証": 2948, "詆": 2949, "詐": 2950, "評": 2951, "詞": 2952, "詢": 2953, "試": 2954, "詩": 2955, "詭": 2956, "話": 2957, "該": 2958, "詳": 2959, "詹": 2960, "誅": 2961, "誇": 2962, "誌": 2963, "認": 2964, "誓": 2965, "誕": 2966, "誘": 2967, "語": 2968, "誠": 2969, "誡": 2970, "誤": 2971, "誨": 2972, "說": 2973, "説": 2974, "誰": 2975, "課": 2976, "誼": 2977, "調": 2978, "談": 2979, "請": 2980, "諒": 2981, "論": 2982, "諗": 2983, "諜": 2984, "諦": 2985, "諧": 2986, "諫": 2987, "諷": 2988, "諸": 2989, "諺": 2990, "諾": 2991, "謀": 2992, "謁": 2993, "謂": 2994, "謊": 2995, "謎": 2996, "謙": 2997, "講": 2998, "謝": 2999, "謢": 3000, "謬": 3001, "謹": 3002, "謾": 3003, "證": 3004, "譎": 3005, "譖": 
3006, "識": 3007, "譚": 3008, "譜": 3009, "警": 3010, "譬": 3011, "譯": 3012, "議": 3013, "譴": 3014, "護": 3015, "譽": 3016, "讀": 3017, "變": 3018, "讎": 3019, "讓": 3020, "讚": 3021, "谷": 3022, "豁": 3023, "豂": 3024, "豆": 3025, "豈": 3026, "豉": 3027, "豎": 3028, "豐": 3029, "豚": 3030, "象": 3031, "豪": 3032, "豫": 3033, "豬": 3034, "豹": 3035, "貂": 3036, "貌": 3037, "貓": 3038, "貝": 3039, "負": 3040, "財": 3041, "貢": 3042, "貧": 3043, "貨": 3044, "販": 3045, "貪": 3046, "貫": 3047, "責": 3048, "貴": 3049, "貶": 3050, "買": 3051, "貸": 3052, "費": 3053, "貼": 3054, "貿": 3055, "賀": 3056, "賃": 3057, "資": 3058, "賈": 3059, "賊": 3060, "賒": 3061, "賓": 3062, "賜": 3063, "賞": 3064, "賢": 3065, "賣": 3066, "賤": 3067, "賦": 3068, "質": 3069, "賬": 3070, "賭": 3071, "賴": 3072, "賺": 3073, "購": 3074, "賽": 3075, "贅": 3076, "贈": 3077, "贊": 3078, "贏": 3079, "贼": 3080, "赤": 3081, "赫": 3082, "走": 3083, "赴": 3084, "起": 3085, "趁": 3086, "超": 3087, "越": 3088, "趌": 3089, "趕": 3090, "趙": 3091, "趣": 3092, "趨": 3093, "足": 3094, "趴": 3095, "趺": 3096, "趾": 3097, "跋": 3098, "跌": 3099, "跑": 3100, "跛": 3101, "距": 3102, "跟": 3103, "跡": 3104, "跣": 3105, "跨": 3106, "跪": 3107, "路": 3108, "跳": 3109, "踎": 3110, "踏": 3111, "踐": 3112, "踢": 3113, "踩": 3114, "踪": 3115, "踱": 3116, "踹": 3117, "蹄": 3118, "蹈": 3119, "蹋": 3120, "蹟": 3121, "蹤": 3122, "蹲": 3123, "蹺": 3124, "躁": 3125, "躉": 3126, "躍": 3127, "躝": 3128, "身": 3129, "躬": 3130, "躲": 3131, "車": 3132, "軌": 3133, "軍": 3134, "軒": 3135, "軟": 3136, "較": 3137, "載": 3138, "輊": 3139, "輋": 3140, "輔": 3141, "輕": 3142, "輘": 3143, "輝": 3144, "輟": 3145, "輩": 3146, "輪": 3147, "輯": 3148, "輷": 3149, "輸": 3150, "輻": 3151, "輾": 3152, "轄": 3153, "轆": 3154, "轉": 3155, "轍": 3156, "轡": 3157, "辛": 3158, "辜": 3159, "辣": 3160, "辦": 3161, "辨": 3162, "辭": 3163, "辯": 3164, "辰": 3165, "辱": 3166, "農": 3167, "迂": 3168, "迅": 3169, "迍": 3170, "迎": 3171, "近": 3172, "返": 3173, "迦": 3174, "迪": 3175, "迫": 3176, "述": 3177, "迴": 3178, "迷": 3179, "追": 3180, "迾": 3181, "退": 3182, "送": 3183, "逃": 3184, "逆": 3185, "透": 3186, "逐": 3187, 
"途": 3188, "逗": 3189, "這": 3190, "通": 3191, "逝": 3192, "逞": 3193, "速": 3194, "造": 3195, "逢": 3196, "連": 3197, "週": 3198, "進": 3199, "逸": 3200, "逹": 3201, "逼": 3202, "逾": 3203, "遂": 3204, "遇": 3205, "遊": 3206, "運": 3207, "遍": 3208, "過": 3209, "遏": 3210, "道": 3211, "達": 3212, "違": 3213, "遙": 3214, "遜": 3215, "遞": 3216, "遠": 3217, "遢": 3218, "遣": 3219, "適": 3220, "遭": 3221, "遮": 3222, "遲": 3223, "遴": 3224, "遵": 3225, "遷": 3226, "選": 3227, "遺": 3228, "避": 3229, "邀": 3230, "還": 3231, "邊": 3232, "邋": 3233, "邏": 3234, "那": 3235, "邦": 3236, "邨": 3237, "邪": 3238, "��": 3239, "邵": 3240, "邸": 3241, "郁": 3242, "郊": 3243, "郎": 3244, "郝": 3245, "部": 3246, "郭": 3247, "郵": 3248, "都": 3249, "鄂": 3250, "鄉": 3251, "鄙": 3252, "鄧": 3253, "鄭": 3254, "鄰": 3255, "酌": 3256, "配": 3257, "酒": 3258, "酥": 3259, "酪": 3260, "酬": 3261, "酮": 3262, "酱": 3263, "酷": 3264, "酸": 3265, "醇": 3266, "醉": 3267, "醋": 3268, "醒": 3269, "醜": 3270, "醫": 3271, "醬": 3272, "醺": 3273, "釀": 3274, "采": 3275, "釋": 3276, "里": 3277, "重": 3278, "野": 3279, "量": 3280, "金": 3281, "釗": 3282, "釘": 3283, "釜": 3284, "針": 3285, "釣": 3286, "釵": 3287, "鈍": 3288, "鈔": 3289, "鈕": 3290, "鈴": 3291, "鉛": 3292, "鉤": 3293, "鉸": 3294, "銀": 3295, "銅": 3296, "銘": 3297, "銳": 3298, "銷": 3299, "鋁": 3300, "鋒": 3301, "鋪": 3302, "鋼": 3303, "錄": 3304, "錐": 3305, "錢": 3306, "錦": 3307, "錫": 3308, "錯": 3309, "錶": 3310, "鍊": 3311, "鍋": 3312, "鍚": 3313, "鍵": 3314, "鍾": 3315, "鎖": 3316, "鎗": 3317, "鎭": 3318, "鎮": 3319, "鏈": 3320, "鏟": 3321, "鏡": 3322, "鏰": 3323, "鐘": 3324, "鐡": 3325, "鐵": 3326, "鐸": 3327, "鑄": 3328, "鑊": 3329, "鑑": 3330, "鑫": 3331, "鑲": 3332, "鑼": 3333, "鑽": 3334, "鑿": 3335, "長": 3336, "門": 3337, "閂": 3338, "閃": 3339, "閉": 3340, "開": 3341, "閏": 3342, "閒": 3343, "間": 3344, "閘": 3345, "閣": 3346, "閨": 3347, "閩": 3348, "閱": 3349, "閻": 3350, "闆": 3351, "闊": 3352, "闌": 3353, "闔": 3354, "闖": 3355, "關": 3356, "闢": 3357, "阜": 3358, "阪": 3359, "阱": 3360, "防": 3361, "阻": 3362, "阿": 3363, "陀": 3364, "陂": 3365, "附": 3366, "陌": 3367, "降": 3368, "限": 
3369, "陞": 3370, "院": 3371, "陣": 3372, "除": 3373, "陪": 3374, "陰": 3375, "陳": 3376, "陶": 3377, "陷": 3378, "陸": 3379, "陽": 3380, "隆": 3381, "隊": 3382, "階": 3383, "隔": 3384, "隙": 3385, "際": 3386, "障": 3387, "隧": 3388, "隨": 3389, "險": 3390, "隱": 3391, "隴": 3392, "隸": 3393, "隻": 3394, "雀": 3395, "雁": 3396, "雄": 3397, "雅": 3398, "集": 3399, "雋": 3400, "雌": 3401, "雍": 3402, "雖": 3403, "雙": 3404, "雜": 3405, "雞": 3406, "離": 3407, "難": 3408, "雨": 3409, "雪": 3410, "雲": 3411, "零": 3412, "雷": 3413, "電": 3414, "需": 3415, "霄": 3416, "震": 3417, "霉": 3418, "霎": 3419, "霖": 3420, "霜": 3421, "霧": 3422, "露": 3423, "霸": 3424, "靈": 3425, "青": 3426, "靖": 3427, "靚": 3428, "靜": 3429, "非": 3430, "靠": 3431, "面": 3432, "革": 3433, "靴": 3434, "靶": 3435, "鞋": 3436, "鞍": 3437, "鞦": 3438, "鞭": 3439, "韆": 3440, "韌": 3441, "韓": 3442, "音": 3443, "韻": 3444, "響": 3445, "頁": 3446, "頂": 3447, "項": 3448, "順": 3449, "須": 3450, "頌": 3451, "預": 3452, "頒": 3453, "頓": 3454, "頗": 3455, "領": 3456, "頤": 3457, "頭": 3458, "頸": 3459, "頻": 3460, "題": 3461, "額": 3462, "顏": 3463, "顔": 3464, "願": 3465, "顛": 3466, "類": 3467, "顧": 3468, "顯": 3469, "顱": 3470, "風": 3471, "颱": 3472, "飄": 3473, "飛": 3474, "食": 3475, "飢": 3476, "飯": 3477, "飲": 3478, "飼": 3479, "飽": 3480, "飾": 3481, "餃": 3482, "餅": 3483, "餉": 3484, "養": 3485, "餋": 3486, "餐": 3487, "餒": 3488, "餓": 3489, "餘": 3490, "館": 3491, "餵": 3492, "餸": 3493, "餼": 3494, "饅": 3495, "饌": 3496, "饑": 3497, "饒": 3498, "饕": 3499, "首": 3500, "香": 3501, "馨": 3502, "馬": 3503, "馮": 3504, "馳": 3505, "駁": 3506, "駐": 3507, "駒": 3508, "駕": 3509, "駛": 3510, "駝": 3511, "駟": 3512, "駱": 3513, "駿": 3514, "騅": 3515, "騎": 3516, "騙": 3517, "騭": 3518, "騮": 3519, "騰": 3520, "騷": 3521, "騾": 3522, "驅": 3523, "驕": 3524, "驗": 3525, "驚": 3526, "驟": 3527, "驥": 3528, "骨": 3529, "骹": 3530, "髀": 3531, "髓": 3532, "體": 3533, "高": 3534, "髮": 3535, "髻": 3536, "鬆": 3537, "鬚": 3538, "鬠": 3539, "鬢": 3540, "鬥": 3541, "鬧": 3542, "鬱": 3543, "鬼": 3544, "魁": 3545, "魂": 3546, "魄": 3547, "魅": 3548, "魏": 3549, "魔": 3550, 
"魚": 3551, "魯": 3552, "魷": 3553, "鮑": 3554, "鮟": 3555, "鮫": 3556, "鮭": 3557, "鮮": 3558, "鯇": 3559, "鯉": 3560, "鯊": 3561, "鯖": 3562, "鯛": 3563, "鯪": 3564, "鰂": 3565, "鰭": 3566, "鰻": 3567, "鱇": 3568, "鱈": 3569, "鱔": 3570, "鱗": 3571, "鱲": 3572, "鱷": 3573, "鱸": 3574, "鲁": 3575, "鳥": 3576, "鳩": 3577, "鳳": 3578, "鳴": 3579, "鳶": 3580, "鴉": 3581, "鴛": 3582, "鴦": 3583, "鴨": 3584, "鴻": 3585, "鴿": 3586, "鵝": 3587, "鵪": 3588, "鵬": 3589, "鵲": 3590, "鶉": 3591, "鶴": 3592, "鷄": 3593, "鷯": 3594, "鷹": 3595, "鸞": 3596, "鹅": 3597, "鹹": 3598, "鹼": 3599, "鹽": 3600, "鹿": 3601, "麒": 3602, "麗": 3603, "麝": 3604, "麟": 3605, "麥": 3606, "麪": 3607, "麵": 3608, "麻": 3609, "麼": 3610, "黃": 3611, "黎": 3612, "黏": 3613, "黐": 3614, "黑": 3615, "默": 3616, "黚": 3617, "黛": 3618, "黜": 3619, "點": 3620, "黨": 3621, "黯": 3622, "鼆": 3623, "鼎": 3624, "鼓": 3625, "鼠": 3626, "鼻": 3627, "齊": 3628, "齋": 3629, "齒": 3630, "齡": 3631, "齪": 3632, "齷": 3633, "龍": 3634, "龐": 3635, "龜": 3636, "龢": 3637, "更": 3638, "來": 3639, "不": 3640, "年": 3641, "聯": 3642, "料": 3643, "利": 3644, "立": 3645, "行": 3646, ".": 3647, "a": 3648, "b": 3649, "": 3650, "|": 0, "[UNK]": 3651, "[PAD]": 3652}