NeonBohdan
committed on
Commit
·
a4ee148
1
Parent(s):
6c155e1
Set female config
Browse files
- config.json +19 -26
- speaker_ids.json +1 -1
config.json
CHANGED
@@ -45,7 +45,7 @@
|
|
45 |
"use_grad_scaler": false,
|
46 |
"cudnn_enable": true,
|
47 |
"cudnn_deterministic": false,
|
48 |
-
"cudnn_benchmark":
|
49 |
"training_seed": 54321,
|
50 |
"model": "vits",
|
51 |
"num_loader_workers": 8,
|
@@ -58,7 +58,7 @@
|
|
58 |
"frame_shift_ms": null,
|
59 |
"frame_length_ms": null,
|
60 |
"stft_pad_mode": "reflect",
|
61 |
-
"sample_rate":
|
62 |
"resample": false,
|
63 |
"preemphasis": 0.0,
|
64 |
"ref_level_db": 20,
|
@@ -78,7 +78,7 @@
|
|
78 |
"do_amp_to_db_mel": true,
|
79 |
"pitch_fmax": 640.0,
|
80 |
"pitch_fmin": 0.0,
|
81 |
-
"signal_norm":
|
82 |
"min_level_db": -100,
|
83 |
"symmetric_norm": true,
|
84 |
"max_norm": 4.0,
|
@@ -100,7 +100,7 @@
|
|
100 |
"eos": "<EOS>",
|
101 |
"bos": "<BOS>",
|
102 |
"blank": "<BLNK>",
|
103 |
-
"characters": "
|
104 |
"punctuations": "!'(),-.:;? ",
|
105 |
"phonemes": null,
|
106 |
"is_unique": true,
|
@@ -120,12 +120,10 @@
|
|
120 |
"start_by_longest": false,
|
121 |
"datasets": [
|
122 |
{
|
123 |
-
"name": "
|
124 |
-
"path": "./
|
125 |
-
"meta_file_train": "",
|
126 |
-
"ignored_speakers":
|
127 |
-
"piotr_nater"
|
128 |
-
],
|
129 |
"language": "pl",
|
130 |
"meta_file_val": "",
|
131 |
"meta_file_attn_mask": ""
|
@@ -134,7 +132,7 @@
|
|
134 |
"test_sentences": [
|
135 |
[
|
136 |
"T\u0119cza, zjawisko optyczne i meteorologiczne, wyst\u0119puj\u0105ce w postaci charakterystycznego wielobarwnego \u0142uku.",
|
137 |
-
"
|
138 |
null,
|
139 |
"pl"
|
140 |
]
|
@@ -146,7 +144,7 @@
|
|
146 |
"use_language_weighted_sampler": true,
|
147 |
"language_weighted_sampler_alpha": 1.0,
|
148 |
"model_args": {
|
149 |
-
"num_chars":
|
150 |
"out_channels": 513,
|
151 |
"spec_segment_size": 32,
|
152 |
"hidden_channels": 192,
|
@@ -165,38 +163,33 @@
|
|
165 |
"resblock_type_decoder": "2",
|
166 |
"resblock_kernel_sizes_decoder": [
|
167 |
3,
|
168 |
-
|
169 |
-
|
170 |
],
|
171 |
"resblock_dilation_sizes_decoder": [
|
172 |
[
|
173 |
1,
|
174 |
-
|
175 |
-
5
|
176 |
],
|
177 |
[
|
178 |
-
|
179 |
-
|
180 |
-
5
|
181 |
],
|
182 |
[
|
183 |
-
1,
|
184 |
3,
|
185 |
-
|
186 |
]
|
187 |
],
|
188 |
"upsample_rates_decoder": [
|
189 |
8,
|
190 |
8,
|
191 |
-
|
192 |
-
2
|
193 |
],
|
194 |
-
"upsample_initial_channel_decoder":
|
195 |
"upsample_kernel_sizes_decoder": [
|
196 |
16,
|
197 |
16,
|
198 |
-
|
199 |
-
4
|
200 |
],
|
201 |
"use_sdp": true,
|
202 |
"noise_scale": 1.0,
|
|
|
45 |
"use_grad_scaler": false,
|
46 |
"cudnn_enable": true,
|
47 |
"cudnn_deterministic": false,
|
48 |
+
"cudnn_benchmark": false,
|
49 |
"training_seed": 54321,
|
50 |
"model": "vits",
|
51 |
"num_loader_workers": 8,
|
|
|
58 |
"frame_shift_ms": null,
|
59 |
"frame_length_ms": null,
|
60 |
"stft_pad_mode": "reflect",
|
61 |
+
"sample_rate": 22050,
|
62 |
"resample": false,
|
63 |
"preemphasis": 0.0,
|
64 |
"ref_level_db": 20,
|
|
|
78 |
"do_amp_to_db_mel": true,
|
79 |
"pitch_fmax": 640.0,
|
80 |
"pitch_fmin": 0.0,
|
81 |
+
"signal_norm": true,
|
82 |
"min_level_db": -100,
|
83 |
"symmetric_norm": true,
|
84 |
"max_norm": 4.0,
|
|
|
100 |
"eos": "<EOS>",
|
101 |
"bos": "<BOS>",
|
102 |
"blank": "<BLNK>",
|
103 |
+
"characters": "abcdefghijklmnopqrstuvwxyz\u00af\u00b7\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u00ff\u0101\u0105\u0107\u0113\u0119\u011b\u012b\u0131\u0142\u0144\u014d\u0151\u0153\u015b\u016b\u0171\u017a\u017c\u01ce\u01d0\u01d2\u01d4\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u0451\u0454\u0456\u0457\u0491",
|
104 |
"punctuations": "!'(),-.:;? ",
|
105 |
"phonemes": null,
|
106 |
"is_unique": true,
|
|
|
120 |
"start_by_longest": false,
|
121 |
"datasets": [
|
122 |
{
|
123 |
+
"name": "common_voice",
|
124 |
+
"path": "./datasets/pl_CV/",
|
125 |
+
"meta_file_train": "clean.tsv",
|
126 |
+
"ignored_speakers": null,
|
|
|
|
|
127 |
"language": "pl",
|
128 |
"meta_file_val": "",
|
129 |
"meta_file_attn_mask": ""
|
|
|
132 |
"test_sentences": [
|
133 |
[
|
134 |
"T\u0119cza, zjawisko optyczne i meteorologiczne, wyst\u0119puj\u0105ce w postaci charakterystycznego wielobarwnego \u0142uku.",
|
135 |
+
"MCV_72a8725537f15ecfbd2aca0c342ee4ac0367ad23595ccd3d7efadb015af1af5e0614adb92ad6bf009711df662453a870c31467ca0ec71005a00aa82dc5e02a40",
|
136 |
null,
|
137 |
"pl"
|
138 |
]
|
|
|
144 |
"use_language_weighted_sampler": true,
|
145 |
"language_weighted_sampler_alpha": 1.0,
|
146 |
"model_args": {
|
147 |
+
"num_chars": 129,
|
148 |
"out_channels": 513,
|
149 |
"spec_segment_size": 32,
|
150 |
"hidden_channels": 192,
|
|
|
163 |
"resblock_type_decoder": "2",
|
164 |
"resblock_kernel_sizes_decoder": [
|
165 |
3,
|
166 |
+
5,
|
167 |
+
7
|
168 |
],
|
169 |
"resblock_dilation_sizes_decoder": [
|
170 |
[
|
171 |
1,
|
172 |
+
2
|
|
|
173 |
],
|
174 |
[
|
175 |
+
2,
|
176 |
+
6
|
|
|
177 |
],
|
178 |
[
|
|
|
179 |
3,
|
180 |
+
12
|
181 |
]
|
182 |
],
|
183 |
"upsample_rates_decoder": [
|
184 |
8,
|
185 |
8,
|
186 |
+
4
|
|
|
187 |
],
|
188 |
+
"upsample_initial_channel_decoder": 256,
|
189 |
"upsample_kernel_sizes_decoder": [
|
190 |
16,
|
191 |
16,
|
192 |
+
8
|
|
|
193 |
],
|
194 |
"use_sdp": true,
|
195 |
"noise_scale": 1.0,
|
speaker_ids.json
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
{
|
2 |
"pl": 0,
|
3 |
-
"
|
4 |
}
|
|
|
1 |
{
|
2 |
"pl": 0,
|
3 |
+
"female": 0
|
4 |
}
|