NeonBohdan committed on
Commit
a4ee148
·
1 Parent(s): 6c155e1

Set female config

Browse files
Files changed (2) hide show
  1. config.json +19 -26
  2. speaker_ids.json +1 -1
config.json CHANGED
@@ -45,7 +45,7 @@
45
  "use_grad_scaler": false,
46
  "cudnn_enable": true,
47
  "cudnn_deterministic": false,
48
- "cudnn_benchmark": true,
49
  "training_seed": 54321,
50
  "model": "vits",
51
  "num_loader_workers": 8,
@@ -58,7 +58,7 @@
58
  "frame_shift_ms": null,
59
  "frame_length_ms": null,
60
  "stft_pad_mode": "reflect",
61
- "sample_rate": 16000,
62
  "resample": false,
63
  "preemphasis": 0.0,
64
  "ref_level_db": 20,
@@ -78,7 +78,7 @@
78
  "do_amp_to_db_mel": true,
79
  "pitch_fmax": 640.0,
80
  "pitch_fmin": 0.0,
81
- "signal_norm": false,
82
  "min_level_db": -100,
83
  "symmetric_norm": true,
84
  "max_norm": 4.0,
@@ -100,7 +100,7 @@
100
  "eos": "<EOS>",
101
  "bos": "<BOS>",
102
  "blank": "<BLNK>",
103
- "characters": "a\u0105bc\u0107de\u0119fghijkl\u0142mn\u0144o\u00f3pqrs\u015btuvwxyz\u017a\u017c",
104
  "punctuations": "!'(),-.:;? ",
105
  "phonemes": null,
106
  "is_unique": true,
@@ -120,12 +120,10 @@
120
  "start_by_longest": false,
121
  "datasets": [
122
  {
123
- "name": "mailabs",
124
- "path": "./logs/pl_PL",
125
- "meta_file_train": "",
126
- "ignored_speakers": [
127
- "piotr_nater"
128
- ],
129
  "language": "pl",
130
  "meta_file_val": "",
131
  "meta_file_attn_mask": ""
@@ -134,7 +132,7 @@
134
  "test_sentences": [
135
  [
136
  "T\u0119cza, zjawisko optyczne i meteorologiczne, wyst\u0119puj\u0105ce w postaci charakterystycznego wielobarwnego \u0142uku.",
137
- "nina_brown",
138
  null,
139
  "pl"
140
  ]
@@ -146,7 +144,7 @@
146
  "use_language_weighted_sampler": true,
147
  "language_weighted_sampler_alpha": 1.0,
148
  "model_args": {
149
- "num_chars": 50,
150
  "out_channels": 513,
151
  "spec_segment_size": 32,
152
  "hidden_channels": 192,
@@ -165,38 +163,33 @@
165
  "resblock_type_decoder": "2",
166
  "resblock_kernel_sizes_decoder": [
167
  3,
168
- 7,
169
- 11
170
  ],
171
  "resblock_dilation_sizes_decoder": [
172
  [
173
  1,
174
- 3,
175
- 5
176
  ],
177
  [
178
- 1,
179
- 3,
180
- 5
181
  ],
182
  [
183
- 1,
184
  3,
185
- 5
186
  ]
187
  ],
188
  "upsample_rates_decoder": [
189
  8,
190
  8,
191
- 2,
192
- 2
193
  ],
194
- "upsample_initial_channel_decoder": 512,
195
  "upsample_kernel_sizes_decoder": [
196
  16,
197
  16,
198
- 4,
199
- 4
200
  ],
201
  "use_sdp": true,
202
  "noise_scale": 1.0,
 
45
  "use_grad_scaler": false,
46
  "cudnn_enable": true,
47
  "cudnn_deterministic": false,
48
+ "cudnn_benchmark": false,
49
  "training_seed": 54321,
50
  "model": "vits",
51
  "num_loader_workers": 8,
 
58
  "frame_shift_ms": null,
59
  "frame_length_ms": null,
60
  "stft_pad_mode": "reflect",
61
+ "sample_rate": 22050,
62
  "resample": false,
63
  "preemphasis": 0.0,
64
  "ref_level_db": 20,
 
78
  "do_amp_to_db_mel": true,
79
  "pitch_fmax": 640.0,
80
  "pitch_fmin": 0.0,
81
+ "signal_norm": true,
82
  "min_level_db": -100,
83
  "symmetric_norm": true,
84
  "max_norm": 4.0,
 
100
  "eos": "<EOS>",
101
  "bos": "<BOS>",
102
  "blank": "<BLNK>",
103
+ "characters": "abcdefghijklmnopqrstuvwxyz\u00af\u00b7\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u00ff\u0101\u0105\u0107\u0113\u0119\u011b\u012b\u0131\u0142\u0144\u014d\u0151\u0153\u015b\u016b\u0171\u017a\u017c\u01ce\u01d0\u01d2\u01d4\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u0451\u0454\u0456\u0457\u0491",
104
  "punctuations": "!'(),-.:;? ",
105
  "phonemes": null,
106
  "is_unique": true,
 
120
  "start_by_longest": false,
121
  "datasets": [
122
  {
123
+ "name": "common_voice",
124
+ "path": "./datasets/pl_CV/",
125
+ "meta_file_train": "clean.tsv",
126
+ "ignored_speakers": null,
 
 
127
  "language": "pl",
128
  "meta_file_val": "",
129
  "meta_file_attn_mask": ""
 
132
  "test_sentences": [
133
  [
134
  "T\u0119cza, zjawisko optyczne i meteorologiczne, wyst\u0119puj\u0105ce w postaci charakterystycznego wielobarwnego \u0142uku.",
135
+ "MCV_72a8725537f15ecfbd2aca0c342ee4ac0367ad23595ccd3d7efadb015af1af5e0614adb92ad6bf009711df662453a870c31467ca0ec71005a00aa82dc5e02a40",
136
  null,
137
  "pl"
138
  ]
 
144
  "use_language_weighted_sampler": true,
145
  "language_weighted_sampler_alpha": 1.0,
146
  "model_args": {
147
+ "num_chars": 129,
148
  "out_channels": 513,
149
  "spec_segment_size": 32,
150
  "hidden_channels": 192,
 
163
  "resblock_type_decoder": "2",
164
  "resblock_kernel_sizes_decoder": [
165
  3,
166
+ 5,
167
+ 7
168
  ],
169
  "resblock_dilation_sizes_decoder": [
170
  [
171
  1,
172
+ 2
 
173
  ],
174
  [
175
+ 2,
176
+ 6
 
177
  ],
178
  [
 
179
  3,
180
+ 12
181
  ]
182
  ],
183
  "upsample_rates_decoder": [
184
  8,
185
  8,
186
+ 4
 
187
  ],
188
+ "upsample_initial_channel_decoder": 256,
189
  "upsample_kernel_sizes_decoder": [
190
  16,
191
  16,
192
+ 8
 
193
  ],
194
  "use_sdp": true,
195
  "noise_scale": 1.0,
speaker_ids.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
  "pl": 0,
3
- "nina_brown": 0
4
  }
 
1
  {
2
  "pl": 0,
3
+ "female": 0
4
  }