Dionyssos committed on
Commit
9146509
·
1 Parent(s): dafcadc
Files changed (3) hide show
  1. README.md +4 -2
  2. demo.py +5 -13
  3. msinference.py +6 -6
README.md CHANGED
@@ -26,9 +26,11 @@ Expansion of [SHIFT TTS tool](https://github.com/audeering/shift) with [AudioGen
26
 
27
  ## Available Voices
28
 
29
- <a href="https://audeering.github.io/shift/">Native English voices!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6">Non-native English accents!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv">Foreign languages</a>
30
 
31
- [TTS Demo](https://huggingface.co/dkounadis/artificial-styletts2/blob/main/demo.py) save `demo.wav`
 
 
32
 
33
  ## API
34
 
 
26
 
27
  ## Available Voices
28
 
29
+ <a href="https://audeering.github.io/shift/">Native English!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6">Non-native English accents!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv">Foreign languages</a>
30
 
31
+ ##
32
+
33
+ [TTS Demo](https://huggingface.co/dkounadis/artificial-styletts2/blob/main/demo.py)
34
 
35
  ## API
36
 
demo.py CHANGED
@@ -4,7 +4,7 @@ import msinference
4
 
5
 
6
  def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
7
- voice='af_ZA/google-nwu_1919', # 'serbian', # 'en_US/vctk_low#p276', 'isl', 'abi',
8
  speed=1.4, # only for non-english
9
  affect = True # False = high clarity for the partially sighted
10
  ):
@@ -14,11 +14,11 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
14
 
15
  or
16
 
17
- voice : FOREIGN ACCENTS
18
 
19
  or
20
 
21
- voice : 'deu' # from LHS code -> https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv
22
  '''
23
 
24
  # StyleTTS2 - En
@@ -33,11 +33,7 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
33
  '_low', '') + '.wav')
34
 
35
  x = msinference.inference(text,
36
- style_vector,
37
- alpha=0.3,
38
- beta=0.7,
39
- diffusion_steps=7,
40
- embedding_scale=1)
41
 
42
  # mimic-3 format of voice (English text - Foreign accent)
43
 
@@ -48,11 +44,7 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
48
  '_low', '') + '.wav')
49
 
50
  x = msinference.inference(text,
51
- style_vector,
52
- alpha=0.3,
53
- beta=0.7,
54
- diffusion_steps=7,
55
- embedding_scale=1)
56
 
57
 
58
  # Fallback - MMS TTS - Non-English
 
4
 
5
 
6
  def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
7
+ voice='af_ZA_google-nwu_1919', # 'serbian', # 'en_US/vctk_low#p276', 'isl', 'abi',
8
  speed=1.4, # only for non-english
9
  affect = True # False = high clarity for the partially sighted
10
  ):
 
14
 
15
  or
16
 
17
+ voice : 'af_ZA_google-nwu_1919' # from english non-native accents -> https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6
18
 
19
  or
20
 
21
+ voice : 'deu' # foreign langs -> https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv
22
  '''
23
 
24
  # StyleTTS2 - En
 
33
  '_low', '') + '.wav')
34
 
35
  x = msinference.inference(text,
36
+ style_vector)
 
 
 
 
37
 
38
  # mimic-3 format of voice (English text - Foreign accent)
39
 
 
44
  '_low', '') + '.wav')
45
 
46
  x = msinference.inference(text,
47
+ style_vector)
 
 
 
 
48
 
49
 
50
  # Fallback - MMS TTS - Non-English
msinference.py CHANGED
@@ -169,12 +169,12 @@ sampler = DiffusionSampler(
169
  clamp=False
170
  )
171
 
172
- def inference(text,
173
- ref_s,
174
- alpha = 0.3,
175
- beta = 0.7,
176
- diffusion_steps=5,
177
- embedding_scale=1,
178
  use_gruut=False):
179
  text = text.strip()
180
  ps = global_phonemizer.phonemize([text])
 
169
  clamp=False
170
  )
171
 
172
+ def inference(text,
173
+ ref_s,
174
+ alpha = 0.3,
175
+ beta = 0.7,
176
+ diffusion_steps=7,
177
+ embedding_scale=1,
178
  use_gruut=False):
179
  text = text.strip()
180
  ps = global_phonemizer.phonemize([text])