dkounadis
/

artificial-styletts2

@@ -26,9 +26,11 @@ Expansion of [SHIFT TTS tool](https://github.com/audeering/shift) with [AudioGen
 ## Available Voices
-<a href="https://audeering.github.io/shift/">Native English voices!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6">Non-native English accents!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv">Foreign languages</a>
-[TTS Demo](https://huggingface.co/dkounadis/artificial-styletts2/blob/main/demo.py) save `demo.wav`
 ## API

 ## Available Voices
+<a href="https://audeering.github.io/shift/">Native English!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6">Non-native English accents!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv">Foreign languages</a>
+##
+[TTS Demo](https://huggingface.co/dkounadis/artificial-styletts2/blob/main/demo.py)
 ## API

demo.py CHANGED Viewed

@@ -4,7 +4,7 @@ import msinference
 def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
-              voice='af_ZA/google-nwu_1919',  # 'serbian', # 'en_US/vctk_low#p276', 'isl', 'abi',
               speed=1.4,  # only for non-english
               affect = True  # False = high clarity for the partially sighted
               ):
@@ -14,11 +14,11 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
           or
-       voice : FOREIGN ACCENTS
           or
-       voice : 'deu'  # from LHS code -> https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv
        '''
     # StyleTTS2 - En
@@ -33,11 +33,7 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
                                                         '_low', '') + '.wav')
         x = msinference.inference(text,
-                                    style_vector,
-                                    alpha=0.3,
-                                    beta=0.7,
-                                    diffusion_steps=7,
-                                    embedding_scale=1)
     # mimic-3 format of voice (English text - Foreign accent)
@@ -48,11 +44,7 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
                                                         '_low', '') + '.wav')
         x = msinference.inference(text,
-                                    style_vector,
-                                    alpha=0.3,
-                                    beta=0.7,
-                                    diffusion_steps=7,
-                                    embedding_scale=1)
     # Fallback - MMS TTS - Non-English

 def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
+              voice='af_ZA_google-nwu_1919',  # 'serbian', # 'en_US/vctk_low#p276', 'isl', 'abi',
               speed=1.4,  # only for non-english
               affect = True  # False = high clarity for the partially sighted
               ):
           or
+       voice : 'af_ZA_google-nwu_1919' # from English non-native accents -> https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6
           or
+       voice : 'deu'  # foreign langs -> https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv
        '''
     # StyleTTS2 - En
                                                         '_low', '') + '.wav')
         x = msinference.inference(text,
+                                    style_vector)
     # mimic-3 format of voice (English text - Foreign accent)
                                                         '_low', '') + '.wav')
         x = msinference.inference(text,
+                                    style_vector)
     # Fallback - MMS TTS - Non-English

msinference.py CHANGED Viewed

@@ -169,12 +169,12 @@ sampler = DiffusionSampler(
     clamp=False
 )
-def inference(text,
-              ref_s,
-              alpha = 0.3,
-              beta = 0.7,
-              diffusion_steps=5,
-              embedding_scale=1,
               use_gruut=False):
     text = text.strip()
     ps = global_phonemizer.phonemize([text])

     clamp=False
 )
+def inference(text,
+              ref_s,
+              alpha = 0.3,
+              beta = 0.7,
+              diffusion_steps=7,
+              embedding_scale=1,
               use_gruut=False):
     text = text.strip()
     ps = global_phonemizer.phonemize([text])