ayymen committed on
Commit
ad86f4b
·
1 Parent(s): edbfc9a

Use multi-speaker model

Browse files
Files changed (3) hide show
  1. app.py +24 -13
  2. language_ids.json +4 -0
  3. speakers.pth +3 -0
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
6
 
7
  CUDA = torch.cuda.is_available()
8
 
9
- REPO_ID = "ayymen/Coqui-TTS-Vits-shi"
10
 
11
  VOICE_CONVERSION_MODELS = {
12
  'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
@@ -14,26 +14,36 @@ VOICE_CONVERSION_MODELS = {
14
  'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
15
  }
16
 
 
 
 
 
17
  my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
18
  my_description = "This model is based on [VITS](https://github.com/jaywalnut310/vits), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
19
 
20
  my_examples = [
21
- ["ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?"],
22
- ["ⵡⴰ ⵜⴰⵎⵖⴰⵔⵜ ⵎⴰ ⴷ ⵓⴽⴰⵏ ⵜⵙⴽⵔⵜ?"],
23
- ["ⴳⵏ ⴰⴷ ⴰⴽ ⵉⵙⵙⴳⵏ ⵕⴱⴱⵉ ⵉⵜⵜⵓ ⴽ."],
24
- ["ⴰⵔⵔⴰⵡ ⵏ ⵍⵀⵎⵎ ⵢⵓⴽⵔ ⴰⵖ ⵉⵀⴷⵓⵎⵏ ⵏⵏⵖ!"]
 
 
 
 
25
  ]
26
 
27
  my_inputs = [
28
- gr.Textbox(lines=5, label="Input Text", placeholder="The only available characters are: ⴰⴱⴳⴷⴹⴻⴼⴽⵀⵃⵄⵅⵇⵉⵊⵍⵎⵏⵓⵔⵕⵖⵙⵚⵛⵜⵟⵡⵢⵣⵥⵯ !,.:?"),
29
- gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
30
- gr.Dropdown(label="Voice Conversion Model", choices=list(VOICE_CONVERSION_MODELS.keys())),
31
- gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=True)
 
 
32
  ]
33
 
34
  my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
35
 
36
- best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
37
  config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
38
 
39
  api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
@@ -42,21 +52,22 @@ api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA
42
  for model in VOICE_CONVERSION_MODELS.values():
43
  api.load_vc_model_by_name(model, gpu=CUDA)
44
 
45
- def tts(text: str, speaker_wav: str = None, voice_cv_model: str = 'freevc24', split_sentences: bool = True):
46
  # replace oov characters
47
  text = text.replace("\n", ". ")
48
  text = text.replace("(", ",")
49
  text = text.replace(")", ",")
50
  text = text.replace('"', ",")
 
51
  text = text.replace(";", ",")
52
  text = text.replace("-", " ")
53
 
54
  with tempfile.NamedTemporaryFile(suffix = ".wav", delete = False) as fp:
55
  if speaker_wav:
56
  api.load_vc_model_by_name(VOICE_CONVERSION_MODELS[voice_cv_model], gpu=CUDA)
57
- api.tts_with_vc_to_file(text, speaker_wav=speaker_wav, file_path=fp.name, split_sentences=split_sentences)
58
  else:
59
- api.tts_to_file(text, file_path=fp.name, split_sentences=split_sentences)
60
 
61
  return fp.name
62
 
 
6
 
7
  CUDA = torch.cuda.is_available()
8
 
9
+ REPO_ID = "ayymen/Coqui-TTS-Vits-Multispeaker"
10
 
11
  VOICE_CONVERSION_MODELS = {
12
  'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
 
14
  'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
15
  }
16
 
17
+ VARIANTS = {"Tachelhit": "shi", "Tarifit": "rif"}
18
+
19
+ SPEAKERS = ["yan", "sin", "idj"]
20
+
21
  my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
22
  my_description = "This model is based on [VITS](https://github.com/jaywalnut310/vits), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
23
 
24
  my_examples = [
25
+ ["ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?", "shi", "yan", True],
26
+ ["ⵡⴰ ⵜⴰⵎⵖⴰⵔⵜ ⵎⴰ ⴷ ⵓⴽⴰⵏ ⵜⵙⴽⵔⵜ?", "shi", "sin", False],
27
+ ["ⴳⵏ ⴰⴷ ⴰⴽ ⵉⵙⵙⴳⵏ ⵕⴱⴱⵉ ⵉⵜⵜⵓ ⴽ.", "shi", "yan", False],
28
+ ["ⴰⵔⵔⴰⵡ ⵏ ⵍⵀⵎⵎ ⵢⵓⴽⵔ ⴰⵖ ⵉⵀⴷⵓⵎⵏ ⵏⵏⵖ!", "shi", "yan", False],
29
+ ["ⴰⵣⵓⵍ. ⵎⴰⵎⵛ ⵜⴷⵊⵉⵜ?", "rif", "idj", True],
30
+ ["ⴰⵇⵎⵎⵓⵎ ⵉⵇⵏⴻⵏ ⵓⵔ ⵜ ⵜⵜⵉⴷⴼⵏ ⵉⵣⴰⵏ.", "rif", "idj", False],
31
+ ["ⵇⵇⵉⵎ ⵅ ⵜⴰⴷⴷⴰⵔⵜ ⵏⵏⵛ!", "rif", "idj", False],
32
+ ["ⵜⴻⵜⵜⵏ ⴰⴳ ⵡⵓⵛⵛⵏ, ⵜⵜⵔⵓⵏ ⵅ ⵓⵎⴽⵙⴰ.", "rif", "idj", False]
33
  ]
34
 
35
  my_inputs = [
36
+ gr.Textbox(lines=5, label="Input Text", placeholder="The only available characters are: ⴰⴱⴳⴷⴹⴻⴼⴽⵀⵃⵄⵅⵇⵉⵊⵍⵎⵏⵓⵔⵕⵖⵙⵚⵛⵜⵟⵡⵢⵣⵥⵯ !,.:?"),
37
+ gr.Dropdown(label="Variant", choices=list(VARIANTS.items()), value="shi"),
38
+ gr.Dropdown(label="Speaker", choices=SPEAKERS, value="yan"),
39
+ gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=False),
40
+ gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
41
+ gr.Dropdown(label="Voice Conversion Model", choices=list(VOICE_CONVERSION_MODELS.keys())),
42
  ]
43
 
44
  my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
45
 
46
+ best_model_path = hf_hub_download(repo_id=REPO_ID, filename="checkpoint_390000.pth")
47
  config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
48
 
49
  api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
 
52
  for model in VOICE_CONVERSION_MODELS.values():
53
  api.load_vc_model_by_name(model, gpu=CUDA)
54
 
55
def tts(text: str, variant: str = "shi", speaker: str = "yan", split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
    """Synthesize ``text`` into a temporary WAV file and return the file path.

    Args:
        text: Input text. Characters the model cannot pronounce (newlines,
            brackets, quotes, semicolons, hyphens) are replaced before
            synthesis.
        variant: Language code forwarded to the model as ``language``
            (e.g. ``"shi"`` or ``"rif"``).
        speaker: Speaker name forwarded to the model as ``speaker``.
        split_sentences: Forwarded unchanged to the synthesis call.
        speaker_wav: Optional path to a reference recording. When given, the
            output is produced with ``api.tts_with_vc_to_file`` (voice
            conversion); otherwise ``api.tts_to_file`` is used.
        voice_cv_model: Key into ``VOICE_CONVERSION_MODELS``. Only consulted
            when ``speaker_wav`` is given. An empty/``None`` value falls back
            to ``'freevc24'`` instead of raising ``KeyError``.

    Returns:
        Path of the generated ``.wav`` file. The file is not deleted by this
        function on success; the caller owns it.

    Raises:
        KeyError: If ``voice_cv_model`` is a non-empty name that is not a key
            of ``VOICE_CONVERSION_MODELS``.
    """
    import os  # local import: the file's import block is outside this block

    # Replace out-of-vocabulary characters in a single pass. None of the
    # replacement strings contains a character that is itself replaced, so
    # this is equivalent to applying the replacements one after another.
    oov_table = str.maketrans({
        "\n": ". ",
        "(": ",",
        ")": ",",
        '"': ",",
        "'": ",",
        ";": ",",
        "-": " ",
    })
    text = text.translate(oov_table)

    # Reserve a unique path, then close the handle *before* the TTS library
    # writes to it: writing to a path that is still held open fails on
    # platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        output_path = fp.name

    try:
        if speaker_wav:
            # The dropdown may deliver no selection; use the signature's
            # default model in that case rather than failing with KeyError.
            vc_model_name = VOICE_CONVERSION_MODELS[voice_cv_model or 'freevc24']
            api.load_vc_model_by_name(vc_model_name, gpu=CUDA)
            api.tts_with_vc_to_file(
                text,
                speaker_wav=speaker_wav,
                file_path=output_path,
                split_sentences=split_sentences,
                speaker=speaker,
                language=variant,
            )
        else:
            api.tts_to_file(
                text,
                file_path=output_path,
                split_sentences=split_sentences,
                speaker=speaker,
                language=variant,
            )
    except Exception:
        # delete=False means nobody else cleans up: remove the orphaned file
        # before propagating the original error.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    return output_path
73
 
language_ids.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "rif": 0,
3
+ "shi": 1
4
+ }
speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9df430489a8bf3eac98f38325dbdbd8d986fa731787724406062bacac5a471
3
+ size 864