Spaces:
Running
Running
feat: worked vixtts
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import torch
|
|
6 |
import torchaudio
|
7 |
|
8 |
# download for mecab
|
9 |
-
|
10 |
|
11 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
12 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
@@ -394,159 +394,6 @@ Supported languages: Arabic: ar, Brazilian Portuguese: pt , Mandarin Chinese: zh
|
|
394 |
article = """
|
395 |
|
396 |
"""
|
397 |
-
examples = [
|
398 |
-
[
|
399 |
-
"Once when I was six years old I saw a magnificent picture",
|
400 |
-
"en",
|
401 |
-
"examples/female.wav",
|
402 |
-
None,
|
403 |
-
False,
|
404 |
-
False,
|
405 |
-
False,
|
406 |
-
True,
|
407 |
-
],
|
408 |
-
[
|
409 |
-
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
410 |
-
"fr",
|
411 |
-
"examples/male.wav",
|
412 |
-
None,
|
413 |
-
False,
|
414 |
-
False,
|
415 |
-
False,
|
416 |
-
True,
|
417 |
-
],
|
418 |
-
[
|
419 |
-
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
420 |
-
"de",
|
421 |
-
"examples/female.wav",
|
422 |
-
None,
|
423 |
-
False,
|
424 |
-
False,
|
425 |
-
False,
|
426 |
-
True,
|
427 |
-
],
|
428 |
-
[
|
429 |
-
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
430 |
-
"es",
|
431 |
-
"examples/male.wav",
|
432 |
-
None,
|
433 |
-
False,
|
434 |
-
False,
|
435 |
-
False,
|
436 |
-
True,
|
437 |
-
],
|
438 |
-
[
|
439 |
-
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
440 |
-
"pt",
|
441 |
-
"examples/female.wav",
|
442 |
-
None,
|
443 |
-
False,
|
444 |
-
False,
|
445 |
-
False,
|
446 |
-
True,
|
447 |
-
],
|
448 |
-
[
|
449 |
-
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
450 |
-
"pl",
|
451 |
-
"examples/male.wav",
|
452 |
-
None,
|
453 |
-
False,
|
454 |
-
False,
|
455 |
-
False,
|
456 |
-
True,
|
457 |
-
],
|
458 |
-
[
|
459 |
-
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
460 |
-
"it",
|
461 |
-
"examples/female.wav",
|
462 |
-
None,
|
463 |
-
False,
|
464 |
-
False,
|
465 |
-
False,
|
466 |
-
True,
|
467 |
-
],
|
468 |
-
[
|
469 |
-
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
470 |
-
"tr",
|
471 |
-
"examples/female.wav",
|
472 |
-
None,
|
473 |
-
False,
|
474 |
-
False,
|
475 |
-
False,
|
476 |
-
True,
|
477 |
-
],
|
478 |
-
[
|
479 |
-
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
480 |
-
"ru",
|
481 |
-
"examples/female.wav",
|
482 |
-
None,
|
483 |
-
False,
|
484 |
-
False,
|
485 |
-
False,
|
486 |
-
True,
|
487 |
-
],
|
488 |
-
[
|
489 |
-
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
490 |
-
"nl",
|
491 |
-
"examples/male.wav",
|
492 |
-
None,
|
493 |
-
False,
|
494 |
-
False,
|
495 |
-
False,
|
496 |
-
True,
|
497 |
-
],
|
498 |
-
[
|
499 |
-
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
500 |
-
"cs",
|
501 |
-
"examples/female.wav",
|
502 |
-
None,
|
503 |
-
False,
|
504 |
-
False,
|
505 |
-
False,
|
506 |
-
True,
|
507 |
-
],
|
508 |
-
[
|
509 |
-
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
510 |
-
"zh-cn",
|
511 |
-
"examples/female.wav",
|
512 |
-
None,
|
513 |
-
False,
|
514 |
-
False,
|
515 |
-
False,
|
516 |
-
True,
|
517 |
-
],
|
518 |
-
[
|
519 |
-
"かつて 六歳のとき、素晴らしい絵を見ました",
|
520 |
-
"ja",
|
521 |
-
"examples/female.wav",
|
522 |
-
None,
|
523 |
-
False,
|
524 |
-
True,
|
525 |
-
False,
|
526 |
-
True,
|
527 |
-
],
|
528 |
-
[
|
529 |
-
"한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
|
530 |
-
"ko",
|
531 |
-
"examples/female.wav",
|
532 |
-
None,
|
533 |
-
False,
|
534 |
-
True,
|
535 |
-
False,
|
536 |
-
True,
|
537 |
-
],
|
538 |
-
[
|
539 |
-
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
540 |
-
"hu",
|
541 |
-
"examples/male.wav",
|
542 |
-
None,
|
543 |
-
False,
|
544 |
-
True,
|
545 |
-
False,
|
546 |
-
True,
|
547 |
-
],
|
548 |
-
]
|
549 |
-
|
550 |
|
551 |
with gr.Blocks(analytics_enabled=False) as demo:
|
552 |
with gr.Row():
|
@@ -601,7 +448,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
601 |
label="Reference Audio",
|
602 |
info="Click on the ✎ button to upload your own target speaker audio",
|
603 |
type="filepath",
|
604 |
-
value="
|
605 |
)
|
606 |
mic_gr = gr.Audio(
|
607 |
source="microphone",
|
@@ -638,25 +485,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
638 |
out_text_gr = gr.Text(label="Metrics")
|
639 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
640 |
|
641 |
-
with gr.Row():
|
642 |
-
gr.Examples(
|
643 |
-
examples,
|
644 |
-
label="Examples",
|
645 |
-
inputs=[
|
646 |
-
input_text_gr,
|
647 |
-
language_gr,
|
648 |
-
ref_gr,
|
649 |
-
mic_gr,
|
650 |
-
use_mic_gr,
|
651 |
-
clean_ref_gr,
|
652 |
-
auto_det_lang_gr,
|
653 |
-
tos_gr,
|
654 |
-
],
|
655 |
-
outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
|
656 |
-
fn=predict,
|
657 |
-
cache_examples=False,
|
658 |
-
)
|
659 |
-
|
660 |
tts_button.click(
|
661 |
predict,
|
662 |
[
|
|
|
6 |
import torchaudio
|
7 |
|
8 |
# download for mecab
|
9 |
+
os.system("python -m unidic download")
|
10 |
|
11 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
12 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
394 |
article = """
|
395 |
|
396 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
|
398 |
with gr.Blocks(analytics_enabled=False) as demo:
|
399 |
with gr.Row():
|
|
|
448 |
label="Reference Audio",
|
449 |
info="Click on the ✎ button to upload your own target speaker audio",
|
450 |
type="filepath",
|
451 |
+
value="model/samples/nu-luu-loat.wav",
|
452 |
)
|
453 |
mic_gr = gr.Audio(
|
454 |
source="microphone",
|
|
|
485 |
out_text_gr = gr.Text(label="Metrics")
|
486 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
487 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
tts_button.click(
|
489 |
predict,
|
490 |
[
|