fantos committed on
Commit
de41fea
·
verified ·
1 Parent(s): 736ae1b

Upload 3 files

Browse files
Files changed (3) hide show
  1. app (31).py +130 -0
  2. packages (4).txt +3 -0
  3. requirements (12).txt +2 -0
app (31).py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import outetts
4
+ from outetts.version.v2.interface import _DEFAULT_SPEAKERS
5
+ import torch
6
+ import spaces
7
+
8
def get_available_speakers():
    """Return the names of the bundled default speakers as a list."""
    # Iterating a mapping yields its keys, so this matches list(d.keys()).
    return list(_DEFAULT_SPEAKERS)
11
+
12
@spaces.GPU
def generate_tts(
    text, temperature, repetition_penalty,
    speaker_selection, reference_audio
):
    """Generate speech for ``text`` with OuteTTS-0.3-1B.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is rejected
            before the model is loaded.
        temperature: Sampling temperature passed to ``GenerationConfig``.
        repetition_penalty: Repetition penalty passed to ``GenerationConfig``.
        speaker_selection: Name of a default speaker. An empty value or the
            string ``"None"`` means no default speaker is loaded.
        reference_audio: Optional reference clip used to create a custom
            speaker (the UI passes a file path). When set, it takes
            precedence over ``speaker_selection``.

    Returns:
        A ``(audio_path, error_message)`` tuple. On success ``audio_path`` is
        the path of the generated WAV file and ``error_message`` is ``None``;
        on failure ``audio_path`` is ``None`` and ``error_message`` describes
        the problem.
    """
    import tempfile

    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    try:
        # Built inside the try block so that model download/load failures are
        # reported through the error box instead of escaping the handler.
        model_config = outetts.HFModelConfig_v2(
            model_path="OuteAI/OuteTTS-0.3-1B",
            tokenizer_path="OuteAI/OuteTTS-0.3-1B",
            dtype=torch.bfloat16,
            device="cuda"
        )
        interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)

        # Custom speaker from reference audio overrides the dropdown.
        if reference_audio:
            speaker = interface.create_speaker(reference_audio)

        # Use selected default speaker
        elif speaker_selection and speaker_selection != "None":
            speaker = interface.load_default_speaker(speaker_selection)

        # No speaker - random characteristics
        else:
            speaker = None

        gen_cfg = outetts.GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=speaker,
        )
        output = interface.generate(config=gen_cfg)

        # Verify output
        if output.audio is None:
            raise ValueError("Model failed to generate audio. This may be due to input length constraints or early EOS token.")

        # Write to a unique file per request: a fixed "output.wav" would be
        # overwritten when two generations run at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        output.save(output_path)
        return output_path, None

    except Exception as e:
        # Top-level UI boundary: report the failure to the caller instead of
        # raising. Fall back to repr() so an exception with an empty message
        # still produces visible text.
        return None, str(e) or repr(e)
60
+
61
def _error_box_visibility(message):
    """Return an update that shows the error box only for a non-empty message."""
    return gr.update(visible=bool(message))


# Layout note: components render in the order they are created, so the
# statement order inside the Blocks context is significant.
with gr.Blocks() as demo:
    gr.Markdown("# OuteTTS-0.3-1B Text-to-Speech Demo")

    # Starts hidden; its visibility is re-evaluated after every generation.
    error_box = gr.Textbox(label="Error Messages", visible=False)

    with gr.Row():
        # Left column: all generation inputs.
        with gr.Column():

            speaker_dropdown = gr.Dropdown(
                choices=get_available_speakers(),
                value="en_male_1",
                label="Speaker Selection"
            )

            text_input = gr.Textbox(
                label="Text to Synthesize",
                placeholder="Enter text here..."
            )

            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.1,
                label="Temperature (lower = more stable tone, higher = more expressive)"
            )

            repetition_penalty = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.1,
                label="Repetition Penalty"
            )

            gr.Markdown("""
            ### Voice Cloning Guidelines:
            - Use around 7-10 seconds of clear, noise-free audio
            - For transcription interface will use Whisper turbo to transcribe the audio file
            - Longer audio clips will reduce maximum output length
            - Custom speaker overrides speaker selection
            """)

            reference_audio = gr.Audio(
                label="Reference Audio (for voice cloning)",
                type="filepath"
            )

            submit_button = gr.Button("Generate Speech")

        # Right column: the synthesized result.
        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Audio",
                type="filepath"
            )

    # Order must match the positional parameters of generate_tts.
    tts_inputs = [
        text_input,
        temperature,
        repetition_penalty,
        speaker_dropdown,
        reference_audio,
    ]

    generation_event = submit_button.click(
        fn=generate_tts,
        inputs=tts_inputs,
        outputs=[audio_output, error_box]
    )

    # After generation finishes, reveal or hide the error box based on
    # whether it received any text.
    generation_event.then(
        fn=_error_box_visibility,
        inputs=[error_box],
        outputs=[error_box]
    )

demo.launch()
packages (4).txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ libasound2-dev
2
+ portaudio19-dev
3
+ ffmpeg
requirements (12).txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ outetts==0.3.0