Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -109,6 +109,20 @@ custom_css = """
|
|
109 |
border-radius: 8px;
|
110 |
margin: 1rem 0;
|
111 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
"""
|
113 |
|
114 |
# Create the Gradio interface with 3D styling
|
@@ -118,60 +132,65 @@ with gr.Blocks(css=custom_css) as demo:
|
|
118 |
error_box = gr.Textbox(label="Error Messages", visible=False, elem_classes="error-box")
|
119 |
|
120 |
with gr.Row(elem_classes="container"):
|
121 |
-
|
122 |
-
|
123 |
-
speaker_dropdown = gr.Dropdown(
|
124 |
-
choices=get_available_speakers(),
|
125 |
-
value="en_male_1",
|
126 |
-
label="Speaker Selection",
|
127 |
-
elem_classes="input-group"
|
128 |
-
)
|
129 |
-
|
130 |
text_input = gr.Textbox(
|
131 |
label="Text to Synthesize",
|
132 |
placeholder="Enter text here...",
|
133 |
-
elem_classes="input-group"
|
134 |
-
|
135 |
-
|
136 |
-
temperature = gr.Slider(
|
137 |
-
0.1, 1.0,
|
138 |
-
value=0.1,
|
139 |
-
label="Temperature (lower = more stable tone, higher = more expressive)",
|
140 |
-
elem_classes="slider-3d"
|
141 |
-
)
|
142 |
-
|
143 |
-
repetition_penalty = gr.Slider(
|
144 |
-
0.5, 2.0,
|
145 |
-
value=1.1,
|
146 |
-
label="Repetition Penalty",
|
147 |
-
elem_classes="slider-3d"
|
148 |
-
)
|
149 |
-
|
150 |
-
gr.Markdown("""
|
151 |
-
### Voice Cloning Guidelines:
|
152 |
-
- Use around 7-10 seconds of clear, noise-free audio
|
153 |
-
- For transcription interface will use Whisper turbo to transcribe the audio file
|
154 |
-
- Longer audio clips will reduce maximum output length
|
155 |
-
- Custom speaker overrides speaker selection
|
156 |
-
""", elem_classes="input-group")
|
157 |
-
|
158 |
-
reference_audio = gr.Audio(
|
159 |
-
label="Reference Audio (for voice cloning)",
|
160 |
-
type="filepath",
|
161 |
-
elem_classes="input-group"
|
162 |
)
|
163 |
|
164 |
submit_button = gr.Button(
|
165 |
"Generate Speech",
|
166 |
elem_classes="button-3d"
|
167 |
)
|
168 |
-
|
169 |
-
|
|
|
|
|
170 |
audio_output = gr.Audio(
|
171 |
label="Generated Audio",
|
172 |
type="filepath",
|
173 |
elem_classes="input-group"
|
174 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
submit_button.click(
|
177 |
fn=generate_tts,
|
|
|
109 |
border-radius: 8px;
|
110 |
margin: 1rem 0;
|
111 |
}
|
112 |
+
|
113 |
+
.right-column {
|
114 |
+
display: flex;
|
115 |
+
flex-direction: column;
|
116 |
+
gap: 1rem;
|
117 |
+
}
|
118 |
+
|
119 |
+
.options-panel {
|
120 |
+
margin-top: 2rem;
|
121 |
+
background: linear-gradient(145deg, #f3f4f6, #ffffff);
|
122 |
+
border-radius: 15px;
|
123 |
+
padding: 1.5rem;
|
124 |
+
box-shadow: 5px 5px 10px #d1d1d1, -5px -5px 10px #ffffff;
|
125 |
+
}
|
126 |
"""
|
127 |
|
128 |
# Create the Gradio interface with 3D styling
|
|
|
132 |
error_box = gr.Textbox(label="Error Messages", visible=False, elem_classes="error-box")
|
133 |
|
134 |
with gr.Row(elem_classes="container"):
|
135 |
+
# Left column for text input
|
136 |
+
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
text_input = gr.Textbox(
|
138 |
label="Text to Synthesize",
|
139 |
placeholder="Enter text here...",
|
140 |
+
elem_classes="input-group",
|
141 |
+
lines=5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
)
|
143 |
|
144 |
submit_button = gr.Button(
|
145 |
"Generate Speech",
|
146 |
elem_classes="button-3d"
|
147 |
)
|
148 |
+
|
149 |
+
# Right column for output and options
|
150 |
+
with gr.Column(scale=1, elem_classes="right-column"):
|
151 |
+
# Audio output at the top
|
152 |
audio_output = gr.Audio(
|
153 |
label="Generated Audio",
|
154 |
type="filepath",
|
155 |
elem_classes="input-group"
|
156 |
)
|
157 |
+
|
158 |
+
# Options panel below the output
|
159 |
+
with gr.Box(elem_classes="options-panel"):
|
160 |
+
speaker_dropdown = gr.Dropdown(
|
161 |
+
choices=get_available_speakers(),
|
162 |
+
value="en_male_1",
|
163 |
+
label="Speaker Selection",
|
164 |
+
elem_classes="input-group"
|
165 |
+
)
|
166 |
+
|
167 |
+
temperature = gr.Slider(
|
168 |
+
0.1, 1.0,
|
169 |
+
value=0.1,
|
170 |
+
label="Temperature (lower = more stable tone, higher = more expressive)",
|
171 |
+
elem_classes="slider-3d"
|
172 |
+
)
|
173 |
+
|
174 |
+
repetition_penalty = gr.Slider(
|
175 |
+
0.5, 2.0,
|
176 |
+
value=1.1,
|
177 |
+
label="Repetition Penalty",
|
178 |
+
elem_classes="slider-3d"
|
179 |
+
)
|
180 |
+
|
181 |
+
reference_audio = gr.Audio(
|
182 |
+
label="Reference Audio (for voice cloning)",
|
183 |
+
type="filepath",
|
184 |
+
elem_classes="input-group"
|
185 |
+
)
|
186 |
+
|
187 |
+
gr.Markdown("""
|
188 |
+
### Voice Cloning Guidelines:
|
189 |
+
- Use around 7-10 seconds of clear, noise-free audio
|
190 |
+
- For transcription interface will use Whisper turbo to transcribe the audio file
|
191 |
+
- Longer audio clips will reduce maximum output length
|
192 |
+
- Custom speaker overrides speaker selection
|
193 |
+
""", elem_classes="input-group")
|
194 |
|
195 |
submit_button.click(
|
196 |
fn=generate_tts,
|