fantos committed on
Commit
bb90c13
·
verified ·
1 Parent(s): 5de3ec5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -40
app.py CHANGED
@@ -109,6 +109,20 @@ custom_css = """
109
  border-radius: 8px;
110
  margin: 1rem 0;
111
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  """
113
 
114
  # Create the Gradio interface with 3D styling
@@ -118,60 +132,65 @@ with gr.Blocks(css=custom_css) as demo:
118
  error_box = gr.Textbox(label="Error Messages", visible=False, elem_classes="error-box")
119
 
120
  with gr.Row(elem_classes="container"):
121
- with gr.Column():
122
- # Speaker selection with 3D styling
123
- speaker_dropdown = gr.Dropdown(
124
- choices=get_available_speakers(),
125
- value="en_male_1",
126
- label="Speaker Selection",
127
- elem_classes="input-group"
128
- )
129
-
130
  text_input = gr.Textbox(
131
  label="Text to Synthesize",
132
  placeholder="Enter text here...",
133
- elem_classes="input-group"
134
- )
135
-
136
- temperature = gr.Slider(
137
- 0.1, 1.0,
138
- value=0.1,
139
- label="Temperature (lower = more stable tone, higher = more expressive)",
140
- elem_classes="slider-3d"
141
- )
142
-
143
- repetition_penalty = gr.Slider(
144
- 0.5, 2.0,
145
- value=1.1,
146
- label="Repetition Penalty",
147
- elem_classes="slider-3d"
148
- )
149
-
150
- gr.Markdown("""
151
- ### Voice Cloning Guidelines:
152
- - Use around 7-10 seconds of clear, noise-free audio
153
- - For transcription interface will use Whisper turbo to transcribe the audio file
154
- - Longer audio clips will reduce maximum output length
155
- - Custom speaker overrides speaker selection
156
- """, elem_classes="input-group")
157
-
158
- reference_audio = gr.Audio(
159
- label="Reference Audio (for voice cloning)",
160
- type="filepath",
161
- elem_classes="input-group"
162
  )
163
 
164
  submit_button = gr.Button(
165
  "Generate Speech",
166
  elem_classes="button-3d"
167
  )
168
-
169
- with gr.Column():
 
 
170
  audio_output = gr.Audio(
171
  label="Generated Audio",
172
  type="filepath",
173
  elem_classes="input-group"
174
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  submit_button.click(
177
  fn=generate_tts,
 
109
  border-radius: 8px;
110
  margin: 1rem 0;
111
  }
112
+
113
+ .right-column {
114
+ display: flex;
115
+ flex-direction: column;
116
+ gap: 1rem;
117
+ }
118
+
119
+ .options-panel {
120
+ margin-top: 2rem;
121
+ background: linear-gradient(145deg, #f3f4f6, #ffffff);
122
+ border-radius: 15px;
123
+ padding: 1.5rem;
124
+ box-shadow: 5px 5px 10px #d1d1d1, -5px -5px 10px #ffffff;
125
+ }
126
  """
127
 
128
  # Create the Gradio interface with 3D styling
 
132
  error_box = gr.Textbox(label="Error Messages", visible=False, elem_classes="error-box")
133
 
134
  with gr.Row(elem_classes="container"):
135
+ # Left column for text input
136
+ with gr.Column(scale=1):
 
 
 
 
 
 
 
137
  text_input = gr.Textbox(
138
  label="Text to Synthesize",
139
  placeholder="Enter text here...",
140
+ elem_classes="input-group",
141
+ lines=5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  )
143
 
144
  submit_button = gr.Button(
145
  "Generate Speech",
146
  elem_classes="button-3d"
147
  )
148
+
149
+ # Right column for output and options
150
+ with gr.Column(scale=1, elem_classes="right-column"):
151
+ # Audio output at the top
152
  audio_output = gr.Audio(
153
  label="Generated Audio",
154
  type="filepath",
155
  elem_classes="input-group"
156
  )
157
+
158
+ # Options panel below the output
159
+ with gr.Box(elem_classes="options-panel"):
160
+ speaker_dropdown = gr.Dropdown(
161
+ choices=get_available_speakers(),
162
+ value="en_male_1",
163
+ label="Speaker Selection",
164
+ elem_classes="input-group"
165
+ )
166
+
167
+ temperature = gr.Slider(
168
+ 0.1, 1.0,
169
+ value=0.1,
170
+ label="Temperature (lower = more stable tone, higher = more expressive)",
171
+ elem_classes="slider-3d"
172
+ )
173
+
174
+ repetition_penalty = gr.Slider(
175
+ 0.5, 2.0,
176
+ value=1.1,
177
+ label="Repetition Penalty",
178
+ elem_classes="slider-3d"
179
+ )
180
+
181
+ reference_audio = gr.Audio(
182
+ label="Reference Audio (for voice cloning)",
183
+ type="filepath",
184
+ elem_classes="input-group"
185
+ )
186
+
187
+ gr.Markdown("""
188
+ ### Voice Cloning Guidelines:
189
+ - Use around 7-10 seconds of clear, noise-free audio
190
+ - For transcription interface will use Whisper turbo to transcribe the audio file
191
+ - Longer audio clips will reduce maximum output length
192
+ - Custom speaker overrides speaker selection
193
+ """, elem_classes="input-group")
194
 
195
  submit_button.click(
196
  fn=generate_tts,