DHEIVER committed on
Commit
2f8841d
·
verified ·
1 Parent(s): 58e0199

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -112
app.py CHANGED
@@ -3,197 +3,253 @@ import torch
3
  import torchaudio
4
  import numpy as np
5
  from transformers import AutoProcessor, SeamlessM4Tv2Model
6
- from datetime import datetime
7
 
8
- class SeamlessTranslator:
9
  def __init__(self):
10
  self.model_name = "facebook/seamless-m4t-v2-large"
11
  print("Loading model...")
12
  self.processor = AutoProcessor.from_pretrained(self.model_name)
13
  self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
14
  self.sample_rate = self.model.config.sampling_rate
15
-
16
  self.languages = {
17
- "🇺🇸 English": "eng",
18
- "🇪🇸 Spanish": "spa",
19
- "🇫🇷 French": "fra",
20
- "🇩🇪 German": "deu",
21
- "🇮🇹 Italian": "ita",
22
- "🇵🇹 Portuguese": "por",
23
- "🇷🇺 Russian": "rus",
24
- "🇨🇳 Chinese": "cmn",
25
- "🇯🇵 Japanese": "jpn",
26
- "🇰🇷 Korean": "kor"
27
  }
28
 
29
  def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
30
- progress(0.3, desc="Processing input...")
31
  try:
 
32
  inputs = self.processor(text=text, src_lang=self.languages[src_lang], return_tensors="pt")
33
- progress(0.6, desc="Generating audio...")
34
  audio_array = self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0].cpu().numpy().squeeze()
35
- progress(1.0, desc="Done!")
36
- return (self.sample_rate, audio_array), f"✅ Translation completed: {src_lang} → {tgt_lang}"
37
  except Exception as e:
38
- raise gr.Error(f"❌ Translation failed: {str(e)}")
39
 
40
  def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
41
- if audio_path is None:
42
- raise gr.Error("❌ Please upload an audio file")
43
-
44
- progress(0.3, desc="Loading audio...")
45
  try:
 
46
  audio, orig_freq = torchaudio.load(audio_path)
47
  audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)
48
 
49
  progress(0.6, desc="Translating...")
50
  inputs = self.processor(audios=audio, return_tensors="pt")
51
  audio_array = self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0].cpu().numpy().squeeze()
52
- progress(1.0, desc="Done!")
53
- return (self.sample_rate, audio_array), "✅ Audio translation completed"
54
  except Exception as e:
55
- raise gr.Error(f"❌ Translation failed: {str(e)}")
56
 
57
  css = """
 
 
 
 
 
 
 
 
 
58
  .gradio-container {
 
 
 
 
59
  max-width: 1200px !important;
60
- margin: auto !important;
 
61
  }
62
 
63
- .main-header {
64
  text-align: center;
65
- margin-bottom: 2rem;
66
- padding: 2rem;
67
- background: linear-gradient(135deg, #1e40af, #3b82f6);
68
- border-radius: 12px;
69
  color: white;
 
70
  }
71
 
72
- .main-title {
73
- font-size: 2.5rem;
74
- font-weight: bold;
75
- margin-bottom: 0.5rem;
76
  }
77
 
78
- .main-subtitle {
79
- font-size: 1.2rem;
80
  opacity: 0.9;
81
  }
82
 
83
- .container {
84
- padding: 1.5rem;
85
- border-radius: 12px;
86
  background: white;
87
- box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
88
- margin-bottom: 1.5rem;
 
 
 
 
 
 
 
 
89
  }
90
 
91
- .status-box {
92
- padding: 1rem;
93
- border-radius: 8px;
94
- background: #f0f9ff;
95
- border-left: 4px solid #3b82f6;
96
- margin-top: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
 
99
  .footer {
100
  text-align: center;
101
- margin-top: 2rem;
102
- padding: 1rem;
103
- color: #666;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  }
105
  """
106
 
107
  def create_ui():
108
- translator = SeamlessTranslator()
109
 
110
- with gr.Blocks(css=css, title="A.R.I.S. Translator") as demo:
111
  gr.HTML(
112
  """
113
- <div class="main-header">
114
- <div class="main-title">A.R.I.S. Translator</div>
115
- <div class="main-subtitle">Advanced Real-time Interpretation System</div>
116
  </div>
117
  """
118
  )
119
 
120
  with gr.Tabs():
121
  # Text Translation Tab
122
- with gr.Tab("🔤 Text Translation"):
123
- with gr.Row():
124
- with gr.Column():
125
- text_input = gr.Textbox(
126
- label="Text to Translate",
127
- placeholder="Enter your text here...",
128
- lines=5
 
 
 
 
 
 
 
129
  )
130
- with gr.Row():
131
- src_lang = gr.Dropdown(
132
- choices=list(translator.languages.keys()),
133
- value="🇺🇸 English",
134
- label="Source Language"
135
- )
136
- tgt_lang = gr.Dropdown(
137
- choices=list(translator.languages.keys()),
138
- value="🇪🇸 Spanish",
139
- label="Target Language"
140
- )
141
- translate_btn = gr.Button("🔄 Translate", variant="primary")
142
- status_text = gr.Textbox(
143
- label="Status",
144
- interactive=False
145
- )
146
-
147
- with gr.Column():
148
- audio_output = gr.Audio(
149
- label="Translation Output",
150
- type="numpy"
151
  )
 
 
 
 
 
 
 
 
 
152
 
153
  # Audio Translation Tab
154
- with gr.Tab("🎤 Audio Translation"):
155
- with gr.Row():
156
- with gr.Column():
157
- audio_input = gr.Audio(
158
- label="Upload Audio",
159
- type="filepath"
160
- )
161
- tgt_lang_audio = gr.Dropdown(
162
- choices=list(translator.languages.keys()),
163
- value="🇺🇸 English",
164
- label="Target Language"
165
- )
166
- translate_audio_btn = gr.Button("🔄 Translate Audio", variant="primary")
167
- status_text_audio = gr.Textbox(
168
- label="Status",
169
- interactive=False
170
- )
171
-
172
- with gr.Column():
173
- audio_output_from_audio = gr.Audio(
174
- label="Translation Output",
175
- type="numpy"
176
- )
177
 
178
  gr.HTML(
179
  """
180
  <div class="footer">
181
- Powered by Meta's SeamlessM4T model | Built with Gradio
182
  </div>
183
  """
184
  )
185
 
186
  # Event handlers
187
  translate_btn.click(
188
- fn=translator.translate_text,
189
  inputs=[text_input, src_lang, tgt_lang],
190
- outputs=[audio_output, status_text]
191
  )
192
 
193
  translate_audio_btn.click(
194
- fn=translator.translate_audio,
195
  inputs=[audio_input, tgt_lang_audio],
196
- outputs=[audio_output_from_audio, status_text_audio]
197
  )
198
 
199
  return demo
 
3
  import torchaudio
4
  import numpy as np
5
  from transformers import AutoProcessor, SeamlessM4Tv2Model
 
6
 
7
+ class TranslationModel:
8
  def __init__(self):
9
  self.model_name = "facebook/seamless-m4t-v2-large"
10
  print("Loading model...")
11
  self.processor = AutoProcessor.from_pretrained(self.model_name)
12
  self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
13
  self.sample_rate = self.model.config.sampling_rate
14
+
15
  self.languages = {
16
+ "English": "eng",
17
+ "Spanish": "spa",
18
+ "French": "fra",
19
+ "German": "deu",
20
+ "Italian": "ita",
21
+ "Portuguese": "por",
22
+ "Russian": "rus",
23
+ "Chinese": "cmn",
24
+ "Japanese": "jpn",
25
+ "Korean": "kor"
26
  }
27
 
28
  def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
 
29
  try:
30
+ progress(0.3, desc="Processing...")
31
  inputs = self.processor(text=text, src_lang=self.languages[src_lang], return_tensors="pt")
32
+ progress(0.6, desc="Generating...")
33
  audio_array = self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0].cpu().numpy().squeeze()
34
+ progress(1.0, desc="Complete")
35
+ return (self.sample_rate, audio_array)
36
  except Exception as e:
37
+ raise gr.Error(str(e))
38
 
39
  def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
40
+ if not audio_path:
41
+ raise gr.Error("Please upload an audio file")
 
 
42
  try:
43
+ progress(0.3, desc="Processing...")
44
  audio, orig_freq = torchaudio.load(audio_path)
45
  audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)
46
 
47
  progress(0.6, desc="Translating...")
48
  inputs = self.processor(audios=audio, return_tensors="pt")
49
  audio_array = self.model.generate(**inputs, tgt_lang=self.languages[tgt_lang])[0].cpu().numpy().squeeze()
50
+ progress(1.0, desc="Complete")
51
+ return (self.sample_rate, audio_array)
52
  except Exception as e:
53
+ raise gr.Error(str(e))
54
 
55
  css = """
56
+ :root {
57
+ --primary-color: #2D3648;
58
+ --secondary-color: #5E6AD2;
59
+ --background-color: #F5F7FF;
60
+ --text-color: #2D3648;
61
+ --border-radius: 12px;
62
+ --spacing: 20px;
63
+ }
64
+
65
  .gradio-container {
66
+ background-color: var(--background-color) !important;
67
+ }
68
+
69
+ .main-container {
70
  max-width: 1200px !important;
71
+ margin: 0 auto !important;
72
+ padding: var(--spacing) !important;
73
  }
74
 
75
+ .app-header {
76
  text-align: center;
77
+ padding: 40px 20px;
78
+ background: linear-gradient(45deg, var(--primary-color), var(--secondary-color));
79
+ border-radius: var(--border-radius);
 
80
  color: white;
81
+ margin-bottom: var(--spacing);
82
  }
83
 
84
+ .app-title {
85
+ font-size: 2.5em;
86
+ font-weight: 700;
87
+ margin-bottom: 10px;
88
  }
89
 
90
+ .app-subtitle {
91
+ font-size: 1.2em;
92
  opacity: 0.9;
93
  }
94
 
95
+ .content-block {
 
 
96
  background: white;
97
+ padding: var(--spacing);
98
+ border-radius: var(--border-radius);
99
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
100
+ margin-bottom: var(--spacing);
101
+ }
102
+
103
+ .gr-button {
104
+ background: var(--secondary-color) !important;
105
+ border: none !important;
106
+ color: white !important;
107
  }
108
 
109
+ .gr-button:hover {
110
+ box-shadow: 0 4px 10px rgba(94, 106, 210, 0.3) !important;
111
+ transform: translateY(-1px);
112
+ }
113
+
114
+ .gr-input, .gr-select {
115
+ border-radius: 8px !important;
116
+ border: 2px solid #E5E7EB !important;
117
+ padding: 12px !important;
118
+ }
119
+
120
+ .gr-input:focus, .gr-select:focus {
121
+ border-color: var(--secondary-color) !important;
122
+ box-shadow: 0 0 0 3px rgba(94, 106, 210, 0.1) !important;
123
+ }
124
+
125
+ .gr-form {
126
+ background: white !important;
127
+ padding: var(--spacing) !important;
128
+ border-radius: var(--border-radius) !important;
129
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05) !important;
130
+ }
131
+
132
+ .gr-box {
133
+ border-radius: var(--border-radius) !important;
134
+ border: none !important;
135
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05) !important;
136
  }
137
 
138
  .footer {
139
  text-align: center;
140
+ color: var(--text-color);
141
+ padding: var(--spacing);
142
+ opacity: 0.8;
143
+ }
144
+
145
+ /* Custom Tabs Styling */
146
+ .tab-nav {
147
+ background: white !important;
148
+ padding: 10px !important;
149
+ border-radius: var(--border-radius) !important;
150
+ margin-bottom: var(--spacing) !important;
151
+ }
152
+
153
+ .tab-nav button {
154
+ border-radius: 8px !important;
155
+ padding: 12px 24px !important;
156
+ }
157
+
158
+ .tab-nav button.selected {
159
+ background: var(--secondary-color) !important;
160
+ color: white !important;
161
  }
162
  """
163
 
164
  def create_ui():
165
+ model = TranslationModel()
166
 
167
+ with gr.Blocks(css=css, title="AI Language Translator") as demo:
168
  gr.HTML(
169
  """
170
+ <div class="app-header">
171
+ <div class="app-title">AI Language Translator</div>
172
+ <div class="app-subtitle">Powered by Neural Machine Translation</div>
173
  </div>
174
  """
175
  )
176
 
177
  with gr.Tabs():
178
  # Text Translation Tab
179
+ with gr.Tab("Text to Speech"):
180
+ with gr.Column(variant="panel"):
181
+ gr.Markdown("### Enter Text")
182
+ text_input = gr.Textbox(
183
+ label="",
184
+ placeholder="Type or paste your text here...",
185
+ lines=4
186
+ )
187
+
188
+ with gr.Row():
189
+ src_lang = gr.Dropdown(
190
+ choices=sorted(model.languages.keys()),
191
+ value="English",
192
+ label="From"
193
  )
194
+ tgt_lang = gr.Dropdown(
195
+ choices=sorted(model.languages.keys()),
196
+ value="Spanish",
197
+ label="To"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  )
199
+
200
+ translate_btn = gr.Button("Translate", size="lg")
201
+
202
+ gr.Markdown("### Translation Output")
203
+ audio_output = gr.Audio(
204
+ label="",
205
+ type="numpy",
206
+ show_download_button=True
207
+ )
208
 
209
  # Audio Translation Tab
210
+ with gr.Tab("Speech to Speech"):
211
+ with gr.Column(variant="panel"):
212
+ gr.Markdown("### Upload Audio")
213
+ audio_input = gr.Audio(
214
+ label="",
215
+ type="filepath",
216
+ sources=["upload", "microphone"]
217
+ )
218
+
219
+ tgt_lang_audio = gr.Dropdown(
220
+ choices=sorted(model.languages.keys()),
221
+ value="English",
222
+ label="Translate to"
223
+ )
224
+
225
+ translate_audio_btn = gr.Button("Translate Audio", size="lg")
226
+
227
+ gr.Markdown("### Translation Output")
228
+ audio_output_from_audio = gr.Audio(
229
+ label="",
230
+ type="numpy",
231
+ show_download_button=True
232
+ )
233
 
234
  gr.HTML(
235
  """
236
  <div class="footer">
237
+ Built with ❤️ using Meta's SeamlessM4T and Gradio
238
  </div>
239
  """
240
  )
241
 
242
  # Event handlers
243
  translate_btn.click(
244
+ fn=model.translate_text,
245
  inputs=[text_input, src_lang, tgt_lang],
246
+ outputs=audio_output
247
  )
248
 
249
  translate_audio_btn.click(
250
+ fn=model.translate_audio,
251
  inputs=[audio_input, tgt_lang_audio],
252
+ outputs=audio_output_from_audio
253
  )
254
 
255
  return demo