DHEIVER committed on
Commit
aa1a596
·
verified ·
1 Parent(s): a9824c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -185
app.py CHANGED
@@ -5,17 +5,12 @@ import numpy as np
5
  from transformers import AutoProcessor, SeamlessM4Tv2Model
6
  from datetime import datetime
7
  import time
8
- import threading
9
- import queue
10
- import sounddevice as sd
11
 
12
  class ARISTranslator:
13
  def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
14
  self.processor = AutoProcessor.from_pretrained(model_name)
15
  self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
16
  self.sample_rate = self.model.config.sampling_rate
17
- self.audio_queue = queue.Queue()
18
- self.is_recording = False
19
 
20
  self.language_codes = {
21
  "English (US)": "eng",
@@ -32,31 +27,27 @@ class ARISTranslator:
32
  "Arabic (AR)": "ara"
33
  }
34
 
35
- def start_recording(self):
36
- self.is_recording = True
37
- threading.Thread(target=self._record_audio).start()
38
-
39
- def stop_recording(self):
40
- self.is_recording = False
41
-
42
- def _record_audio(self):
43
- with sd.InputStream(channels=1, samplerate=16000, callback=self._audio_callback):
44
- while self.is_recording:
45
- time.sleep(0.1)
46
-
47
- def _audio_callback(self, indata, frames, time, status):
48
- self.audio_queue.put(indata.copy())
49
-
50
- def translate_realtime(self, audio_chunk, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
51
  try:
52
- inputs = self.processor(audios=audio_chunk, return_tensors="pt")
 
 
 
 
 
 
 
 
53
  audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
54
  return self.sample_rate, audio_array
55
  except Exception as e:
56
- raise gr.Error(f"Translation failed: {str(e)}")
57
 
58
  def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
59
  try:
 
 
 
60
  inputs = self.processor(text=text, src_lang=self.language_codes[src_lang], return_tensors="pt")
61
  audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
62
  return self.sample_rate, audio_array
@@ -64,7 +55,6 @@ class ARISTranslator:
64
  raise gr.Error(f"Translation failed: {str(e)}")
65
 
66
  css = """
67
- /* Cores e temas da interface */
68
  :root {
69
  --primary: #00ffff;
70
  --secondary: #0066cc;
@@ -104,7 +94,6 @@ css = """
104
  margin: 5px 0;
105
  }
106
 
107
- /* Sistema de anéis central */
108
  #status-ring {
109
  width: 400px;
110
  height: 400px;
@@ -130,56 +119,17 @@ css = """
130
  animation: rotate 20s linear infinite;
131
  }
132
 
133
- #inner-ring {
134
- width: 300px;
135
- height: 300px;
136
- border: 2px solid var(--primary);
137
- border-radius: 50%;
138
- display: flex;
139
- align-items: center;
140
- justify-content: center;
141
- position: relative;
142
- }
143
-
144
- #core {
145
- width: 200px;
146
- height: 200px;
147
- border: 3px solid var(--primary);
148
- border-radius: 50%;
149
- background-color: rgba(0, 0, 0, 0.8);
150
- display: flex;
151
- flex-direction: column;
152
- align-items: center;
153
- justify-content: center;
154
- color: var(--primary);
155
- text-align: center;
156
- padding: 15px;
157
- position: relative;
158
- box-shadow: 0 0 20px rgba(0, 255, 255, 0.2);
159
  }
160
 
161
- /* Animações */
162
  @keyframes pulse {
163
  0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
164
  70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
165
  100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
166
  }
167
 
168
- @keyframes rotate {
169
- from { transform: rotate(0deg); }
170
- to { transform: rotate(360deg); }
171
- }
172
-
173
- /* Elementos da interface */
174
- .aris-controls {
175
- background: rgba(0, 0, 0, 0.7);
176
- border: 2px solid var(--primary);
177
- border-radius: 10px;
178
- padding: 20px;
179
- margin: 20px 0;
180
- box-shadow: 0 0 15px rgba(0, 255, 255, 0.1);
181
- }
182
-
183
  .aris-textbox {
184
  background-color: rgba(0, 0, 0, 0.8) !important;
185
  border: 2px solid var(--primary) !important;
@@ -237,50 +187,6 @@ css = """
237
  0% { left: -100%; }
238
  100% { left: 100%; }
239
  }
240
-
241
- .mode-indicator {
242
- position: absolute;
243
- top: 10px;
244
- right: 10px;
245
- padding: 5px 10px;
246
- background-color: var(--accent);
247
- color: var(--text);
248
- border-radius: 3px;
249
- font-size: 0.8em;
250
- letter-spacing: 1px;
251
- }
252
-
253
- .stats-container {
254
- display: grid;
255
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
256
- gap: 15px;
257
- margin-top: 20px;
258
- }
259
-
260
- .stat-item {
261
- background: rgba(0, 0, 0, 0.7);
262
- border: 1px solid var(--primary);
263
- padding: 10px;
264
- border-radius: 5px;
265
- text-align: center;
266
- color: var(--primary);
267
- }
268
-
269
- .language-pair-display {
270
- display: flex;
271
- align-items: center;
272
- justify-content: center;
273
- gap: 10px;
274
- margin: 10px 0;
275
- color: var(--primary);
276
- font-size: 1.2em;
277
- }
278
-
279
- .language-pair-display::before,
280
- .language-pair-display::after {
281
- content: '⟨';
282
- color: var(--secondary);
283
- }
284
  """
285
 
286
  def create_interface():
@@ -293,14 +199,6 @@ def create_interface():
293
  f"Neural Engine: ACTIVE\n"
294
  f"Translation Matrix: OPERATIONAL"
295
  )
296
-
297
- def start_realtime_translation(src_lang, tgt_lang):
298
- translator.start_recording()
299
- return "Real-time translation active..."
300
-
301
- def stop_realtime_translation():
302
- translator.stop_recording()
303
- return "Translation stopped."
304
 
305
  with gr.Blocks(css=css, title="A.R.I.S. - Advanced Real-time Interpretation System") as demo:
306
  gr.HTML('''
@@ -328,23 +226,6 @@ def create_interface():
328
 
329
  with gr.Row():
330
  with gr.Column():
331
- with gr.Tab("Real-time Translation"):
332
- src_lang_realtime = gr.Dropdown(
333
- choices=list(translator.language_codes.keys()),
334
- value="English (US)",
335
- label="SOURCE LANGUAGE",
336
- elem_classes=["aris-textbox"]
337
- )
338
- tgt_lang_realtime = gr.Dropdown(
339
- choices=list(translator.language_codes.keys()),
340
- value="Spanish (ES)",
341
- label="TARGET LANGUAGE",
342
- elem_classes=["aris-textbox"]
343
- )
344
- start_btn = gr.Button("▶ START REAL-TIME TRANSLATION", elem_classes=["aris-button"])
345
- stop_btn = gr.Button("⬛ STOP TRANSLATION", elem_classes=["aris-button"])
346
- status_realtime = gr.Textbox(label="REAL-TIME STATUS", elem_classes=["aris-textbox"])
347
-
348
  with gr.Tab("Text Translation"):
349
  text_input = gr.Textbox(
350
  label="INPUT TEXT",
@@ -366,6 +247,19 @@ def create_interface():
366
  elem_classes=["aris-textbox"]
367
  )
368
  translate_btn = gr.Button("▶ TRANSLATE TEXT", elem_classes=["aris-button"])
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
  with gr.Column():
371
  audio_output = gr.Audio(
@@ -412,66 +306,23 @@ def create_interface():
412
  </div>
413
  """
414
  )
415
-
416
- # Sistema de estatísticas
417
- with gr.Row():
418
- gr.HTML("""
419
- <div class="stats-container">
420
- <div class="stat-item">
421
- <div>Processing Speed</div>
422
- <div style="font-size: 1.2em; margin: 5px 0;">0.8ms</div>
423
- </div>
424
- <div class="stat-item">
425
- <div>Neural Load</div>
426
- <div style="font-size: 1.2em; margin: 5px 0;">78%</div>
427
- </div>
428
- <div class="stat-item">
429
- <div>Memory Usage</div>
430
- <div style="font-size: 1.2em; margin: 5px 0;">4.2GB</div>
431
- </div>
432
- </div>
433
- """)
434
 
435
  # Event handlers
436
- def update_stats():
437
- return {
438
- status_realtime: f"System Status: Active\nMemory Usage: {np.random.randint(70, 90)}%\nProcessing Speed: {np.random.randint(1, 5)}ms"
439
- }
440
-
441
- start_btn.click(
442
- fn=start_realtime_translation,
443
- inputs=[src_lang_realtime, tgt_lang_realtime],
444
- outputs=status_realtime
445
- )
446
-
447
- stop_btn.click(
448
- fn=stop_realtime_translation,
449
- outputs=status_realtime
450
- )
451
-
452
  translate_btn.click(
453
  fn=translator.translate_text,
454
  inputs=[text_input, src_lang_text, tgt_lang_text],
455
  outputs=audio_output
456
  )
457
 
458
- # Atualizações automáticas
459
- demo.load(fn=update_status, outputs=status_realtime)
 
 
 
460
 
461
  return demo
462
 
463
  if __name__ == "__main__":
464
  demo = create_interface()
465
  demo.queue()
466
- demo.launch()
467
-
468
- # Arquivo requirements.txt atualizado
469
- """
470
- gradio>=4.0.0
471
- torch>=2.0.0
472
- torchaudio>=2.0.0
473
- transformers
474
- sentencepiece>=0.1.99
475
- numpy>=1.21.0
476
- sounddevice>=0.4.5
477
- """
 
5
  from transformers import AutoProcessor, SeamlessM4Tv2Model
6
  from datetime import datetime
7
  import time
 
 
 
8
 
9
  class ARISTranslator:
10
  def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
11
  self.processor = AutoProcessor.from_pretrained(model_name)
12
  self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
13
  self.sample_rate = self.model.config.sampling_rate
 
 
14
 
15
  self.language_codes = {
16
  "English (US)": "eng",
 
27
  "Arabic (AR)": "ara"
28
  }
29
 
30
+ def process_audio(self, audio_path: str, tgt_lang: str) -> tuple[int, np.ndarray]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  try:
32
+ if audio_path is None:
33
+ raise gr.Error("No audio input provided")
34
+
35
+ # Carregar e resample do áudio
36
+ audio, orig_freq = torchaudio.load(audio_path)
37
+ audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)
38
+
39
+ # Processar através do modelo
40
+ inputs = self.processor(audios=audio, return_tensors="pt")
41
  audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
42
  return self.sample_rate, audio_array
43
  except Exception as e:
44
+ raise gr.Error(f"Audio processing failed: {str(e)}")
45
 
46
def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
    """Translate `text` from `src_lang` and synthesize speech in `tgt_lang`.

    Args:
        text: Input text; may be None/empty when submitted from the UI.
        src_lang: Display name of the source language (key of `self.language_codes`).
        tgt_lang: Display name of the target language (key of `self.language_codes`).

    Returns:
        (sample_rate, audio_array): the model's output sampling rate and the
        generated waveform as a 1-D numpy array.

    Raises:
        gr.Error: when the input is empty or the model pipeline fails.
    """
    try:
        # Guard `None` as well as whitespace-only input: Gradio can submit None,
        # and `None.strip()` would otherwise surface as a confusing AttributeError.
        if not text or not text.strip():
            raise gr.Error("No text input provided")

        inputs = self.processor(text=text, src_lang=self.language_codes[src_lang], return_tensors="pt")
        audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
        return self.sample_rate, audio_array
    except gr.Error:
        # Re-raise as-is so "No text input provided" is not re-wrapped into the
        # generic "Translation failed: ..." message.
        raise
    except Exception as e:
        raise gr.Error(f"Translation failed: {str(e)}")
56
 
57
  css = """
 
58
  :root {
59
  --primary: #00ffff;
60
  --secondary: #0066cc;
 
94
  margin: 5px 0;
95
  }
96
 
 
97
  #status-ring {
98
  width: 400px;
99
  height: 400px;
 
119
  animation: rotate 20s linear infinite;
120
  }
121
 
122
+ @keyframes rotate {
123
+ from { transform: rotate(0deg); }
124
+ to { transform: rotate(360deg); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  }
126
 
 
127
  @keyframes pulse {
128
  0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
129
  70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
130
  100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
131
  }
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  .aris-textbox {
134
  background-color: rgba(0, 0, 0, 0.8) !important;
135
  border: 2px solid var(--primary) !important;
 
187
  0% { left: -100%; }
188
  100% { left: 100%; }
189
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  """
191
 
192
  def create_interface():
 
199
  f"Neural Engine: ACTIVE\n"
200
  f"Translation Matrix: OPERATIONAL"
201
  )
 
 
 
 
 
 
 
 
202
 
203
  with gr.Blocks(css=css, title="A.R.I.S. - Advanced Real-time Interpretation System") as demo:
204
  gr.HTML('''
 
226
 
227
  with gr.Row():
228
  with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  with gr.Tab("Text Translation"):
230
  text_input = gr.Textbox(
231
  label="INPUT TEXT",
 
247
  elem_classes=["aris-textbox"]
248
  )
249
  translate_btn = gr.Button("▶ TRANSLATE TEXT", elem_classes=["aris-button"])
250
+
251
+ with gr.Tab("Audio Translation"):
252
+ audio_input = gr.Audio(
253
+ label="AUDIO INPUT",
254
+ type="filepath"
255
+ )
256
+ tgt_lang_audio = gr.Dropdown(
257
+ choices=list(translator.language_codes.keys()),
258
+ value="English (US)",
259
+ label="TARGET LANGUAGE",
260
+ elem_classes=["aris-textbox"]
261
+ )
262
+ translate_audio_btn = gr.Button("▶ TRANSLATE AUDIO", elem_classes=["aris-button"])
263
 
264
  with gr.Column():
265
  audio_output = gr.Audio(
 
306
  </div>
307
  """
308
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
  # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  translate_btn.click(
312
  fn=translator.translate_text,
313
  inputs=[text_input, src_lang_text, tgt_lang_text],
314
  outputs=audio_output
315
  )
316
 
317
+ translate_audio_btn.click(
318
+ fn=translator.process_audio,
319
+ inputs=[audio_input, tgt_lang_audio],
320
+ outputs=audio_output
321
+ )
322
 
323
  return demo
324
 
325
if __name__ == "__main__":
    # Entry point: build the Gradio UI, enable request queuing, then serve it.
    app = create_interface()
    app.queue()
    app.launch()