DHEIVER committed
Commit 388fe1b · verified · 1 Parent(s): cb669b8

Update app.py

Files changed (1)
  1. app.py +127 -127
app.py CHANGED
@@ -1,22 +1,17 @@
  import gradio as gr
  import torch
  import torchaudio
- import scipy.io.wavfile
  import numpy as np
  from transformers import AutoProcessor, SeamlessM4Tv2Model
- from pathlib import Path
- from typing import Optional, Union

- class SeamlessTranslator:
      def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
-         try:
-             self.processor = AutoProcessor.from_pretrained(model_name)
-             self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
-             self.sample_rate = self.model.config.sampling_rate
-         except Exception as e:
-             raise RuntimeError(f"Failed to initialize model: {str(e)}")

-         # Available language pairs
          self.language_codes = {
              "English": "eng",
              "Spanish": "spa",
@@ -26,130 +21,135 @@ class SeamlessTranslator:
              "Portuguese": "por",
              "Russian": "rus",
              "Chinese": "cmn",
-             "Japanese": "jpn",
-             "Korean": "kor",
-             "Arabic": "ara",
-             "Hindi": "hin",
          }

-     def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-         try:
-             inputs = self.processor(text=text, src_lang=src_lang, return_tensors="pt")
-             audio_array = self.model.generate(**inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
-             return self.sample_rate, audio_array
-         except Exception as e:
-             raise RuntimeError(f"Text translation failed: {str(e)}")
-
-     def translate_audio(self, audio_path: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-         try:
-             # Load and resample audio
-             audio, orig_freq = torchaudio.load(audio_path)
-             audio = torchaudio.functional.resample(
-                 audio,
-                 orig_freq=orig_freq,
-                 new_freq=16_000
-             )
-
-             # Process and generate translation
-             inputs = self.processor(audios=audio, return_tensors="pt")
-             audio_array = self.model.generate(**inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
-             return self.sample_rate, audio_array
-         except Exception as e:
-             raise RuntimeError(f"Audio translation failed: {str(e)}")
-
- class GradioInterface:
-     def __init__(self):
-         self.translator = SeamlessTranslator()
-         self.languages = list(self.translator.language_codes.keys())
-
-     def text_to_speech(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-         src_code = self.translator.language_codes[src_lang]
-         tgt_code = self.translator.language_codes[tgt_lang]
-         return self.translator.translate_text(text, src_code, tgt_code)

-     def speech_to_speech(self, audio_path: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-         tgt_code = self.translator.language_codes[tgt_lang]
-         return self.translator.translate_audio(audio_path, tgt_code)

-     def launch(self):
-         # Create the Gradio interface
-         with gr.Blocks(title="SeamlessM4T Translator") as demo:
-             gr.Markdown("# 🌐 SeamlessM4T Translator")
-             gr.Markdown("Translate text or speech to different languages using Meta's SeamlessM4T model")

-             with gr.Tabs():
-                 # Text-to-Speech tab
-                 with gr.TabItem("Text to Speech"):
-                     with gr.Row():
-                         with gr.Column():
-                             text_input = gr.Textbox(
-                                 label="Input Text",
-                                 placeholder="Enter text to translate...",
-                                 lines=3
-                             )
-                             src_lang = gr.Dropdown(
-                                 choices=self.languages,
-                                 value="English",
-                                 label="Source Language"
-                             )
-                             tgt_lang_text = gr.Dropdown(
-                                 choices=self.languages,
-                                 value="Spanish",
-                                 label="Target Language"
-                             )
-                             translate_btn = gr.Button("Translate", variant="primary")
-
-                         with gr.Column():
-                             audio_output = gr.Audio(
-                                 label="Translated Speech",
-                                 type="numpy"
-                             )

-                     translate_btn.click(
-                         fn=self.text_to_speech,
-                         inputs=[text_input, src_lang, tgt_lang_text],
-                         outputs=audio_output
-                     )
-
-                 # Speech-to-Speech tab
-                 with gr.TabItem("Speech to Speech"):
-                     with gr.Row():
-                         with gr.Column():
-                             audio_input = gr.Audio(
-                                 label="Input Speech",
-                                 type="filepath"
-                             )
-                             tgt_lang_speech = gr.Dropdown(
-                                 choices=self.languages,
-                                 value="Spanish",
-                                 label="Target Language"
-                             )
-                             translate_audio_btn = gr.Button("Translate", variant="primary")
-
-                         with gr.Column():
-                             audio_output_s2s = gr.Audio(
-                                 label="Translated Speech",
-                                 type="numpy"
-                             )
-
-                     translate_audio_btn.click(
-                         fn=self.speech_to_speech,
-                         inputs=[audio_input, tgt_lang_speech],
-                         outputs=audio_output_s2s
-                     )

-             gr.Markdown(
-                 """
-                 ### Notes
-                 - Text-to-Speech: Enter text and select source/target languages
-                 - Speech-to-Speech: Upload an audio file and select target language
-                 - Processing may take a few moments depending on input length
-                 """
              )

-         # Launch the interface
-         demo.launch(share=True)

  if __name__ == "__main__":
-     interface = GradioInterface()
-     interface.launch()
 
  import gradio as gr
  import torch
  import torchaudio
  import numpy as np
  from transformers import AutoProcessor, SeamlessM4Tv2Model
+ from datetime import datetime
+ import time

+ class JarvisTranslator:
      def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
+         self.processor = AutoProcessor.from_pretrained(model_name)
+         self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
+         self.sample_rate = self.model.config.sampling_rate

          self.language_codes = {
              "English": "eng",
              "Spanish": "spa",

              "Portuguese": "por",
              "Russian": "rus",
              "Chinese": "cmn",
+             "Japanese": "jpn"
          }

+     def translate(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
+         inputs = self.processor(text=text, src_lang=src_lang, return_tensors="pt")
+         audio_array = self.model.generate(**inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
+         return self.sample_rate, audio_array

+ def create_jarvis_interface():
+     # Custom CSS for Jarvis-like theme
+     css = """
+     #jarvis-container {
+         background-color: #000000;
+         color: #00ffff;
+         font-family: 'Courier New', monospace;
+         padding: 20px;
+         border-radius: 10px;
+         border: 2px solid #00ffff;
+     }
+
+     #status-circle {
+         width: 150px;
+         height: 150px;
+         border: 4px solid #00ffff;
+         border-radius: 50%;
+         margin: 20px auto;
+         position: relative;
+         animation: pulse 2s infinite;
+     }
+
+     @keyframes pulse {
+         0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
+         70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
+         100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
+     }
+
+     .custom-button {
+         background-color: transparent !important;
+         border: 2px solid #00ffff !important;
+         color: #00ffff !important;
+         font-family: 'Courier New', monospace !important;
+     }
+
+     .custom-button:hover {
+         background-color: rgba(0, 255, 255, 0.1) !important;
+     }
+
+     .status-text {
+         color: #00ffff;
+         text-align: center;
+         font-size: 1.2em;
+         margin: 10px 0;
+     }
+
+     .time-display {
+         position: absolute;
+         top: 10px;
+         right: 10px;
+         color: #00ffff;
+         font-family: 'Courier New', monospace;
+     }
+     """

+     translator = JarvisTranslator()

+     def update_status():
+         return f"JARVIS AI SYSTEM ACTIVE\nTime: {datetime.now().strftime('%H:%M:%S')}"

+     def process_command(text, src_lang, tgt_lang):
+         status = f"Processing command: {text}\nSource: {src_lang} → Target: {tgt_lang}"
+         time.sleep(1)  # Simulate processing
+         try:
+             sample_rate, audio = translator.translate(text,
+                                                       translator.language_codes[src_lang],
+                                                       translator.language_codes[tgt_lang])
+             return audio, status + "\nStatus: Translation complete"
+         except Exception as e:
+             return None, f"Error: {str(e)}"

+     with gr.Blocks(css=css, title="JARVIS AI") as demo:
+         with gr.Column(elem_id="jarvis-container"):
+             gr.Markdown("# JARVIS AI TRANSLATION SYSTEM")
+
+             # Status display
+             status_html = gr.HTML(value="<div id='status-circle'></div>", show_label=False)
+             status_text = gr.Textbox(label="System Status", value=update_status)
+
+             with gr.Row():
+                 text_input = gr.Textbox(
+                     label="Command Input",
+                     placeholder="Enter text to translate...",
+                     lines=3
+                 )
+
+             with gr.Row():
+                 src_lang = gr.Dropdown(
+                     choices=list(translator.language_codes.keys()),
+                     value="English",
+                     label="Source Language"
+                 )
+                 tgt_lang = gr.Dropdown(
+                     choices=list(translator.language_codes.keys()),
+                     value="Spanish",
+                     label="Target Language"
+                 )
+
+             with gr.Row():
+                 process_btn = gr.Button("Execute Translation", elem_classes=["custom-button"])
+
+             audio_output = gr.Audio(
+                 label="Translated Output",
+                 type="numpy"
+             )
+
+             # Event handlers
+             process_btn.click(
+                 fn=process_command,
+                 inputs=[text_input, src_lang, tgt_lang],
+                 outputs=[audio_output, status_text]
+             )
+
+             demo.load(
+                 fn=update_status,
+                 outputs=status_text,
+                 every=1  # Update every second
              )

+     return demo

  if __name__ == "__main__":
+     demo = create_jarvis_interface()
+     demo.launch()
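
For reference, the translation path kept by this commit (processor call, then model.generate, then a NumPy waveform) can be exercised outside the Gradio UI. The snippet below is a minimal illustrative sketch, not part of the commit: it assumes the facebook/seamless-m4t-v2-large checkpoint can be downloaded, and it uses scipy.io.wavfile (the import removed from app.py above) to save the generated speech to a hypothetical translated.wav file.

# Standalone sketch (illustrative, not part of this commit): the same
# SeamlessM4Tv2 calls that JarvisTranslator.translate performs.
import scipy.io.wavfile
from transformers import AutoProcessor, SeamlessM4Tv2Model

model_name = "facebook/seamless-m4t-v2-large"
processor = AutoProcessor.from_pretrained(model_name)
model = SeamlessM4Tv2Model.from_pretrained(model_name)

# English text in, Spanish speech out, using the same language codes as app.py.
inputs = processor(text="Hello, world.", src_lang="eng", return_tensors="pt")
waveform = model.generate(**inputs, tgt_lang="spa")[0].cpu().numpy().squeeze()

# model.config.sampling_rate is the rate app.py stores as self.sample_rate.
scipy.io.wavfile.write("translated.wav", model.config.sampling_rate, waveform)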