Spaces:
Sleeping
Sleeping
frogcho123
committed on
Commit
·
87d303a
1
Parent(s):
bd97165
Update app.py
Browse files
app.py
CHANGED
@@ -7,20 +7,15 @@ import sentencepiece
|
|
7 |
|
8 |
|
9 |
def translate_voice(file, target_lang):
|
10 |
-
|
11 |
-
# Load the model and switch to float32
|
12 |
model = whisper.load_model("base").float()
|
13 |
|
14 |
-
# Load the audio
|
15 |
audio = whisper.load_audio(file.name)
|
16 |
|
17 |
-
# Pad or trim the audio
|
18 |
audio = whisper.pad_or_trim(audio)
|
19 |
|
20 |
-
|
21 |
-
mel = whisper.log_mel_spectrogram(audio).to(model.device).float() # convert to full-precision float32
|
22 |
|
23 |
-
# Proceed with your language detection and decoding
|
24 |
_, probs = model.detect_language(mel)
|
25 |
options = whisper.DecodingOptions(fp16 = False)
|
26 |
result = whisper.decode(model, mel, options)
|
@@ -28,7 +23,6 @@ def translate_voice(file, target_lang):
|
|
28 |
text = result.text
|
29 |
lang = max(probs, key=probs.get)
|
30 |
|
31 |
-
# Translate
|
32 |
tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
|
33 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
34 |
|
@@ -37,15 +31,12 @@ def translate_voice(file, target_lang):
|
|
37 |
generated_tokens = model.generate(**encoded_bg)
|
38 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
39 |
|
40 |
-
# Text-to-audio (TTS)
|
41 |
tts = gTTS(text=translated_text, lang=target_lang)
|
42 |
filename = "to_speech.mp3"
|
43 |
tts.save(filename)
|
44 |
|
45 |
return filename, text, translated_text, target_lang
|
46 |
|
47 |
-
except Exception as e:
|
48 |
-
return str(e), "", "", ""
|
49 |
|
50 |
iface = gr.Interface(
|
51 |
fn=translate_voice,
|
|
|
7 |
|
8 |
|
9 |
def translate_voice(file, target_lang):
|
10 |
+
|
|
|
11 |
model = whisper.load_model("base").float()
|
12 |
|
|
|
13 |
audio = whisper.load_audio(file.name)
|
14 |
|
|
|
15 |
audio = whisper.pad_or_trim(audio)
|
16 |
|
17 |
+
mel = whisper.log_mel_spectrogram(audio).to(model.device).float()
|
|
|
18 |
|
|
|
19 |
_, probs = model.detect_language(mel)
|
20 |
options = whisper.DecodingOptions(fp16 = False)
|
21 |
result = whisper.decode(model, mel, options)
|
|
|
23 |
text = result.text
|
24 |
lang = max(probs, key=probs.get)
|
25 |
|
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
|
27 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
28 |
|
|
|
31 |
generated_tokens = model.generate(**encoded_bg)
|
32 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
33 |
|
|
|
34 |
tts = gTTS(text=translated_text, lang=target_lang)
|
35 |
filename = "to_speech.mp3"
|
36 |
tts.save(filename)
|
37 |
|
38 |
return filename, text, translated_text, target_lang
|
39 |
|
|
|
|
|
40 |
|
41 |
iface = gr.Interface(
|
42 |
fn=translate_voice,
|