import os
import subprocess

import requests
import gradio as gr
import moviepy.editor as mp
import assemblyai as aai
from TTS.api import TTS

# Accept the Coqui TTS license non-interactively so XTTS can load without a prompt.
os.environ["COQUI_TOS_AGREED"] = "1"
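
# Runtime configuration (assumed to be provided via the environment, e.g. as Space
# secrets); both keys are read at request time, not at startup:
#   ASSEMBLYAI_API_KEY           - AssemblyAI key used for transcription
#   MICROSOFT_TRANSLATOR_API_KEY - Azure Translator key (region: southeastasia)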
# Download the model weights if they are not already present.
model_files = {
    "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
    "wav2lip_gan.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth",
    "resnet50.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth",
    "mobilenet.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth",
    "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
}

device = "cpu"

# Initialize the XTTS v2 multilingual TTS model (used for voice-cloned synthesis).
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Fetch each weight file; s3fd.pth is the face-detector weight and goes to
# face_detection/, the Wav2Lip checkpoints go to checkpoints/.
for filename, url in model_files.items():
    target_dir = "face_detection" if filename == "s3fd.pth" else "checkpoints"
    os.makedirs(target_dir, exist_ok=True)  # create the directory on first run
    file_path = os.path.join(target_dir, filename)
    if not os.path.exists(file_path):
        print(f"Downloading {filename}...")
        r = requests.get(url)
        r.raise_for_status()  # fail early on a bad download instead of saving an error page
        with open(file_path, "wb") as f:
            f.write(r.content)
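
# Note: the Wav2Lip checkpoints are a few hundred MB each. The one-shot download
# above is fine for a one-time setup; if memory is tight, a streamed download
# (requests.get(url, stream=True) plus iter_content chunks) avoids holding an
# entire file in RAM.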
# Translation pipeline: extract audio, transcribe, translate, synthesize, lip-sync.
class Translation:
    LANGUAGE_CODES = {"English": "en", "German": "de", "Italian": "it", "Spanish": "es"}

    def __init__(self, video_path, original_language, target_language):
        self.video_path = video_path
        self.original_language = original_language
        self.target_language = target_language

    def org_language_parameters(self, original_language):
        # Resolve the source language name to its ISO 639-1 code.
        self.lan_code = self.LANGUAGE_CODES.get(original_language, "")

    def target_language_parameters(self, target_language):
        # Resolve the target language name to its ISO 639-1 code.
        self.tran_code = self.LANGUAGE_CODES.get(target_language, "")
    def extract_audio(self):
        # Extract the audio track; it is reused both for transcription and as the
        # speaker reference for voice cloning.
        video = mp.VideoFileClip(self.video_path)
        audio_path = "output_audio.wav"
        video.audio.write_audiofile(audio_path)
        video.close()
        return audio_path
    def transcribe_audio(self, audio_path):
        # Transcribe the extracted audio with AssemblyAI in the source language.
        aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        return transcript.text
    def translate_text(self, transcript_text):
        # Translate the transcript with the Microsoft Translator v3 REST API.
        base_url = "https://api.cognitive.microsofttranslator.com/translate"
        headers = {
            "Ocp-Apim-Subscription-Key": os.getenv("MICROSOFT_TRANSLATOR_API_KEY"),
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia",
        }
        params = {"api-version": "3.0", "from": self.lan_code, "to": self.tran_code}
        body = [{"text": transcript_text}]
        response = requests.post(base_url, headers=headers, params=params, json=body)
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]
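
    # For reference, the Translator v3 response is a list with one entry per input
    # text, e.g. [{"translations": [{"text": "Hallo Welt", "to": "de"}]}],
    # hence the [0]["translations"][0]["text"] indexing above.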
    def generate_audio(self, translated_text):
        # Synthesize the translated text with XTTS, cloning the original speaker's
        # voice from the extracted audio (speaker_wav).
        tts.tts_to_file(text=translated_text, speaker_wav="output_audio.wav",
                        file_path="output_synth.wav", language=self.tran_code)
        return "output_synth.wav"
    def translate_video(self):
        # Run the full pipeline, then lip-sync the result with Wav2Lip.
        self.org_language_parameters(self.original_language)
        self.target_language_parameters(self.target_language)
        audio_path = self.extract_audio()
        transcript_text = self.transcribe_audio(audio_path)
        translated_text = self.translate_text(transcript_text)
        translated_audio_path = self.generate_audio(translated_text)
        # inference.py is assumed to sit next to this script; passing arguments as a
        # list avoids shell-quoting problems with paths that contain spaces.
        subprocess.run([
            "python", "inference.py",
            "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
            "--face", self.video_path,
            "--audio", translated_audio_path,
            "--outfile", "output_video.mp4",
        ], check=True)
        return "output_video.mp4"
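
# A minimal sketch of driving the pipeline without the UI (hypothetical input file):
#
#     translator = Translation("sample.mp4", "English", "German")
#     result = translator.translate_video()  # -> "output_video.mp4"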
# Gradio interface
def app(video_path, original_language, target_language):
    translator = Translation(video_path, original_language, target_language)
    return translator.translate_video()


interface = gr.Interface(
    fn=app,
    inputs=[
        gr.Video(label="Video"),
        gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
        gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Target Language"),
    ],
    outputs=gr.Video(label="Translated Video"),
)

interface.launch()
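
# Note: run locally, launch() serves on http://127.0.0.1:7860 by default; on
# Hugging Face Spaces the app is exposed automatically.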