marquesafonso committed · Commit 31ddfb8 · Parent(s): d0e3b53

bump gradio_client dep; add model_version selection

Files changed:
- main.py +3 -2
- requirements.txt +0 -0
- static/submit_video.html +5 -0
- utils/process_video.py +2 -1
- utils/transcriber.py +2 -1
main.py
CHANGED
@@ -41,6 +41,7 @@ async def get_temp_dir():
 async def process_video_api(video_file: UploadFile = File(media_type="video"),
                             srt_file: UploadFile = File(media_type="text"), #ad validation
                             task: Optional[str] = Form("transcribe"),
+                            model_version: Optional[str] = Form("deepdml/faster-whisper-large-v3-turbo-ct2"),
                             max_words_per_line: Optional[int] = Form(6),
                             fontsize: Optional[int] = Form(42),
                             font: Optional[str] = Form("FuturaPTHeavy"),
@@ -65,14 +66,14 @@ async def process_video_api(video_file: UploadFile = File(media_type="video"),
             finally:
                 srt_file.file.close()
             logging.info("Processing the video...")
-            output_path, srt_path = process_video(temp_file.name, temp_srt_file.name, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+            output_path, srt_path = process_video(temp_file.name, temp_srt_file.name, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
                 zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
                 return Response(content = zip_file, media_type = 'application/zip')
         with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
             logging.info("Processing the video...")
-            output_path, srt_path = process_video(temp_file.name, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+            output_path, srt_path = process_video(temp_file.name, None, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
                 zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
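With this change the endpoint accepts a new model_version form field (defaulting to deepdml/faster-whisper-large-v3-turbo-ct2) alongside the existing transcription options. A minimal client sketch follows; the host, port, and route path are assumptions, since the actual route decorator is not shown in this diff.

# Minimal client sketch (assumptions: local server on port 8000 and a
# /process_video/ route; neither appears in this hunk). The srt_file upload
# is omitted here -- whether it is required depends on code outside this diff.
import requests

with open("input.mp4", "rb") as video:
    resp = requests.post(
        "http://localhost:8000/process_video/",
        files={"video_file": ("input.mp4", video, "video/mp4")},
        data={
            "task": "transcribe",
            # New in this commit: select the transcription checkpoint.
            "model_version": "deepdml/faster-whisper-large-v3-turbo-ct2",
            "max_words_per_line": 6,
            "fontsize": 42,
            "font": "FuturaPTHeavy",
        },
    )

resp.raise_for_status()
# The endpoint returns a zip containing the subtitled video and the .srt file.
with open("result.zip", "wb") as out:
    out.write(resp.content)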
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
static/submit_video.html
CHANGED
@@ -164,6 +164,11 @@
                 <option value="transcribe">Transcribe</option>
                 <option value="translate">Translate</option>
             </select>
+            <label for="model_version">Model Version</label>
+            <select id="model_version" name="model_version">
+                <option value="deepdml/faster-whisper-large-v3-turbo-ct2">faster-whisper-large-v3-turbo</option>
+                <option value="large-v3">large-v3</option>
+            </select>
         </div>
         <div class="form-group">
             <h3>Visual Parameters</h3>
utils/process_video.py
CHANGED
@@ -5,6 +5,7 @@ from utils.subtitler import subtitler
 def process_video(invideo_file: str,
                   srt_file: str | None,
                   task: str,
+                  model_version: str,
                   max_words_per_line:int,
                   fontsize:str,
                   font:str,
@@ -20,7 +21,7 @@ def process_video(invideo_file: str,
         subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     else:
         srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
-        transcriber(invideo_file, srt_file, max_words_per_line, task)
+        transcriber(invideo_file, srt_file, max_words_per_line, task, model_version)
         logging.info("Subtitling...")
         subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     return OUTVIDEO_PATH, srt_file
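process_video only uses model_version when no .srt file is supplied, i.e. when it has to transcribe before subtitling; with an existing subtitle file the model choice never reaches the transcriber. An illustrative call, with hypothetical paths and placeholder style values:

# Hypothetical paths and style values, for illustration only.
from utils.process_video import process_video

out_video, srt_path = process_video(
    "clips/demo.mp4",          # invideo_file
    None,                      # srt_file: None -> transcribe first
    "transcribe",              # task
    "large-v3",                # model_version (new in this commit)
    6,                         # max_words_per_line
    42,                        # fontsize
    "FuturaPTHeavy",           # font
    "black",                   # bg_color (assumed value)
    "white",                   # text_color (assumed value)
    "default",                 # caption_mode (assumed value)
)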
utils/transcriber.py
CHANGED
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
 from gradio_client import Client, handle_file
 
 def transcriber(invideo_file:str, srt_file:str,
-                max_words_per_line:int, task:str):
+                max_words_per_line:int, task:str, model_version:str):
     load_dotenv()
     HF_TOKEN = os.getenv("hf_token")
     HF_SPACE = os.getenv("hf_space")
@@ -12,6 +12,7 @@ def transcriber(invideo_file:str, srt_file:str,
         video_input=handle_file(invideo_file),
         max_words_per_line=max_words_per_line,
         task=task,
+        model_version=model_version,
         api_name="/predict"
     )
     with open(srt_file, "w", encoding='utf-8') as file:
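The transcriber forwards model_version as an extra keyword argument to the remote Space's /predict endpoint through gradio_client, so the Space must expose a matching parameter; that is presumably also why this commit bumps the gradio_client dependency. A sketch of calling the updated function directly, assuming hf_token and hf_space are available in the environment as the function expects:

# Direct call for local testing (assumes a .env or environment providing
# hf_token and hf_space, which the function reads via os.getenv).
from utils.transcriber import transcriber

transcriber(
    "clips/demo.mp4",          # invideo_file (hypothetical path)
    "clips/demo.srt",          # srt_file the result is written to
    max_words_per_line=6,
    task="transcribe",
    model_version="large-v3",  # new argument added in this commit
)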