marquesafonso committed · Commit 31ddfb8 · Parent(s): d0e3b53

bump gradio_client dep; add model_version selection

Files changed:
- main.py +3 -2
- requirements.txt +0 -0
- static/submit_video.html +5 -0
- utils/process_video.py +2 -1
- utils/transcriber.py +2 -1
main.py
CHANGED
@@ -41,6 +41,7 @@ async def get_temp_dir():
 async def process_video_api(video_file: UploadFile = File(media_type="video"),
                             srt_file: UploadFile = File(media_type="text"), #ad validation
                             task: Optional[str] = Form("transcribe"),
+                            model_version: Optional[str] = Form("deepdml/faster-whisper-large-v3-turbo-ct2"),
                             max_words_per_line: Optional[int] = Form(6),
                             fontsize: Optional[int] = Form(42),
                             font: Optional[str] = Form("FuturaPTHeavy"),
@@ -65,14 +66,14 @@ async def process_video_api(video_file: UploadFile = File(media_type="video"),
             finally:
                 srt_file.file.close()
             logging.info("Processing the video...")
-            output_path, srt_path = process_video(temp_file.name, temp_srt_file.name, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+            output_path, srt_path = process_video(temp_file.name, temp_srt_file.name, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
                 zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
                 return Response(content = zip_file, media_type = 'application/zip')
         with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
             logging.info("Processing the video...")
-            output_path, srt_path = process_video(temp_file.name, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+            output_path, srt_path = process_video(temp_file.name, None, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
                 zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
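With this change the endpoint accepts a new model_version form field (defaulting to deepdml/faster-whisper-large-v3-turbo-ct2) alongside the existing transcription options. A minimal client sketch follows; the host, port, and route path are assumptions, since the actual route decorator is not shown in this diff.

# Minimal client sketch (assumptions: local server on port 8000 and a
# /process_video/ route; neither appears in this hunk). The srt_file upload
# is omitted here -- whether it is required depends on code outside this diff.
import requests

with open("input.mp4", "rb") as video:
    resp = requests.post(
        "http://localhost:8000/process_video/",
        files={"video_file": ("input.mp4", video, "video/mp4")},
        data={
            "task": "transcribe",
            # New in this commit: select the transcription checkpoint.
            "model_version": "deepdml/faster-whisper-large-v3-turbo-ct2",
            "max_words_per_line": 6,
            "fontsize": 42,
            "font": "FuturaPTHeavy",
        },
    )

resp.raise_for_status()
# The endpoint returns a zip containing the subtitled video and the .srt file.
with open("result.zip", "wb") as out:
    out.write(resp.content)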
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
static/submit_video.html
CHANGED
@@ -164,6 +164,11 @@
                 <option value="transcribe">Transcribe</option>
                 <option value="translate">Translate</option>
             </select>
+            <label for="model_version">Model Version</label>
+            <select id="model_version" name="model_version">
+                <option value="deepdml/faster-whisper-large-v3-turbo-ct2">faster-whisper-large-v3-turbo</option>
+                <option value="large-v3">large-v3</option>
+            </select>
         </div>
         <div class="form-group">
             <h3>Visual Parameters</h3>
utils/process_video.py
CHANGED
@@ -5,6 +5,7 @@ from utils.subtitler import subtitler
 def process_video(invideo_file: str,
                   srt_file: str | None,
                   task: str,
+                  model_version: str,
                   max_words_per_line:int,
                   fontsize:str,
                   font:str,
@@ -20,7 +21,7 @@ def process_video(invideo_file: str,
         subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     else:
         srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
-        transcriber(invideo_file, srt_file, max_words_per_line, task)
+        transcriber(invideo_file, srt_file, max_words_per_line, task, model_version)
         logging.info("Subtitling...")
         subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     return OUTVIDEO_PATH, srt_file
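process_video only uses model_version when no .srt file is supplied, i.e. when it has to transcribe before subtitling; with an existing subtitle file the model choice never reaches the transcriber. An illustrative call, with hypothetical paths and placeholder style values:

# Hypothetical paths and style values, for illustration only.
from utils.process_video import process_video

out_video, srt_path = process_video(
    "clips/demo.mp4",          # invideo_file
    None,                      # srt_file: None -> transcribe first
    "transcribe",              # task
    "large-v3",                # model_version (new in this commit)
    6,                         # max_words_per_line
    42,                        # fontsize
    "FuturaPTHeavy",           # font
    "black",                   # bg_color (assumed value)
    "white",                   # text_color (assumed value)
    "default",                 # caption_mode (assumed value)
)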
utils/transcriber.py
CHANGED
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
 from gradio_client import Client, handle_file
 
 def transcriber(invideo_file:str, srt_file:str,
-                max_words_per_line:int, task:str):
+                max_words_per_line:int, task:str, model_version:str):
     load_dotenv()
     HF_TOKEN = os.getenv("hf_token")
     HF_SPACE = os.getenv("hf_space")
@@ -12,6 +12,7 @@ def transcriber(invideo_file:str, srt_file:str,
         video_input=handle_file(invideo_file),
         max_words_per_line=max_words_per_line,
         task=task,
+        model_version=model_version,
         api_name="/predict"
     )
     with open(srt_file, "w", encoding='utf-8') as file:
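The transcriber forwards model_version as an extra keyword argument to the remote Space's /predict endpoint through gradio_client, so the Space must expose a matching parameter; that is presumably also why this commit bumps the gradio_client dependency. A sketch of calling the updated function directly, assuming hf_token and hf_space are available in the environment as the function expects:

# Direct call for local testing (assumes a .env or environment providing
# hf_token and hf_space, which the function reads via os.getenv).
from utils.transcriber import transcriber

transcriber(
    "clips/demo.mp4",          # invideo_file (hypothetical path)
    "clips/demo.srt",          # srt_file the result is written to
    max_words_per_line=6,
    task="transcribe",
    model_version="large-v3",  # new argument added in this commit
)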