marquesafonso committed · Commit 31ddfb8 · 1 Parent(s): d0e3b53

bump gradio_client dep; add model_version selection

main.py CHANGED
@@ -41,6 +41,7 @@ async def get_temp_dir():
 async def process_video_api(video_file: UploadFile = File(media_type="video"),
                             srt_file: UploadFile = File(media_type="text"), #ad validation
                             task: Optional[str] = Form("transcribe"),
+                            model_version: Optional[str] = Form("deepdml/faster-whisper-large-v3-turbo-ct2"),
                             max_words_per_line: Optional[int] = Form(6),
                             fontsize: Optional[int] = Form(42),
                             font: Optional[str] = Form("FuturaPTHeavy"),
@@ -65,14 +66,14 @@ async def process_video_api(video_file: UploadFile = File(media_type="video"),
             finally:
                 srt_file.file.close()
             logging.info("Processing the video...")
-            output_path, srt_path = process_video(temp_file.name, temp_srt_file.name, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+            output_path, srt_path = process_video(temp_file.name, temp_srt_file.name, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
                 zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
                 return Response(content = zip_file, media_type = 'application/zip')
         with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
             logging.info("Processing the video...")
-            output_path, srt_path = process_video(temp_file.name, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+            output_path, srt_path = process_video(temp_file.name, None, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
             logging.info("Zipping response...")
             with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
                 zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
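
For reference, calling the updated endpoint means sending the extra model_version form field alongside the existing ones. A minimal client sketch, assuming the service runs locally; the host and route path are hypothetical (they are not visible in this diff), and only the multipart field names come from the signature above:

```python
import requests

# Hypothetical host and route; only the field names below come from the endpoint signature.
URL = "http://localhost:8000/process_video"

with open("input.mp4", "rb") as video, open("input.srt", "rb") as srt:
    response = requests.post(
        URL,
        files={"video_file": video, "srt_file": srt},
        data={
            "task": "transcribe",
            "model_version": "deepdml/faster-whisper-large-v3-turbo-ct2",  # new field added in this commit
            "max_words_per_line": 6,
            "fontsize": 42,
            "font": "FuturaPTHeavy",
        },
    )

response.raise_for_status()
# The endpoint replies with an application/zip payload containing the subtitled video and the SRT.
with open("output.zip", "wb") as out:
    out.write(response.content)
```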
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
static/submit_video.html CHANGED
@@ -164,6 +164,11 @@
                 <option value="transcribe">Transcribe</option>
                 <option value="translate">Translate</option>
             </select>
+            <label for="model_version">Model Version</label>
+            <select id="model_version" name="model_version">
+                <option value="deepdml/faster-whisper-large-v3-turbo-ct2">faster-whisper-large-v3-turbo</option>
+                <option value="large-v3">large-v3</option>
+            </select>
         </div>
         <div class="form-group">
             <h3>Visual Parameters</h3>
utils/process_video.py CHANGED
@@ -5,6 +5,7 @@ from utils.subtitler import subtitler
 def process_video(invideo_file: str,
                   srt_file: str | None,
                   task: str,
+                  model_version: str,
                   max_words_per_line:int,
                   fontsize:str,
                   font:str,
@@ -20,7 +21,7 @@ def process_video(invideo_file: str,
         subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     else:
         srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
-        transcriber(invideo_file, srt_file, max_words_per_line, task)
+        transcriber(invideo_file, srt_file, max_words_per_line, task, model_version)
         logging.info("Subtitling...")
         subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
     return OUTVIDEO_PATH, srt_file
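
Because the new argument is passed positionally, model_version now sits between task and max_words_per_line. A minimal sketch of a direct call under that order; the visual-parameter values here are illustrative placeholders, not defaults from the repository:

```python
from utils.process_video import process_video

# model_version (new) comes right after task; remaining values are placeholders.
output_path, srt_path = process_video(
    "clip.mp4",                                    # invideo_file
    None,                                          # srt_file: None triggers transcription first
    "transcribe",                                  # task
    "deepdml/faster-whisper-large-v3-turbo-ct2",   # model_version
    6,                                             # max_words_per_line
    42,                                            # fontsize
    "FuturaPTHeavy",                               # font
    "black",                                       # bg_color (placeholder)
    "white",                                       # text_color (placeholder)
    "default",                                     # caption_mode (placeholder)
)
```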
utils/transcriber.py CHANGED
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
 from gradio_client import Client, handle_file
 
 def transcriber(invideo_file:str, srt_file:str,
-                max_words_per_line:int, task:str):
+                max_words_per_line:int, task:str, model_version:str):
     load_dotenv()
     HF_TOKEN = os.getenv("hf_token")
     HF_SPACE = os.getenv("hf_space")
@@ -12,6 +12,7 @@ def transcriber(invideo_file:str, srt_file:str,
         video_input=handle_file(invideo_file),
         max_words_per_line=max_words_per_line,
         task=task,
+        model_version=model_version,
         api_name="/predict"
     )
     with open(srt_file, "w", encoding='utf-8') as file:
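
Putting the transcriber hunks together, the updated function looks roughly like the sketch below. The Client construction and the final write of the prediction result fall outside the hunks shown above, so those two lines are assumptions:

```python
import os
from dotenv import load_dotenv
from gradio_client import Client, handle_file

def transcriber(invideo_file: str, srt_file: str,
                max_words_per_line: int, task: str, model_version: str):
    load_dotenv()
    HF_TOKEN = os.getenv("hf_token")
    HF_SPACE = os.getenv("hf_space")
    client = Client(HF_SPACE, hf_token=HF_TOKEN)  # assumed: constructor call is outside the shown hunks
    result = client.predict(
        video_input=handle_file(invideo_file),
        max_words_per_line=max_words_per_line,
        task=task,
        model_version=model_version,  # new: forwarded to the Space's /predict endpoint
        api_name="/predict"
    )
    with open(srt_file, "w", encoding='utf-8') as file:
        file.write(result)  # assumed: the SRT text written out is the predict() return value
```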