Update
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 import numpy as np
 import json
 import pickle
-
+from PIL import Image
 import torch
 from torch.nn.utils.rnn import pad_sequence
 from transformers import BridgeTowerProcessor

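The only change in the import block is pulling in PIL, which the new frame handling below needs. The later hunks also refer to `processor` and `device`; a plausible module-level setup for those two names might look like the sketch below (the checkpoint name and the device selection are assumptions, not something this diff shows).

    import torch
    from transformers import BridgeTowerProcessor

    # Assumed setup; the Space may load a different BridgeTower variant.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc")
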
@@ -147,8 +147,10 @@ def extract_images_and_embeds(video_id, video_path, subtitles, output, expanded=
 
         print('Read a new frame: ', idx, mid_time, frame_no, text)
         vidcap.set(1, frame_no) # added this line
-        success,
+        success, frame = vidcap.read()
         if success:
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            frame = Image.fromarray(frame)
             img_fname = f'{video_id}_{idx:06d}'
             img_fpath = os.path.join(output, 'frames', img_fname + '.jpg')
             # image = maintain_aspect_ratio_resize(image, height=350) # save frame as JPEG file

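The pattern added here seeks the capture to a frame index, reads it, and converts OpenCV's BGR array into a PIL image before it reaches the processor. A minimal sketch of that pattern is below; the helper name `grab_frame` is hypothetical, and property index 1 in `vidcap.set(1, frame_no)` is `cv2.CAP_PROP_POS_FRAMES`.

    import cv2
    from PIL import Image

    def grab_frame(vidcap, frame_no):
        """Seek to frame_no and return it as an RGB PIL image, or None if the read fails."""
        vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)   # equivalent to vidcap.set(1, frame_no)
        success, frame = vidcap.read()                  # frame is a BGR numpy array
        if not success:
            return None
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV decodes as BGR; PIL expects RGB
        return Image.fromarray(frame)
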
@@ -163,7 +165,7 @@ def extract_images_and_embeds(video_id, video_path, subtitles, output, expanded=
                 'frame_no': frame_no
             })
 
-            encoding = processor(
+            encoding = processor(frame, text, return_tensors="pt").to(device)
             encoding['text'] = text
             encoding['image_filepath'] = img_fpath
             encoding['start_time'] = caption.start

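The processor call now receives the in-memory PIL frame together with the caption text. A rough, self-contained sketch of what that line produces is below; the checkpoint name and the stand-in frame/text values are assumptions, not values from this diff.

    from PIL import Image
    from transformers import BridgeTowerProcessor

    processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc")

    frame = Image.new("RGB", (640, 360))        # stand-in for a decoded video frame
    text = "(00:01:23) example subtitle text"   # stand-in for the caption text

    encoding = processor(frame, text, return_tensors="pt")
    # encoding is a dict-like BatchEncoding holding tensors such as 'input_ids',
    # 'attention_mask', and 'pixel_values', so extra metadata can be attached to
    # it with plain item assignment, which is what the diff does next.
    encoding['image_filepath'] = "frames/demo_000000.jpg"
    print(encoding.keys())
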
@@ -236,14 +238,17 @@ def run_query(video_path, text_query, path='/tmp'):
     _, I = faiss_index.search(emb_query, 6)
 
     clip_images = []
+    transcripts = []
     for idx in I[0]:
         frame_no = embeddings[idx]['frame_no']
         vidcap.set(1, frame_no) # added this line
-        success,
-
+        success, frame = vidcap.read()
+        if success:
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            frame = Image.fromarray(frame)
+            clip_images.append(frame)
+            transcripts.append(f"({embeddings[idx]['start_time']}) {embeddings[idx]['text']}")
 
-    # clip_images = [embeddings[idx]['image_filepath'] for idx in I[0]]
-    transcripts = [f"({embeddings[idx]['start_time']}) {embeddings[idx]['text']}" for idx in I[0]]
     return clip_images, transcripts
 
 
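In run_query the change moves frame loading into the result loop: faiss returns the indices of the six nearest stored embeddings, and each index is mapped back to its saved frame number and transcript. A condensed sketch of that flow is below; the function name `gather_results` is a hypothetical wrapper around the same steps, while `faiss_index`, `embeddings`, `vidcap`, and `emb_query` are the names used in the diff.

    import cv2
    from PIL import Image

    def gather_results(faiss_index, embeddings, vidcap, emb_query, k=6):
        """Return (PIL frames, timed transcripts) for the top-k matches of emb_query."""
        _, I = faiss_index.search(emb_query, k)   # I: (1, k) array of nearest-neighbor indices
        clip_images, transcripts = [], []
        for idx in I[0]:
            meta = embeddings[idx]                # metadata stored at indexing time
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, meta['frame_no'])
            success, frame = vidcap.read()
            if not success:                       # skip frames that fail to decode
                continue
            clip_images.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
            transcripts.append(f"({meta['start_time']}) {meta['text']}")
        return clip_images, transcripts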