Spaces:
Paused
Paused
ManishThota
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -67,11 +67,14 @@ def video_to_frames(video_path):
|
|
67 |
return frames_png
|
68 |
|
69 |
def predict_answer(image, video, question, max_tokens=100):
|
|
|
|
|
|
|
|
|
70 |
|
71 |
if image:
|
72 |
# Process as an image
|
73 |
image = image.convert("RGB")
|
74 |
-
input_ids = tokenizer(question, return_tensors='pt').input_ids.to(device)
|
75 |
image_tensor = model.image_preprocess(image)
|
76 |
|
77 |
#Generate the answer
|
@@ -89,7 +92,6 @@ def predict_answer(image, video, question, max_tokens=100):
|
|
89 |
answers = []
|
90 |
for frame in frames:
|
91 |
frame = Image.open(frame).convert("RGB")
|
92 |
-
input_ids = tokenizer(question, return_tensors='pt').input_ids.to(device)
|
93 |
image_tensor = model.image_preprocess(frame)
|
94 |
|
95 |
# Generate the answer
|
|
|
67 |
return frames_png
|
68 |
|
69 |
def predict_answer(image, video, question, max_tokens=100):
|
70 |
+
|
71 |
+
text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question}? ASSISTANT:"
|
72 |
+
input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
|
73 |
+
|
74 |
|
75 |
if image:
|
76 |
# Process as an image
|
77 |
image = image.convert("RGB")
|
|
|
78 |
image_tensor = model.image_preprocess(image)
|
79 |
|
80 |
#Generate the answer
|
|
|
92 |
answers = []
|
93 |
for frame in frames:
|
94 |
frame = Image.open(frame).convert("RGB")
|
|
|
95 |
image_tensor = model.image_preprocess(frame)
|
96 |
|
97 |
# Generate the answer
|