Build

Paused

ManishThota committed on Mar 7, 2024

Commit

feb8185

verified ·

1 Parent(s): 7294f1e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -67,11 +67,14 @@ def video_to_frames(video_path):
     return frames_png
 def predict_answer(image, video, question, max_tokens=100):
     if image:
         # Process as an image
         image = image.convert("RGB")
-        input_ids = tokenizer(question, return_tensors='pt').input_ids.to(device)
         image_tensor = model.image_preprocess(image)
         #Generate the answer
@@ -89,7 +92,6 @@ def predict_answer(image, video, question, max_tokens=100):
         answers = []
         for frame in frames:
             frame = Image.open(frame).convert("RGB")
-            input_ids = tokenizer(question, return_tensors='pt').input_ids.to(device)
             image_tensor = model.image_preprocess(frame)
             # Generate the answer

     return frames_png
 def predict_answer(image, video, question, max_tokens=100):
+    text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question}? ASSISTANT:"
+    input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
     if image:
         # Process as an image
         image = image.convert("RGB")
         image_tensor = model.image_preprocess(image)
         #Generate the answer
         answers = []
         for frame in frames:
             frame = Image.open(frame).convert("RGB")
             image_tensor = model.image_preprocess(frame)
             # Generate the answer