Build

Paused

App Files Files Community

ManishThota committed on Feb 12, 2024

Commit

7ec133b

verified ·

1 Parent(s): 915e263

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import gradio as gr
+from PIL import Image
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Set default device to CUDA for GPU acceleration
+device = 'cuda' if torch.cuda.is_available() else "cpu"
+# torch.set_default_device("cuda")
+# Initialize the model and tokenizer
+model = AutoModelForCausalLM.from_pretrained("ManishThota/Sparrow").to(device)
+tokenizer = AutoTokenizer.from_pretrained("ManishThota/Sparrow", trust_remote_code=True)
+def predict_answer(image, question):
+    # Convert PIL image to RGB if not already
+    image = image.convert("RGB")
+    # # Format the text input for the model
+    # text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question} ASSISTANT:"
+    # Tokenize the text input
+    encoding = tokenizer(image, question, return_tensors='pt').to(device)
+    out = model.generate(**encoding)
+    # Preprocess the image for the model
+    generated_text = tokenizer.decode(out[0], skip_special_tokens=True)
+    # # Generate the answer
+    # output_ids = model.generate(
+    #     input_ids,
+    #     max_new_tokens=100,
+    #     images=image_tensor,
+    #     use_cache=True)[0]
+    # # Decode the generated tokens to get the answer
+    # answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+    return generated_text
+def gradio_predict(image, question):
+    answer = predict_answer(image, question)
+    return answer
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=gradio_predict,
+    inputs=[gr.Image(type="pil", label="Upload or Drag an Image"), gr.Textbox(label="Question", placeholder="e.g. What are the colors of the bus in the image?", scale=4)],
+    outputs=gr.TextArea(label="Answer"),
+    title="Sparrow-based Visual Question Answering",
+    description="An interactive chat model that can answer questions about images.",
+)
+# Launch the app
+iface.queue().launch(debug=True)