sitammeur committed on
Commit
7f731e7
1 Parent(s): bb56e07

Upload 12 files

app.py ADDED
@@ -0,0 +1,62 @@
+ # Installing the latest version of the transformers library
+ import os
+ os.system("pip install ./transformers-4.47.0.dev0-py3-none-any.whl")
+
+ # Importing the requirements
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import gradio as gr
+ from src.app.response import describe_image
+
+
+ # Image, text query, and input parameters
+ image = gr.Image(type="pil", label="Image")
+ text = gr.Textbox(label="Question", placeholder="Enter your question here")
+ max_new_tokens = gr.Slider(
+     minimum=20, maximum=160, step=10, value=80, label="Max Tokens"
+ )
+
+ # Output for the interface
+ answer = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)
+
+ # Examples for the interface
+ examples = [
+     [
+         "images/cat.jpg",
+         "How many cats are there?",
+         80,
+     ],
+     [
+         "images/dog.jpg",
+         "What color is the dog?",
+         80,
+     ],
+     [
+         "images/bird.jpg",
+         "What is the bird doing?",
+         160,
+     ],
+ ]
+
+ # Title, description, and article for the interface
+ title = "Visual Question Answering"
+ description = "Gradio demo for PaliGemma 2, a vision-language understanding and generation model that answers questions about images in natural language. To use it, upload an image, type a question, adjust the parameters or keep the default values, and click 'Submit'; or click one of the examples to load them. You can read more at the links below."
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2412.03555' target='_blank'>Model Paper</a> | <a href='https://huggingface.co/google/paligemma2-3b-ft-docci-448' target='_blank'>Model Page</a></p>"
+
+
+ # Build and launch the interface
+ interface = gr.Interface(
+     fn=describe_image,
+     inputs=[image, text, max_new_tokens],
+     outputs=answer,
+     examples=examples,
+     cache_examples=True,
+     cache_mode="lazy",
+     title=title,
+     description=description,
+     article=article,
+     theme="Nymbo/Nymbo_Theme",
+     flagging_mode="never",
+ )
+ interface.launch(debug=False)
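Note that gr.Interface passes the components in `inputs` to `fn` positionally, so `[image, text, max_new_tokens]` must line up with the parameters of `describe_image`, and each example row follows the same order. A minimal sketch of that wiring with a stub function (hypothetical names, shown only to illustrate the positional mapping):

import gradio as gr

def stub(image, question, max_tokens):
    # Arguments arrive in the same order as inputs=[image, text, max_new_tokens]
    return f"question length: {len(question)}, max_tokens: {max_tokens}"

demo = gr.Interface(
    fn=stub,
    inputs=[gr.Image(type="pil"), gr.Textbox(), gr.Slider(20, 160, step=10, value=80)],
    outputs=gr.Textbox(),
)
# demo.launch()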
images/bird.jpg ADDED
images/cat.jpg ADDED
images/dog.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ spaces
+ gradio
src/__init__.py ADDED
File without changes
src/app/__init__.py ADDED
File without changes
src/app/model.py ADDED
@@ -0,0 +1,44 @@
+ # Necessary imports
+ import sys
+ from typing import Any
+ import torch
+ from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
+
+ # Local imports
+ from src.logger import logging
+ from src.exception import CustomExceptionHandling
+
+
+ def load_model_and_processor(model_name: str, device: str) -> Any:
+     """
+     Load the model and processor.
+
+     Args:
+         - model_name (str): The name of the model to load.
+         - device (str): The device to load the model onto.
+
+     Returns:
+         - model: The loaded model.
+         - processor: The loaded processor.
+     """
+     try:
+         # Load the model and processor
+         model = (
+             PaliGemmaForConditionalGeneration.from_pretrained(
+                 model_name, torch_dtype=torch.bfloat16
+             )
+             .eval()
+             .to(device)
+         )
+         processor = PaliGemmaProcessor.from_pretrained(model_name)
+
+         # Log the successful loading of the model and processor
+         logging.info("Model and processor loaded successfully.")
+
+         # Return the model and processor
+         return model, processor
+
+     # Handle exceptions that may occur during model and processor loading
+     except Exception as e:
+         # Custom exception handling
+         raise CustomExceptionHandling(e, sys) from e
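A standalone check of the loader, as a sketch (it assumes a CUDA device, enough GPU memory for the roughly 3B-parameter bfloat16 weights, and Hugging Face access to the gated checkpoint):

from src.app.model import load_model_and_processor
from src.config import device, model_name

# Returns the (model, processor) pair consumed by src/app/response.py
model, processor = load_model_and_processor(model_name, device)
print(type(model).__name__, type(processor).__name__)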
src/app/response.py ADDED
@@ -0,0 +1,60 @@
+ # Necessary imports
+ import sys
+ import PIL.Image
+ import torch
+ import gradio as gr
+ import spaces
+
+ # Local imports
+ from src.config import device, model_name, sampling
+ from src.app.model import load_model_and_processor
+ from src.logger import logging
+ from src.exception import CustomExceptionHandling
+
+
+ # Model and processor
+ model, processor = load_model_and_processor(model_name, device)
+
+
+ @spaces.GPU
+ def describe_image(image: PIL.Image.Image, text: str, max_new_tokens: int) -> str:
+     """
+     Generates a response based on the given image and text using the model.
+
+     Args:
+         - image (PIL.Image.Image): The input image to be processed.
+         - text (str): The input text to be processed.
+         - max_new_tokens (int): The maximum number of new tokens to generate.
+
+     Returns:
+         str: The generated response text.
+     """
+     try:
+         # Check if image or text is missing
+         if not image or not text:
+             gr.Warning("Please provide an image and a question.")
+             return ""
+
+         # Prepare the inputs
+         text = "answer en " + text
+         inputs = processor(text=text, images=image, return_tensors="pt").to(device)
+
+         # Generate the response
+         with torch.inference_mode():
+             generated_ids = model.generate(
+                 **inputs, max_new_tokens=max_new_tokens, do_sample=sampling
+             )
+
+         # Decode the generated response
+         result = processor.batch_decode(generated_ids, skip_special_tokens=True)
+
+         # Log the successful generation of the answer
+         logging.info("Answer generated successfully.")
+
+         # Return the generated response, stripping the echoed prompt
+         return result[0][len(text) :].lstrip("\n")
+
+     # Handle exceptions that may occur during answer generation
+     except Exception as e:
+         # Custom exception handling
+         raise CustomExceptionHandling(e, sys) from e
src/config.py ADDED
@@ -0,0 +1,6 @@
+ # Model settings
+ device = "cuda"
+ model_name = "google/paligemma2-3b-ft-docci-448"
+
+ # Decoding settings
+ sampling = True
src/exception.py ADDED
@@ -0,0 +1,50 @@
+ """
+ This module defines a custom exception handling class and a function to get an error message with details of the error.
+ """
+
+ # Standard Library
+ import sys
+
+ # Local imports
+ from src.logger import logging
+
+
+ # Function to get an error message with details of the error (file name and line number) when an error occurs in the program
+ def get_error_message(error, error_detail: sys):
+     """
+     Get error message with details of the error.
+
+     Args:
+         - error (Exception): The error that occurred.
+         - error_detail (sys): The details of the error.
+
+     Returns:
+         str: A string containing the error message along with the file name and line number where the error occurred.
+     """
+     _, _, exc_tb = error_detail.exc_info()
+
+     # Get error details
+     file_name = exc_tb.tb_frame.f_code.co_filename
+     return "Error occurred in python script name [{0}] line number [{1}] error message [{2}]".format(
+         file_name, exc_tb.tb_lineno, str(error)
+     )
+
+
+ # Custom Exception Handling Class Definition
+ class CustomExceptionHandling(Exception):
+     """
+     Custom Exception Handling:
+     This class defines a custom exception that can be raised when an error occurs in the program.
+     It takes an error message and an error detail as input and returns a formatted error message when the exception is raised.
+     """
+
+     # Constructor
+     def __init__(self, error_message, error_detail: sys):
+         """Initialize the exception"""
+         super().__init__(error_message)
+
+         self.error_message = get_error_message(error_message, error_detail=error_detail)
+
+     def __str__(self):
+         """String representation of the exception"""
+         return self.error_message
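Usage mirrors what src/app/model.py and src/app/response.py do: raise the custom exception from an `except` block, passing the `sys` module so `get_error_message` can read the active traceback. A small illustration (hypothetical, not part of the commit):

import sys
from src.exception import CustomExceptionHandling

try:
    1 / 0
except Exception as e:
    # str() of the raised exception reports the file name and line number of the original error
    raise CustomExceptionHandling(e, sys) from e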
src/logger.py ADDED
@@ -0,0 +1,21 @@
+ # Importing the required modules
+ import os
+ import logging
+ from datetime import datetime
+
+ # Creating a log file with the current date and time as the name of the file
+ LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
+
+ # Creating a logs folder if it does not exist
+ logs_path = os.path.join(os.getcwd(), "logs")
+ os.makedirs(logs_path, exist_ok=True)
+
+ # Setting the log file path
+ LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE)
+
+ # Configuring the logger
+ logging.basicConfig(
+     filename=LOG_FILE_PATH,
+     format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
+     level=logging.INFO,
+ )