sitammeur commited on
Commit
7f731e7
·
verified ·
1 Parent(s): bb56e07

Upload 12 files

Browse files
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Installing the latest version of the transformers library.
# This has to happen before `src.app.response` is imported, because that import
# chain pulls the PaliGemma classes from transformers at import time.
import os
import sys

install_status = os.system("pip install ./transformers-4.47.0.dev0-py3-none-any.whl")
if install_status != 0:
    # Best-effort install: keep going, but report the failure instead of
    # silently discarding the exit status.
    print(
        f"WARNING: installing the bundled transformers wheel failed (exit status {install_status}).",
        file=sys.stderr,
    )

# Importing the requirements
import warnings

warnings.filterwarnings("ignore")

import gradio as gr
from src.app.response import describe_image


def answer_question(image, text, max_new_tokens):
    """
    Adapt the interface inputs to `describe_image`.

    The interface passes its inputs positionally in the order
    (image, text, max_new_tokens), while `describe_image` declares
    (text, image, max_new_tokens). Forwarding by keyword keeps the image and
    the question from being swapped.

    Args:
        - image: The uploaded image (the Image input uses type="pil").
        - text (str): The question typed by the user.
        - max_new_tokens: The value of the "Max Tokens" slider.

    Returns:
        The value returned by `describe_image`.
    """
    return describe_image(text=text, image=image, max_new_tokens=max_new_tokens)


# Image, text query, and input parameters
image = gr.Image(type="pil", label="Image")
text = gr.Textbox(label="Question", placeholder="Enter your question here")
# `step` was previously passed twice (a SyntaxError); a single step of 10 lines
# up with the 20/80/160 bounds and default.
max_new_tokens = gr.Slider(
    minimum=20, maximum=160, value=80, step=10, label="Max Tokens"
)

# Output for the interface
answer = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)

# Examples for the interface: [image path, question, max tokens], matching the
# order of the `inputs` list below.
examples = [
    [
        "images/cat.jpg",
        "How many cats are there?",
        80,
    ],
    [
        "images/dog.jpg",
        "What color is the dog?",
        80,
    ],
    [
        "images/bird.jpg",
        "What is the bird doing?",
        160,
    ],
]

# Title, description, and article for the interface
title = "Visual Question Answering"
description = "Gradio Demo for the PaliGemma 2 Vision Language Understanding and Generation model. This model can answer questions about images in natural language. To use it, upload your image, type a question, select associated parameters, use the default values, click 'Submit', or click one of the examples to load them. You can read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2412.03555' target='_blank'>Model Paper</a> | <a href='https://huggingface.co/google/paligemma2-3b-ft-docci-448' target='_blank'>Model Page</a></p>"


# Launch the interface
interface = gr.Interface(
    fn=answer_question,
    inputs=[image, text, max_new_tokens],
    outputs=answer,
    examples=examples,
    cache_examples=True,
    cache_mode="lazy",
    title=title,
    description=description,
    article=article,
    theme="Nymbo/Nymbo_Theme",
    flagging_mode="never",
)
interface.launch(debug=False)
images/bird.jpg ADDED
images/cat.jpg ADDED
images/dog.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ spaces
3
+ gradio
src/__init__.py ADDED
File without changes
src/app/__init__.py ADDED
File without changes
src/app/model.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import sys
3
+ from typing import Any
4
+ import torch
5
+ from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
6
+
7
+ # Local imports
8
+ from src.logger import logging
9
+ from src.exception import CustomExceptionHandling
10
+
11
+
12
def load_model_and_processor(model_name: str, device: str) -> Any:
    """
    Load a pretrained PaliGemma model together with its processor.

    The model is loaded with bfloat16 weights, switched to evaluation mode and
    then moved to the requested device. The processor is loaded from the same
    checkpoint name.

    Args:
        - model_name (str): The checkpoint name passed to `from_pretrained`.
        - device (str): The device the model is moved to.

    Returns:
        - model: The loaded model, in eval mode, on `device`.
        - processor: The loaded processor.

    Raises:
        CustomExceptionHandling: If anything fails while loading; the original
        exception is chained as the cause.
    """
    try:
        # Load the weights in bfloat16
        pretrained = PaliGemmaForConditionalGeneration.from_pretrained(
            model_name, torch_dtype=torch.bfloat16
        )

        # Evaluation mode first, then move to the target device
        model = pretrained.eval().to(device)

        # The processor comes from the same checkpoint
        processor = PaliGemmaProcessor.from_pretrained(model_name)

        # Log the successful loading of the model and processor
        logging.info("Model and processor loaded successfully.")

        return model, processor

    # Wrap any loading failure in the project's custom exception
    except Exception as e:
        raise CustomExceptionHandling(e, sys) from e
src/app/response.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Necessary imports
2
+ import sys
3
+ import PIL.Image
4
+ import torch
5
+ import gradio as gr
6
+ import spaces
7
+
8
# Local imports
from src.config import device, model_name, sampling

# src/app/model.py exposes `load_model_and_processor`; the previously imported
# `load_model_and_tokenizer` does not exist there and failed at import time.
from src.app.model import load_model_and_processor
from src.logger import logging
from src.exception import CustomExceptionHandling


# Model and processor, loaded once at import time.
# The loader returns exactly two values: (model, processor).
model, processor = load_model_and_processor(model_name, device)
17
+
18
+
19
@spaces.GPU
def describe_image(text: str, image: PIL.Image.Image, max_new_tokens: int) -> str:
    """
    Generates a response based on the given text and image using the model.

    Note the parameter order is (text, image, max_new_tokens); callers whose
    inputs are ordered differently should pass the arguments by keyword.

    Args:
        - text (str): The question about the image. It is prefixed with
          "answer en " before being handed to the processor.
        - image (PIL.Image.Image): The input image to be processed.
        - max_new_tokens (int): The maximum number of new tokens to generate.

    Returns:
        str: The generated response text, or an empty string when the image or
        the question is missing (a Gradio warning is shown in that case).

    Raises:
        CustomExceptionHandling: If preprocessing, generation or decoding
        fails; the original exception is chained as the cause.
    """
    try:
        # Stop early when an input is missing. Previously only the warning was
        # issued and execution continued, failing on `"answer en " + None`.
        if image is None or not text:
            gr.Warning("Please provide an image and a question.")
            return ""

        # Prepare the inputs
        prompt = "answer en " + text
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

        # Generate the response
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs, max_new_tokens=max_new_tokens, do_sample=sampling
            )

        # Decode the generated response
        result = processor.batch_decode(generated_ids, skip_special_tokens=True)

        # Log the successful generation of the answer
        logging.info("Answer generated successfully.")

        # Drop the first len(prompt) characters and any leading newlines.
        # NOTE(review): presumably the decoded text echoes the prompt before
        # the answer - confirm against the processor's output format.
        return result[0][len(prompt) :].lstrip("\n")

    # Handle exceptions that may occur during answer generation
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
src/config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Model settings: read by src/app/response.py and passed on to the model loader
2
+ device = "cuda"  # device string the model and the processor inputs are moved to via .to(device)
3
+ model_name = "google/paligemma2-3b-ft-docci-448"  # checkpoint name handed to from_pretrained
4
+
5
+ # Decoding settings
6
+ sampling = True  # forwarded as do_sample to model.generate in describe_image
src/exception.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module defines a custom exception handling class and a function to get error message with details of the error.
3
+ """
4
+
5
+ # Standard Library
6
+ import sys
7
+
8
+ # Local imports
9
+ from src.logger import logging
10
+
11
+
12
+ # Function Definition to get error message with details of the error (file name and line number) when an error occurs in the program
13
def get_error_message(error, error_detail: sys):
    """
    Get error message with details of the error.

    The traceback is taken from `error_detail.exc_info()`. When no exception
    is currently being handled (the traceback is None), the traceback attached
    to `error` itself is used instead; if that is missing too, placeholders
    are reported rather than failing while building the message.

    Args:
        - error (Exception): The error that occurred.
        - error_detail (sys): The `sys` module (or any object exposing a
          compatible `exc_info()`), used to fetch the active traceback.

    Returns:
        str: A string containing the error message along with the file name and
        line number taken from the first traceback entry, or "<unknown>" / "?"
        when no traceback is available.
    """
    _, _, exc_tb = error_detail.exc_info()

    # Outside an `except` block exc_info() yields (None, None, None); fall back
    # to the traceback stored on the exception object, if any.
    if exc_tb is None:
        exc_tb = getattr(error, "__traceback__", None)

    # Get error details
    if exc_tb is None:
        file_name, line_number = "<unknown>", "?"
    else:
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno

    return "Error occured in python script name [{0}] line number [{1}] error message[{2}]".format(
        file_name, line_number, str(error)
    )
31
+
32
+
33
+ # Custom Exception Handling Class Definition
34
class CustomExceptionHandling(Exception):
    """
    Project-wide exception carrying a formatted, location-aware message.

    On construction the wrapped error is passed to `get_error_message`, and
    the resulting text (file name, line number and original message) becomes
    the string form of this exception.
    """

    def __init__(self, error_message, error_detail: sys):
        """
        Build the exception.

        Args:
            - error_message: The original error (or message) being wrapped.
            - error_detail (sys): Passed through to `get_error_message` to
              look up the traceback.
        """
        super().__init__(error_message)

        # Pre-compute the formatted text once; __str__ just returns it
        formatted = get_error_message(error_message, error_detail=error_detail)
        self.error_message = formatted

    def __str__(self):
        """Return the formatted error message built in the constructor."""
        return self.error_message
src/logger.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing the required modules
2
+ import os
3
+ import logging
4
+ from datetime import datetime
5
+
6
+ # Creating a log file with the current date and time as the name of the file
7
# Name the log file after the date and time at which this module is imported
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# Creating a logs folder if it does not exist.
# The folder path must not include LOG_FILE: otherwise makedirs creates a
# directory named like the log file and the log ends up nested inside it.
logs_path = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_path, exist_ok=True)

# Full path of the log file inside the logs folder
LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE)

# Configuring the logger: write INFO and above to the log file, with the
# timestamp, line number, logger name, level and message on each record
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)