open-o1 / core /llms /utils.py
tikendraw's picture
second initial commit
22507c4
raw
history blame
2.94 kB
## supporting functions
import base64, io
from typing import Any, Union, List
from PIL import Image # Pillow image library
# thanks to https://community.openai.com/t/how-to-load-a-local-image-to-gpt4-vision-using-api/533090/5
def resize_image(image, max_dimension):
    """Return *image* scaled down so neither side exceeds *max_dimension*.

    Palette ("P") images are first converted to true colour: "RGBA" when the
    image info carries a ``transparency`` entry, "RGB" otherwise. Images that
    already fit are returned without resampling. The aspect ratio is
    preserved, and the derived (shorter) side is clamped to at least one
    pixel so that very elongated images do not yield a zero-sized target.

    Args:
        image: Pillow image (any object exposing ``size``, ``mode``,
            ``info``, ``convert`` and ``resize``).
        max_dimension: Maximum allowed width and height, in pixels.

    Returns:
        The (possibly converted and/or resized) image.
    """
    width, height = image.size

    # Check if the image has a palette and convert it to true color mode
    if image.mode == "P":
        image = image.convert("RGBA" if "transparency" in image.info else "RGB")

    if width <= max_dimension and height <= max_dimension:
        return image

    # Scale the longer side to max_dimension and derive the other one.
    # int() truncation can reach 0 for extreme aspect ratios (e.g. 5000x1),
    # and resize() needs positive dimensions, so keep at least one pixel on
    # the derived side. max_dimension itself is deliberately not clamped: a
    # non-positive limit still fails inside resize() as it did before.
    if width > height:
        new_size = (max_dimension, max(1, int(height * (max_dimension / width))))
    else:
        new_size = (max(1, int(width * (max_dimension / height))), max_dimension)

    return image.resize(new_size, Image.LANCZOS)
def convert_to_png(image):
    """Serialize *image* in PNG format and return the encoded bytes.

    Args:
        image: Object with a Pillow-style ``save(fp, format=...)`` method.

    Returns:
        The PNG data written by ``image.save`` as ``bytes``.
    """
    buffer = io.BytesIO()
    try:
        image.save(buffer, format="PNG")
        return buffer.getvalue()
    finally:
        # The buffer is scratch space only; release it on every exit path.
        buffer.close()
def create_image_content(image, mime_type=None):
    """Wrap base64-encoded image data in an ``image_url`` message part.

    The data URL previously always claimed ``image/jpeg``, even though the
    helpers in this module encode images as PNG. The MIME type is now taken
    from *mime_type* when given; otherwise it is sniffed from the leading
    base64 characters (PNG and GIF signatures are recognised) and falls back
    to ``image/jpeg`` for anything else, which matches the old output.

    Args:
        image: Base64-encoded image data as text.
        mime_type: Optional explicit MIME type for the data URL. When
            ``None`` (the default) the type is detected as described above.

    Returns:
        A dict of the form
        ``{"type": "image_url", "image_url": {"url": "data:<mime>;base64,<data>"}}``.
    """
    if mime_type is None:
        mime_type = "image/jpeg"
        if isinstance(image, str):
            # Base64 renderings of the PNG (\x89PNG\r\n\x1a\n) and GIF (GIF8)
            # file signatures.
            signatures = (
                ("iVBORw0KGg", "image/png"),
                ("R0lGOD", "image/gif"),
            )
            for prefix, sniffed in signatures:
                if image.startswith(prefix):
                    mime_type = sniffed
                    break

    return {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{image}"},
    }
def get_attr(attr:str, kwargs:dict, cls:Any=None) -> Any:
    """Look up *attr* in *kwargs*, falling back to an attribute of *cls*.

    Args:
        attr: Name of the key / attribute to look up.
        kwargs: Mapping consulted first; may be empty or ``None``.
        cls: Optional object whose attribute of the same name is used when
            *kwargs* yields nothing (a missing key or a ``None`` value).

    Returns:
        The value from *kwargs* if it is not ``None``; otherwise the
        attribute from *cls* when *cls* is given and has it; otherwise
        ``None``.
    """
    if kwargs:
        found = kwargs.get(attr)
    else:
        found = None

    # Only consult cls when kwargs did not supply a usable value.
    if found is None and cls is not None and hasattr(cls, attr):
        return getattr(cls, attr)
    return found
def process_image(image: Union[str, Image.Image], max_size: int) -> str:
    """Turn a file path or Pillow image into base64-encoded image text.

    Args:
        image: Either a path string, which is opened with ``Image.open``,
            or an already loaded Pillow ``Image.Image``.
        max_size: Size limit in pixels, passed on to
            ``process_pillow_image``.

    Returns:
        The string returned by ``process_pillow_image``.

    Raises:
        ValueError: If *image* is neither a ``str`` nor an ``Image.Image``.
    """
    if isinstance(image, str):
        # Keep the file handle open only for the duration of the encoding.
        with Image.open(image) as opened:
            return process_pillow_image(opened, max_size)

    if isinstance(image, Image.Image):
        return process_pillow_image(image, max_size)

    raise ValueError("Input must be either a file path (str) or a Pillow Image object")
def process_pillow_image(image: Image.Image, max_size: int) -> str:
    """Encode a Pillow image as base64 PNG text, resizing it when needed.

    The image is passed through ``resize_image`` unless it reports the
    ``image/png`` MIME type and both sides are within *max_size*. Images
    without a ``get_format_mimetype`` method are treated as PNG. In every
    case the result is PNG-encoded and then base64-encoded.

    Args:
        image: The Pillow image to encode.
        max_size: Maximum width and height in pixels.

    Returns:
        The base64 text (decoded as UTF-8) of the PNG-encoded image.
    """
    width, height = image.size

    if hasattr(image, 'get_format_mimetype'):
        mimetype = image.get_format_mimetype()
    else:
        mimetype = "image/png"

    already_suitable = mimetype == "image/png" and width <= max_size and height <= max_size
    if not already_suitable:
        image = resize_image(image, max_size)

    # Both the direct and the resized path end in the same PNG encoding step.
    png_bytes = convert_to_png(image)
    return base64.b64encode(png_bytes).decode('utf-8')
def user_message_with_images(
    user_msg_str: str,
    images: List[Union[str, Image.Image]]|None = None,
    max_size_px: int = 1024,
) -> dict:
    """Build a ``user`` chat message holding text plus optional images.

    Args:
        user_msg_str: Text of the message; always the first content part.
        images: File paths and/or Pillow images to attach. ``None`` means
            no images.
        max_size_px: Size limit in pixels handed to ``process_image`` for
            every image.

    Returns:
        ``{"role": "user", "content": [...]}`` where the content list starts
        with the text part, followed by one ``create_image_content`` part
        per image, in input order.
    """
    sources = [] if images is None else images

    # Encode every image first, then wrap the encoded payloads.
    encoded = [process_image(source, max_size_px) for source in sources]
    text_part = {"type": "text", "text": user_msg_str}
    image_parts = [create_image_content(payload) for payload in encoded]

    return {"role": "user", "content": [text_part, *image_parts]}