Spaces:
Running
Running
import json | |
import base64 | |
import os | |
from anthropic import AnthropicBedrock | |
from PIL import Image | |
class MultimodalInputHandler: | |
def __init__(self, text, image=None): | |
self.text = text | |
self.image = image | |
self.client = AnthropicBedrock( | |
aws_region='us-west-2' | |
) | |
async def handleInput(self): | |
if self.image: | |
# Determine the format of the image | |
if self.image.endswith(".jpg"): | |
formatType = "image/jpeg" | |
elif self.image.endswith(".png"): | |
formatType = "image/png" | |
elif self.image.endswith(".gif"): | |
formatType = "image/gif" | |
elif self.image.endswith(".webp"): | |
formatType = "image/webp" | |
# Encode the image as base64 | |
b64EncodedImage = base64.b64encode(open(self.image, "rb").read()) | |
# Send the image and text to the Anthropic API | |
with self.client.messages.stream( | |
model="anthropic.claude-3-opus-20240229-v1:0", | |
max_tokens=5000, | |
messages=[{ | |
'role': 'user', | |
'content': [ | |
{ | |
"type": "image", | |
"source": { | |
"type": "base64", | |
"media_type": formatType, | |
"data": b64EncodedImage.decode("utf-8") | |
} | |
}, | |
{ | |
"type": "text", | |
"text": self.text | |
} | |
] | |
}] | |
) as stream: | |
for text in stream.text_stream: | |
yield text | |
else: | |
# Send the text to the Anthropic API | |
with self.client.messages.stream( | |
model="anthropic.claude-3-sonnet-20240229-v1:0", | |
max_tokens=5000, | |
messages=[{ | |
'role': 'user', | |
'content': self.text | |
}] | |
) as stream: | |
for text in stream.text_stream: | |
yield text | |
# MultimodalInputHandler = MultimodalInputHandler("What is this image?", "path/to/my/file") | |
# print(MultimodalInputHandler.handleInput()) |