import base64
import io
import os

import gradio as gr
from PIL import Image, ImageDraw
from anthropic import Anthropic
from anthropic.types import TextBlock
from anthropic.types.beta import BetaTextBlock, BetaToolUseBlock

max_tokens = 4096
model = 'claude-3-5-sonnet-20241022'
system = """
* You are utilizing a Windows system with internet access.
* The current date is Monday, November 18, 2024.
"""
def save_image_or_get_url(image, filename="processed_image.png"):
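    """Save a PIL image under ./static and return its file path so the Chatbot can display it."""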
if not os.path.isdir("static"):
os.mkdir("static")
filepath = os.path.join("static", filename)
image.save(filepath)
return filepath
def draw_circle_on_image(image, center, radius=30):
"""
Draws a circle on the given image using a center point and radius.
Parameters:
image (PIL.Image): The image to draw on.
center (tuple): A tuple (x, y) representing the center of the circle.
radius (int): The radius of the circle.
Returns:
PIL.Image: The image with the circle drawn.
"""
if not isinstance(center, tuple) or len(center) != 2:
raise ValueError("Center must be a tuple of two values (x, y).")
if not isinstance(radius, (int, float)) or radius <= 0:
raise ValueError("Radius must be a positive number.")
# Calculate the bounding box for the circle
bbox = [
center[0] - radius, center[1] - radius, # Top-left corner
center[0] + radius, center[1] + radius # Bottom-right corner
]
# Create a drawing context
draw = ImageDraw.Draw(image)
# Draw the circle
draw.ellipse(bbox, outline="red", width=15) # Change outline color and width as needed
return image
def pil_image_to_base64(pil_image):
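    """Encode a PIL image as a base64 PNG string, the format expected by an Anthropic image content block."""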
# Save the PIL image to an in-memory buffer as a file-like object
buffered = io.BytesIO()
pil_image.save(buffered, format="PNG") # Specify format (e.g., PNG, JPEG)
buffered.seek(0) # Rewind the buffer to the beginning
# Encode the bytes from the buffer to Base64
image_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
return image_data
# Send the user's message plus a screenshot of the uploaded image to Claude's computer-use beta,
# then render the model's reply (and any click target) in the chat.
def chatbot_response(input_text, image, key, chat_history):
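    """
    Handle one chat turn: resize the uploaded image to the advertised display size, send it to
    the model as a screenshot tool result, and append the model's text (plus an annotated copy
    of the image for any returned click coordinate) to chat_history.
    """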
if not key:
return chat_history + [[input_text, "Please enter a valid key."]]
if image is None:
return chat_history + [[input_text, "Please upload an image."]]
    new_size = (1512, 982)  # Resize to the display size advertised to the computer-use tool below
image = image.resize(new_size)
    client = Anthropic(api_key=key)
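    # Seed the conversation with a canned assistant turn that "requests" a screenshot via the
    # computer tool, so the uploaded image can be supplied as that tool call's result below.
    # The tool_use_id is an arbitrary fixed value and only needs to match the tool_result.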
    messages = [
        {'role': 'user',
         'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
        {'role': 'assistant',
         'content': [
             BetaTextBlock(
                 text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
                 type='text'),
             BetaToolUseBlock(id='toolu_01PSTVtavFgmx6ctaiSvacCB',
                              input={'action': 'screenshot'},
                              name='computer',
                              type='tool_use'),
         ]},
    ]
image_data = pil_image_to_base64(image)
    tool_res = {
        'role': 'user',
        'content': [{
            'type': 'tool_result',
            'tool_use_id': 'toolu_01PSTVtavFgmx6ctaiSvacCB',
            'is_error': False,
            'content': [{
                'type': 'image',
                'source': {'type': 'base64', 'media_type': 'image/png', 'data': image_data},
            }],
        }],
    }
messages.append(tool_res)
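    # Tools advertised to the model; display_width_px/display_height_px must match the
    # resized screenshot so the returned coordinates line up with the image.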
    params = [
        {'name': 'computer', 'type': 'computer_20241022',
         'display_width_px': 1512, 'display_height_px': 982, 'display_number': None},
        {'name': 'bash', 'type': 'bash_20241022'},
        {'name': 'str_replace_editor', 'type': 'text_editor_20241022'},
    ]
raw_response = client.beta.messages.with_raw_response.create(
max_tokens=max_tokens,
messages=messages,
model=model,
system=system,
tools=params,
betas=["computer-use-2024-10-22"],
temperature=0.0,
)
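    # parse() deserializes the raw HTTP response into a message whose .content lists
    # the model's text and tool_use blocks.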
response = raw_response.parse()
scale_x = image.width / 1512
scale_y = image.height / 982
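    # The model returns coordinates in the advertised 1512x982 display space; since the image was
    # resized to exactly that size the scale factors are 1.0, but they keep the circle placement
    # correct if new_size ever changes.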
for r in response.content:
if hasattr(r, 'text'):
chat_history = chat_history + [[input_text, r.text]]
if hasattr(r, 'input') and 'coordinate' in r.input:
coordinate = r.input['coordinate']
new_image = draw_circle_on_image(image, (int(coordinate[0] * scale_x), int(coordinate[1] * scale_y)))
            # Save the annotated image and append it to the chat as an image message
            image_url = save_image_or_get_url(new_image)
            chat_history = chat_history + [[None, (image_url,)]]
    return chat_history
# Create the Gradio interface
with gr.Blocks() as demo:
with gr.Row():
with gr.Column():
image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
with gr.Column():
chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)
with gr.Row():
user_input = gr.Textbox(label="Type your message here", placeholder="Enter your message...")
key_input = gr.Textbox(label="API Key", placeholder="Enter your key...", type="password")
# Button to submit
submit_button = gr.Button("Submit")
    # The Chatbot component's current value is reused as the running history,
    # so each click appends to what is already displayed.
    submit_button.click(
        fn=chatbot_response,
        inputs=[user_input, image_input, key_input, chatbot],
        outputs=[chatbot],
    )
# Launch the app
demo.launch()
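# To try it locally: run this script (e.g. `python app.py`, or whatever this file is named),
# open the printed Gradio URL, upload a screenshot, paste an Anthropic API key, and ask
# something like "where should I click to open Settings?".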