# Source: app.py uploaded by Eladlev ("Upload app.py", commit 94b4a05, verified)
import gradio as gr
import io
import os
from PIL import Image, ImageDraw
from anthropic import Anthropic
from anthropic.types import TextBlock
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
import base64
from datetime import datetime

# Upper bound on the number of tokens the model may generate per reply.
max_tokens = 4096

# Model identifier sent with every API request.
model = 'claude-3-5-sonnet-20241022'

# System prompt describing the (simulated) machine the model is operating.
# The date is taken from the clock instead of being hard-coded, so the model
# is not told a stale "current date".  It is evaluated once at import time,
# so a process that runs across midnight keeps the start-up date.
_today = datetime.today()
system = f"""<SYSTEM_CAPABILITY>
* You are utilizing a Windows system with internet access.
* The current date is {_today:%A, %B} {_today.day}, {_today:%Y}.
</SYSTEM_CAPABILITY>"""
def save_image_or_get_url(image, filename="processed_image.png"):
    """Save *image* under the local ``static`` directory and return its path.

    Parameters:
    image: Object exposing ``save(path)`` (a PIL image in this app).
    filename (str): File name to use inside ``static``.

    Returns:
    str: The relative path ``static/<filename>`` the image was written to.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir(): two
    # concurrent requests can no longer make the second one fail with
    # FileExistsError.
    os.makedirs("static", exist_ok=True)
    filepath = os.path.join("static", filename)
    # NOTE: with the default filename every call overwrites the same file.
    image.save(filepath)
    return filepath
def draw_circle_on_image(image, center, radius=30, *, outline="red", width=15):
    """
    Draws a circle outline on the given image using a center point and radius.

    The image is modified in place; the same object is returned for convenience.

    Parameters:
    image (PIL.Image): The image to draw on.
    center (tuple): A tuple (x, y) representing the center of the circle.
    radius (int): The radius of the circle.
    outline (str): Outline color passed to the drawing call (default "red").
    width (int): Outline thickness in pixels (default 15).

    Returns:
    PIL.Image: The image with the circle drawn.

    Raises:
    ValueError: If center is not a 2-tuple or radius is not a positive number.
    """
    if not isinstance(center, tuple) or len(center) != 2:
        raise ValueError("Center must be a tuple of two values (x, y).")
    if not isinstance(radius, (int, float)) or radius <= 0:
        raise ValueError("Radius must be a positive number.")

    # Bounding box of the circle: (left, top, right, bottom).
    cx, cy = center
    bbox = [cx - radius, cy - radius, cx + radius, cy + radius]

    # Drawing context bound to the image; drawing through it mutates the image.
    draw = ImageDraw.Draw(image)
    draw.ellipse(bbox, outline=outline, width=width)
    return image
def pil_image_to_base64(pil_image):
    """Return *pil_image* encoded as PNG and then as a Base64 text string."""
    # Serialize into memory rather than a file on disk.
    png_buffer = io.BytesIO()
    pil_image.save(png_buffer, format="PNG")
    # getvalue() returns the whole buffer regardless of the stream position.
    png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")
# Handle one chat turn: ask the model about the uploaded screenshot
def chatbot_response(input_text, image, key, chat_history):
    """Send the screenshot and question to the model and extend the chat history.

    The request is framed as a computer-use exchange: the user asks a question,
    a scripted assistant turn "requests" a screenshot, and the uploaded image is
    supplied as the result of that tool call.  Text blocks in the reply are
    appended to the history; any reply block whose tool input carries a
    ``coordinate`` is rendered as a red circle on the screenshot, which is saved
    to disk and appended to the history as an image entry.

    Parameters:
    input_text (str): The user's question.
    image (PIL.Image or None): The uploaded screenshot.
    key (str): API key used to create the client.
    chat_history (list): Existing history as ``[user, bot]`` pairs.

    Returns:
    list: A new history list; ``chat_history`` itself is not mutated.
    """
    if not key:
        return chat_history + [[input_text, "Please enter a valid key."]]
    if image is None:
        return chat_history + [[input_text, "Please upload an image."]]

    # The screenshot is resized to exactly the display size advertised to the
    # computer tool below, so coordinates in the reply map 1:1 onto its pixels
    # and need no further scaling.
    display_width, display_height = 1512, 982
    image = image.resize((display_width, display_height))

    client = Anthropic(api_key=key)

    # Id linking the scripted assistant tool call to the tool result below.
    tool_use_id = 'toolu_01PSTVtavFgmx6ctaiSvacCB'
    messages = [
        {'role': 'user',
         'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
        {'role': 'assistant',
         'content': [
             BetaTextBlock(
                 text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
                 type='text'),
             BetaToolUseBlock(id=tool_use_id,
                              input={'action': 'screenshot'}, name='computer',
                              type='tool_use'),
         ]},
        # The uploaded image plays the role of the requested screenshot.
        {'role': 'user',
         'content': [{'type': 'tool_result', 'tool_use_id': tool_use_id,
                      'is_error': False,
                      'content': [{'type': 'image',
                                   'source': {'type': 'base64', 'media_type': 'image/png',
                                              'data': pil_image_to_base64(image)}}]}]},
    ]

    params = [
        {'name': 'computer', 'type': 'computer_20241022',
         'display_width_px': display_width, 'display_height_px': display_height,
         'display_number': None},
        {'type': 'bash_20241022', 'name': 'bash'},
        {'name': 'str_replace_editor', 'type': 'text_editor_20241022'},
    ]
    raw_response = client.beta.messages.with_raw_response.create(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        system=system,
        tools=params,
        betas=["computer-use-2024-10-22"],
        temperature=0.0,
    )
    response = raw_response.parse()

    for block in response.content:
        if hasattr(block, 'text'):
            chat_history = chat_history + [[input_text, block.text]]
        if hasattr(block, 'input') and 'coordinate' in block.input:
            coordinate = block.input['coordinate']
            # Mark the spot the model pointed at, then show the marked image in the chat.
            marked = draw_circle_on_image(image, (int(coordinate[0]), int(coordinate[1])))
            image_url = save_image_or_get_url(marked)
            # A one-element tuple is how a file path is passed as a chat entry.
            chat_history = chat_history + [[None, (image_url,)]]
    return chat_history
def _respond_and_remember(input_text, image, key, history):
    """Run ``chatbot_response`` and return the history for both display and state."""
    updated = chatbot_response(input_text, image, key, history)
    return updated, updated


# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
        with gr.Column():
            chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)
    with gr.Row():
        user_input = gr.Textbox(label="Type your message here", placeholder="Enter your message...")
        key_input = gr.Textbox(label="API Key", placeholder="Enter your key...", type="password")
    # Button to submit
    submit_button = gr.Button("Submit")
    # Per-session chat history
    chat_history = gr.State(value=[])
    # Set interactions.  The state must also be listed as an output: a State
    # that is only an input is never updated, so every submit would start from
    # an empty history and wipe the earlier conversation from the chatbot.
    submit_button.click(
        fn=_respond_and_remember,
        inputs=[user_input, image_input, key_input, chat_history],
        outputs=[chatbot, chat_history],
    )

# Launch the app
demo.launch()