import base64
import io
import os

import gradio as gr
from PIL import Image, ImageDraw
from anthropic import Anthropic
from anthropic.types import TextBlock
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock

# Claude request configuration.
max_tokens = 4096
model = 'claude-3-5-sonnet-20241022'

# Virtual display size advertised to the computer-use tool.  Claude's click
# coordinates are expressed in this coordinate space, so the uploaded image is
# resized to it before being sent as a "screenshot".
DISPLAY_WIDTH = 1512
DISPLAY_HEIGHT = 982

system = """
* You are utilizing a Windows system with internet access.
* The current date is Monday, November 18, 2024.
"""


def save_image_or_get_url(image, filename="processed_image.png"):
    """Save ``image`` under the local ``static/`` directory and return its path.

    Parameters:
        image (PIL.Image): Image to persist.
        filename (str): File name inside ``static/``.

    Returns:
        str: Relative path to the saved file.

    NOTE(review): with the fixed default filename, successive calls overwrite
    the previous file — each chat turn shows the latest annotated image only.
    """
    if not os.path.isdir("static"):
        os.mkdir("static")
    filepath = os.path.join("static", filename)
    image.save(filepath)
    return filepath


def draw_circle_on_image(image, center, radius=30):
    """Draw a red circle outline on the given image, in place.

    Parameters:
        image (PIL.Image): The image to draw on (mutated in place).
        center (sequence): Two values ``(x, y)`` giving the circle's center;
            tuples and lists are both accepted.
        radius (int | float): The radius of the circle, in pixels.

    Returns:
        PIL.Image: The same image object, with the circle drawn.

    Raises:
        ValueError: If ``center`` is not a 2-item sequence or ``radius`` is
            not a positive number.
    """
    if not isinstance(center, (tuple, list)) or len(center) != 2:
        raise ValueError("Center must be a tuple of two values (x, y).")
    if not isinstance(radius, (int, float)) or radius <= 0:
        raise ValueError("Radius must be a positive number.")

    # Bounding box of the circle: top-left and bottom-right corners.
    bbox = [
        center[0] - radius, center[1] - radius,
        center[0] + radius, center[1] + radius,
    ]
    draw = ImageDraw.Draw(image)
    # Thick outline so the marker is visible on a full-screen screenshot.
    draw.ellipse(bbox, outline="red", width=15)
    return image


def pil_image_to_base64(pil_image):
    """Encode a PIL image as a base64 string of its PNG bytes.

    Parameters:
        pil_image (PIL.Image): Image to encode.

    Returns:
        str: Base64-encoded PNG data (ASCII text).
    """
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def chatbot_response(input_text, image, key, chat_history):
    """Send the user's question plus a screenshot to Claude's computer-use API.

    The uploaded image is resized to the advertised display size and injected
    into the conversation as the result of a faked ``screenshot`` tool call.
    Any ``coordinate`` Claude returns in a tool-use block is marked with a red
    circle on the image, which is saved and appended to the chat.

    Parameters:
        input_text (str): The user's message.
        image (PIL.Image | None): The uploaded screenshot.
        key (str): Anthropic API key.
        chat_history (list): Gradio chat history (list of [user, bot] pairs).

    Returns:
        list: The updated chat history.
    """
    # Guard clauses: both a key and an image are required.
    if not key:
        return chat_history + [[input_text, "Please enter a valid key."]]
    if image is None:
        return chat_history + [[input_text, "Please upload an image."]]

    # Resize to the display size the computer-use tool is told about, so
    # Claude's coordinates line up with the pixels it is shown.
    image = image.resize((DISPLAY_WIDTH, DISPLAY_HEIGHT))

    client = Anthropic(api_key=key)

    # Prime the conversation: the user asks about the screen, the assistant
    # "decides" to take a screenshot via a pre-baked tool_use block, and we
    # answer that tool call with the uploaded image.
    messages = [
        {'role': 'user',
         'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
        {'role': 'assistant',
         'content': [
             BetaTextBlock(
                 text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
                 type='text'),
             BetaToolUseBlock(id='toolu_01PSTVtavFgmx6ctaiSvacCB',
                              input={'action': 'screenshot'},
                              name='computer', type='tool_use'),
         ]},
    ]
    image_data = pil_image_to_base64(image)
    tool_res = {
        'role': 'user',
        'content': [{
            'type': 'tool_result',
            'tool_use_id': 'toolu_01PSTVtavFgmx6ctaiSvacCB',
            'is_error': False,
            'content': [{
                'type': 'image',
                'source': {'type': 'base64', 'media_type': 'image/png', 'data': image_data},
            }],
        }],
    }
    messages.append(tool_res)

    params = [
        {'name': 'computer', 'type': 'computer_20241022',
         'display_width_px': DISPLAY_WIDTH, 'display_height_px': DISPLAY_HEIGHT,
         'display_number': None},
        {'type': 'bash_20241022', 'name': 'bash'},
        {'name': 'str_replace_editor', 'type': 'text_editor_20241022'},
    ]

    raw_response = client.beta.messages.with_raw_response.create(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        system=system,
        tools=params,
        betas=["computer-use-2024-10-22"],
        temperature=0.0,
    )
    response = raw_response.parse()

    # Map Claude's coordinates (display space) onto the actual image pixels.
    # After the resize above these factors are 1.0, but keeping the math makes
    # the code robust if the resize target ever diverges from the tool params.
    scale_x = image.width / DISPLAY_WIDTH
    scale_y = image.height / DISPLAY_HEIGHT

    for r in response.content:
        if hasattr(r, 'text'):
            chat_history = chat_history + [[input_text, r.text]]
        if hasattr(r, 'input') and 'coordinate' in r.input:
            coordinate = r.input['coordinate']
            # Draws in place on `image`; repeated tool calls accumulate marks.
            new_image = draw_circle_on_image(
                image,
                (int(coordinate[0] * scale_x), int(coordinate[1] * scale_y)),
            )
            image_url = save_image_or_get_url(new_image)
            # Gradio renders a [None, (path,)] pair as a bot image message.
            chat_history = chat_history + [[None, (image_url,)]]
    return chat_history


# Build the Gradio interface.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
        with gr.Column():
            chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)
    with gr.Row():
        user_input = gr.Textbox(label="Type your message here",
                                placeholder="Enter your message...")
        key_input = gr.Textbox(label="API Key", placeholder="Enter your key...",
                               type="password")
    submit_button = gr.Button("Submit")

    # Per-session chat history.
    chat_history = gr.State(value=[])

    submit_button.click(
        fn=chatbot_response,
        inputs=[user_input, image_input, key_input, chat_history],
        outputs=[chatbot],
    )

# Guard the launch so importing this module (e.g. for tests) does not
# start the web server; running the script directly behaves as before.
if __name__ == "__main__":
    demo.launch()