# NOTE: The following two comment lines replace Hugging Face Space page
# residue ("Spaces" / "Sleeping" status badges) captured by the scraper;
# they were never part of the application code.
# --- Imports: stdlib first, then third-party, per PEP 8 ---
import base64
import io
import os

import gradio as gr
from PIL import Image, ImageDraw
from anthropic import Anthropic
from anthropic.types import TextBlock
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock

# Maximum number of tokens the model may generate per response.
max_tokens = 4096

# Anthropic model id used for the computer-use beta.
model = 'claude-3-5-sonnet-20241022'

# System prompt sent with every request.
system = """<SYSTEM_CAPABILITY>
* You are utilizing a Windows system with internet access.
* The current date is Monday, November 18, 2024.
</SYSTEM_CAPABILITY>"""
def save_image_or_get_url(image, filename="processed_image.png"):
    """Save *image* under the local ``static/`` directory and return its path.

    Parameters:
        image: Any object exposing a PIL-style ``save(filepath)`` method.
        filename (str): File name to store the image under.

    Returns:
        str: Relative path ("static/<filename>") the image was written to.
    """
    # exist_ok=True avoids the check-then-create race of isdir() + mkdir().
    os.makedirs("static", exist_ok=True)
    filepath = os.path.join("static", filename)
    image.save(filepath)
    return filepath
def draw_circle_on_image(image, center, radius=30):
    """
    Draw a red circle outline on *image* (in place) and return it.

    Parameters:
        image (PIL.Image): The image to draw on; it is mutated, not copied.
        center (tuple | list): (x, y) coordinates of the circle's center.
            Any two-element tuple or list is accepted.
        radius (int | float): Circle radius in pixels; must be > 0.

    Returns:
        PIL.Image: The same image object with the circle drawn.

    Raises:
        ValueError: If *center* is not a two-element tuple/list or *radius*
            is not a positive number.
    """
    # Accept lists as well as tuples — callers often build coordinates
    # dynamically; anything else of the wrong shape is rejected early.
    if not isinstance(center, (tuple, list)) or len(center) != 2:
        raise ValueError("Center must be a tuple of two values (x, y).")
    if not isinstance(radius, (int, float)) or radius <= 0:
        raise ValueError("Radius must be a positive number.")
    cx, cy = center
    # Bounding box of the circle: top-left and bottom-right corners.
    bbox = [cx - radius, cy - radius, cx + radius, cy + radius]
    draw = ImageDraw.Draw(image)
    # Thick outline so the marker stays visible on large screenshots.
    draw.ellipse(bbox, outline="red", width=15)
    return image
def pil_image_to_base64(pil_image):
    """Return the base64-encoded PNG representation of *pil_image* as str."""
    # Render the image into an in-memory PNG buffer, grab the raw bytes,
    # then base64-encode them and decode the result to a UTF-8 string.
    with io.BytesIO() as buffer:
        pil_image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")
# Builds the Claude "computer use" request for one user turn and appends the
# model's reply (text and, when present, an annotated screenshot) to the chat.
def chatbot_response(input_text, image, key, chat_history):
    """Handle one chat turn: send the screenshot + prompt to Claude.

    Parameters:
        input_text (str): The user's message.
        image (PIL.Image | None): Screenshot uploaded by the user.
        key (str): Anthropic API key.
        chat_history (list): Gradio chat history (list of [user, bot] pairs).

    Returns:
        list: Updated chat history (a new list; the input is not mutated).
    """
    # Guard clauses: an API key and an image are required before any API call.
    if not key:
        return chat_history + [[input_text, "Please enter a valid key."]]
    if image is None:
        return chat_history + [[input_text, "Please upload an image."]]
    # Resize to the display size advertised to the model in `params` below so
    # the coordinates Claude returns map directly onto the screenshot.
    new_size = (1512, 982)
    image = image.resize(new_size)
    client = Anthropic(api_key=key)
    # Seed the conversation with a canned assistant turn that "takes a
    # screenshot", then feed the uploaded image back in as the tool result.
    messages = [
        {'role': 'user',
         'content': [TextBlock(text=f'Look at my screenshot, {input_text}', type='text')]},
        {'role': 'assistant',
         'content': [
             BetaTextBlock(
                 text="I'll help you check your screen, but first I need to take a screenshot to see what you're looking at.",
                 type='text'),
             BetaToolUseBlock(id='toolu_01PSTVtavFgmx6ctaiSvacCB',
                              input={'action': 'screenshot'}, name='computer',
                              type='tool_use')]},
    ]
    image_data = pil_image_to_base64(image)
    tool_res = {'role': 'user',
                'content': [{'type': 'tool_result',
                             'tool_use_id': 'toolu_01PSTVtavFgmx6ctaiSvacCB',
                             'is_error': False,
                             'content': [{'type': 'image',
                                          'source': {'type': 'base64',
                                                     'media_type': 'image/png',
                                                     'data': image_data}}]}]}
    messages.append(tool_res)
    # Tool declarations required by the computer-use beta.
    params = [{'name': 'computer', 'type': 'computer_20241022',
               'display_width_px': 1512, 'display_height_px': 982,
               'display_number': None},
              {'type': 'bash_20241022', 'name': 'bash'},
              {'name': 'str_replace_editor', 'type': 'text_editor_20241022'}]
    raw_response = client.beta.messages.with_raw_response.create(
        max_tokens=max_tokens,
        messages=messages,
        model=model,
        system=system,
        tools=params,
        betas=["computer-use-2024-10-22"],
        temperature=0.0,
    )
    response = raw_response.parse()
    # NOTE(review): image was just resized to exactly (1512, 982), so both
    # factors are always 1.0; kept so the mapping survives if sizes diverge.
    scale_x = image.width / 1512
    scale_y = image.height / 982
    for r in response.content:
        if hasattr(r, 'text'):
            chat_history = chat_history + [[input_text, r.text]]
        if hasattr(r, 'input') and 'coordinate' in r.input:
            # Claude asked to act at a point: circle it on the screenshot
            # and show the annotated image as a chat message.
            coordinate = r.input['coordinate']
            new_image = draw_circle_on_image(
                image, (int(coordinate[0] * scale_x), int(coordinate[1] * scale_y)))
            image_url = save_image_or_get_url(new_image)
            chat_history = chat_history + [[None, (image_url,)]]
    return chat_history
# --- Gradio UI ---
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="Upload Image", type="pil", interactive=True)
        with gr.Column():
            chatbot = gr.Chatbot(label="Chatbot Interaction", height=400)
    with gr.Row():
        user_input = gr.Textbox(label="Type your message here", placeholder="Enter your message...")
        key_input = gr.Textbox(label="API Key", placeholder="Enter your key...", type="password")
    # Button to submit
    submit_button = gr.Button("Submit")
    # BUG FIX: the previous version passed a gr.State([]) as the history input
    # but never listed it in `outputs`, so the stored history stayed [] and
    # every turn started from scratch. Feeding the Chatbot component itself
    # back in as the history input keeps the conversation cumulative without
    # changing chatbot_response().
    submit_button.click(
        fn=chatbot_response,
        inputs=[user_input, image_input, key_input, chatbot],
        outputs=[chatbot],
    )

# Launch only when executed as a script (keeps imports/tests side-effect free).
if __name__ == "__main__":
    demo.launch()