import gradio as gr
from lavague.ActionEngine import ActionEngine
from lavague.defaults import DefaultLocalLLM, DefaultLLM
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
import base64
import requests
import uuid
import re

MAX_CHARS = 1500
USER_ID = str(uuid.uuid4())
SERVER_URL = "https://lavague.mithrilsecurity.io"

# Use this action_engine instead to run inference locally
# action_engine = ActionEngine(llm=DefaultLocalLLM())
action_engine = ActionEngine()

title = """
Redefining internet surfing by transforming natural language instructions into seamless browser interactions.
"""
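
# ---------------------------------------------------------------------------
# NOTE: the helper functions below were missing from this copy of the script.
# They are hedged sketches reconstructed from how the Gradio wiring further
# down calls them; the request payload, the response fields, the
# `ActionEngine.get_action` call, and `extract_first_python_code` are
# assumptions, not the original implementation.
# ---------------------------------------------------------------------------

def send_request(url, code=""):
    # Assumed remote-execution API: the server keeps one browser session per
    # USER_ID, optionally runs Selenium `code` on `url`, and returns JSON whose
    # "result" field is a base64-encoded screenshot.
    payload = {"user_id": USER_ID, "url": url, "code": code}
    r = requests.post(SERVER_URL, json=payload)
    r.raise_for_status()
    return r.json()


def extract_first_python_code(markdown_text):
    # Strip the first ```python ... ``` fence from the LLM output, which is
    # presumably what the `re` import above was for; falls back to the raw text.
    match = re.search(r"```python\s*(.*?)\s*```", markdown_text, re.DOTALL)
    return match.group(1).strip() if match else markdown_text


def process_url(url):
    # Load the page in the remote browser and refresh the screenshot shown in
    # the "Browser" image component.
    r = send_request(url)
    with open("screenshot.png", "wb") as f:
        f.write(base64.b64decode(r["result"]))
    return "screenshot.png"


def process_instruction(query, url):
    # Turn a natural-language instruction into Selenium code with the
    # ActionEngine, returning the code and the retrieved HTML nodes used as
    # context (truncated to MAX_CHARS for display). The get_action signature
    # and return shape are assumed.
    html = send_request(url).get("html", "")
    code, source_nodes = action_engine.get_action(query, html)
    return code, source_nodes[:MAX_CHARS]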

def exec_code(code, source_nodes, full_code, url):
    # Execute the generated Selenium code and report the outcome to the UI.
    # The opening of this function was lost in this copy; the remote
    # send_request call, the HTML retrieval, and the success message are
    # reconstructed assumptions.
    html = ""
    try:
        code = extract_first_python_code(code)
        r = send_request(url, code)
        html = r.get("html", "")
        output = "Successful code execution"
        status = """Success!"""
        full_code += code
    except Exception as e:
        output = f"Error in code execution: {str(e)}"
        status = """Failure! Open the Debug tab for more information"""
""" return output, code, html, status, full_code, url def update_image_display(img, url): r = send_request(url) f = open("screenshot.png", "wb") scr = base64.b64decode(r["result"]) f.write(scr) return "screenshot.png", url def show_processing_message(): return "Processing..." def create_demo(base_url, instructions): with gr.Blocks() as demo: with gr.Tab("LaVague"): with gr.Row(): gr.HTML(title) with gr.Row(): url_input = gr.Textbox(value=base_url, label="Enter URL and press 'Enter' to load the page.") with gr.Row(): with gr.Column(scale=7): image_display = gr.Image(label="Browser", interactive=False) with gr.Column(scale=3): with gr.Accordion(label="Full code", open=False): full_code = gr.Code(value="", language="python", interactive=False) code_display = gr.Code(label="Generated code", language="python", lines=5, interactive=True) status_html = gr.HTML() with gr.Row(): with gr.Column(scale=8): text_area = gr.Textbox(label="Enter instructions and press 'Enter' to generate code.") gr.Examples(examples=instructions, inputs=text_area) with gr.Tab("Debug"): with gr.Row(): with gr.Column(): log_display = gr.Textbox(interactive=False, lines=20) with gr.Column(): source_display = gr.Code(language="html", label="Retrieved nodes", interactive=False, lines=20) with gr.Row(): with gr.Accordion(label="Full HTML", open=False): full_html = gr.Code(language="html", label="Full HTML", interactive=False, lines=20) # Linking components url_input.submit(process_url, inputs=url_input, outputs=image_display) text_area.submit(show_processing_message, outputs=[status_html]).then( process_instruction, inputs=[text_area, url_input], outputs=[code_display, source_display] ).then( exec_code, inputs=[code_display, source_display, full_code, url_input], outputs=[log_display, code_display, full_html, status_html, full_code, url_input] ).then( update_image_display, inputs=[image_display, url_input], outputs=[image_display, url_input] ) demo.launch(share=True, debug=True) base_url = "https://huggingface.co/" instructions = ["Click on the Datasets item on the menu, between Models and Spaces", "Click on the search bar 'Filter by name', type 'The Stack', and press 'Enter'", "Scroll by 500 pixels",] create_demo(base_url, instructions)