import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel import json # Last updated: 2023-12-30 10:00 AM AVAILABLE_MODELS = { "TinyLlama-1.1B": { "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "adapter": "morangold1/vacation-rental-assistant" }, "DeepSeek-7B": { "base_model": "deepseek-ai/deepseek-llm-7b-chat", "adapter": "morangold1/vacation-rental-assistant-deepseek" } } def create_system_prompt(): """Create the system prompt for the model.""" return """You are a vacation rental property assistant. Your role is to help guests with their inquiries, handle maintenance requests, and provide information about the property and local area. Please provide detailed, accurate responses in a professional and friendly manner. Your responses should follow this structured format: 1. Understand and analyze the guest's request 2. Provide clear and helpful information 3. Take appropriate actions (e.g., scheduling maintenance, updating reservations) 4. Offer additional assistance if needed Always maintain a helpful and professional tone while ensuring guest satisfaction and property care.""" def load_model(model_name="TinyLlama-1.1B"): """Load the model and tokenizer.""" try: model_config = AVAILABLE_MODELS[model_name] # Load base model and tokenizer base_model = AutoModelForCausalLM.from_pretrained( model_config["base_model"], torch_dtype=torch.float32, device_map="auto", trust_remote_code=True ) tokenizer = AutoTokenizer.from_pretrained(model_config["base_model"]) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token # Load and apply LoRA weights model = PeftModel.from_pretrained(base_model, model_config["adapter"]) model.eval() return model, tokenizer except Exception as e: print(f"Error loading model: {str(e)}") raise def create_prompt(user_message, system_prompt): """Format the prompt for the model.""" return f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n" def generate_response(model, tokenizer, prompt, max_length=2048): """Generate a response from the model.""" inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length) inputs = {k: v.to(model.device) for k, v in inputs.items()} outputs = model.generate( **inputs, max_new_tokens=1024, temperature=0.7, top_p=0.9, do_sample=True, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) response = response.replace(prompt, "").strip() return response def chat(message, history, model_name): """Chat function for Gradio interface.""" system_prompt = create_system_prompt() model, tokenizer = load_model(model_name) prompt = create_prompt(message, system_prompt) response = generate_response(model, tokenizer, prompt) return response # Create Gradio interface with gr.Blocks(theme=gr.themes.Soft()) as iface: gr.Markdown("# Vacation Rental Assistant") gr.Markdown("Ask questions about your vacation rental property, make requests, or get local information.") with gr.Row(): model_dropdown = gr.Dropdown( choices=list(AVAILABLE_MODELS.keys()), value="TinyLlama-1.1B", label="Select Model", info="Choose the model to use for responses" ) chatbot = gr.ChatInterface( fn=lambda msg, history: chat(msg, history, model_dropdown.value), examples=[ "What time is check-in?", "Is early check-in available?", "The AC isn't working properly, can you help?", "What amenities are available?", "Is there a grocery store nearby?", ] ) # Launch the interface if __name__ == "__main__": iface.launch()