import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import json
# Last updated: 2023-12-30 10:00 AM
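# Registry of selectable base models and the LoRA adapters trained on top of
# them (both given as Hugging Face Hub repo IDs).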
AVAILABLE_MODELS = {
    "TinyLlama-1.1B": {
        "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "adapter": "morangold1/vacation-rental-assistant"
    },
    "DeepSeek-7B": {
        "base_model": "deepseek-ai/deepseek-llm-7b-chat",
        "adapter": "morangold1/vacation-rental-assistant-deepseek"
    }
}
def create_system_prompt():
"""Create the system prompt for the model."""
return """You are a vacation rental property assistant. Your role is to help guests with their inquiries, handle maintenance requests, and provide information about the property and local area. Please provide detailed, accurate responses in a professional and friendly manner.
Your responses should follow this structured format:
1. Understand and analyze the guest's request
2. Provide clear and helpful information
3. Take appropriate actions (e.g., scheduling maintenance, updating reservations)
4. Offer additional assistance if needed
Always maintain a helpful and professional tone while ensuring guest satisfaction and property care."""
def load_model(model_name="TinyLlama-1.1B"):
"""Load the model and tokenizer."""
try:
model_config = AVAILABLE_MODELS[model_name]
# Load base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(
model_config["base_model"],
torch_dtype=torch.float32,
device_map="auto",
trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_config["base_model"])
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
# Load and apply LoRA weights
model = PeftModel.from_pretrained(base_model, model_config["adapter"])
model.eval()
return model, tokenizer
except Exception as e:
print(f"Error loading model: {str(e)}")
raise
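# Note: the template below hard-codes ChatML-style markers (<|im_start|> / <|im_end|>).
# This assumes the LoRA adapters were fine-tuned on prompts in this same format;
# otherwise tokenizer.apply_chat_template would be the usual alternative.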
def create_prompt(user_message, system_prompt):
"""Format the prompt for the model."""
return f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant\n"
def generate_response(model, tokenizer, prompt, max_length=2048):
"""Generate a response from the model."""
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
inputs = {k: v.to(model.device) for k, v in inputs.items()}
outputs = model.generate(
**inputs,
max_new_tokens=1024,
temperature=0.7,
top_p=0.9,
do_sample=True,
pad_token_id=tokenizer.pad_token_id,
eos_token_id=tokenizer.eos_token_id
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
response = response.replace(prompt, "").strip()
return response
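# Simple in-process cache so each selected model is loaded only once per
# session instead of on every message (assumes the Space has enough memory
# to keep each loaded model resident).
_MODEL_CACHE = {}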
def chat(message, history, model_name):
    """Chat function for the Gradio interface."""
    system_prompt = create_system_prompt()
    if model_name not in _MODEL_CACHE:
        _MODEL_CACHE[model_name] = load_model(model_name)
    model, tokenizer = _MODEL_CACHE[model_name]
    prompt = create_prompt(message, system_prompt)
    response = generate_response(model, tokenizer, prompt)
    return response
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as iface:
gr.Markdown("# Vacation Rental Assistant")
gr.Markdown("Ask questions about your vacation rental property, make requests, or get local information.")
with gr.Row():
model_dropdown = gr.Dropdown(
choices=list(AVAILABLE_MODELS.keys()),
value="TinyLlama-1.1B",
label="Select Model",
info="Choose the model to use for responses"
)
chatbot = gr.ChatInterface(
fn=lambda msg, history: chat(msg, history, model_dropdown.value),
examples=[
"What time is check-in?",
"Is early check-in available?",
"The AC isn't working properly, can you help?",
"What amenities are available?",
"Is there a grocery store nearby?",
]
)
# Launch the interface
if __name__ == "__main__":
    iface.launch()