import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer
model_name = "models/meta-llama/Llama-3.3-70B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # halve the memory footprint vs. fp32
    device_map="auto",           # spread the 70B weights across available devices
)

def chat_with_model(user_input):
    # Format the input with the model's chat template (required for instruct models)
    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Generate a response; max_new_tokens bounds the reply length,
    # unlike max_length, which counts prompt + reply together
    output = model.generate(inputs, max_new_tokens=150, num_return_sequences=1)

    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True)
    return response

# Create a Gradio interface for chatting
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="You:"),
    outputs=gr.Textbox(label="Bot:"),
    title="Llama Chatbot",
    description="Chat with the Llama-3.3-70B-Instruct model.",
)

# Launch the app
iface.launch()