import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer
model_name = "models/meta-llama/Llama-3.3-70B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # halve the memory footprint vs. fp32
    device_map="auto",           # spread the 70B weights across available devices
)

def chat_with_model(user_input):
    # Format the input with the model's chat template (required for instruct models)
    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Generate a response; max_new_tokens bounds the reply length,
    # unlike max_length, which counts prompt + reply together
    output = model.generate(inputs, max_new_tokens=150, num_return_sequences=1)

    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True)
    return response

# Create a Gradio interface for chatting
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="You:"),
    outputs=gr.Textbox(label="Bot:"),
    title="Llama Chatbot",
    description="Chat with the Llama-3.3-70B-Instruct model.",
)

# Launch the app
iface.launch()