# NOTE(review): the three lines below were a Hugging Face "Spaces" status
# banner ("Spaces: Runtime error") captured with the scraped source, not code.
# Preserved here as a comment so the file parses.
# --- Environment and model setup (runs once at Space startup) ---------------
import os

# NOTE(review): installing dependencies at runtime via os.system is fragile on
# Spaces (no error checking, slows cold start) — prefer declaring
# `transformers` in requirements.txt. Kept for parity with the original Space.
os.system('pip install transformers')

import torch

import gradio as gr
from transformers import AutoTokenizer, OlmoeForCausalLM

# Signal ZeroGPU scheduling to the Hugging Face Spaces runtime.
os.environ["ZEROGPU"] = "1"

# Run on GPU when available, otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the OLMoE 1B-7B model and its tokenizer once; reused by every request.
model = OlmoeForCausalLM.from_pretrained("allenai/OLMoE-1B-7B-0924").to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMoE-1B-7B-0924")
# Persona instruction for the chatbot (string content unchanged; the scraped
# ` | |` residue that broke the parenthesized literal has been removed).
system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                 "who is stuck inside a step function machine and remembers and counts everything he says "
                 "while always answering questions in full first principles analysis type of thinking "
                 "without using any analogies and always showing full working code or output in his answers.")
def generate_text(prompt, history=None):
    """Generate a model continuation of *prompt* and return it as a string.

    Args:
        prompt: The user's message text.
        history: Prior chat turns supplied by ``gr.ChatInterface`` (the
            interface calls ``fn(message, history)``, so this parameter is
            required for compatibility; it is currently unused).

    Returns:
        The decoded model output, special tokens stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
    # max_new_tokens bounds only the generated continuation; the original
    # max_length=64 counted the prompt too, starving generation on long inputs.
    out = model.generate(**inputs, max_new_tokens=64)
    # Strip special tokens (e.g. end-of-text markers) from the chat reply.
    return tokenizer.decode(out[0], skip_special_tokens=True)
# Set up the Gradio chat interface. NOTE(review): gr.ChatInterface has no
# `system_prompt` keyword — passing it raises TypeError at startup (the likely
# "Runtime error" on this Space). The persona text is shown via `description`;
# to make the model actually follow it, prepend it to the prompt inside the fn.
iface = gr.ChatInterface(fn=generate_text, description=system_prompt)
iface.launch()