|
import gradio as gr |
|
import torch |
|
import spaces |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
|
|
# Hugging Face Hub identifier of the checkpoint this app serves.
model_name = "CreitinGameplays/bloom-3b-conversational"

# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the causal-LM weights once at import time, then
# place the model on the selected device.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
|
|
|
@spaces.GPU(duration=70)
def generate_text(user_prompt):
    """Generate the assistant's reply to ``user_prompt`` with the BLOOM model.

    The user text is wrapped in the system / prompter / assistant template,
    passed to ``model.generate`` with sampling enabled, and only the text
    produced after the prompt is returned.

    Args:
        user_prompt: Text entered by the user.

    Returns:
        The decoded model continuation with surrounding whitespace stripped.
    """
    prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"

    prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)

    output = model.generate(
        input_ids=prompt_encoded,
        max_length=1900,
        num_beams=1,
        num_return_sequences=1,
        do_sample=True,
        top_k=0,
        top_p=1.0,
        temperature=0.6,
        repetition_penalty=1.1,
    )

    # For a causal LM the returned sequence starts with the prompt tokens.
    # Drop them by position instead of deleting the prompt text from the
    # decoded string: a textual replace would also erase every place where
    # the answer itself repeats the user's words or the system sentence.
    prompt_length = prompt_encoded.shape[-1]
    continuation_ids = output[0][prompt_length:]
    generated_text = tokenizer.decode(continuation_ids, skip_special_tokens=True)

    # If the continuation contains another assistant marker, keep only the
    # text after the last one (same rule the full-text split applied before).
    assistant_response = generated_text.split("<|assistant|>")[-1]

    return assistant_response.strip()
|
|
|
|
|
# Single text input, pre-filled with an example question.
_prompt_box = gr.Textbox(label="Text Prompt", value="What's an AI?")

# Wire the generation function to a one-input, plain-text-output UI.
interface = gr.Interface(
    fn=generate_text,
    inputs=[_prompt_box],
    outputs="text",
    description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
)

# Start the Gradio server as soon as the script runs.
interface.launch()