icechat / app.py
Sigurdur's picture
Update app.py
911ec5f verified
raw
history blame
919 Bytes
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# Hugging Face Hub repository that provides both the weights and the tokenizer.
_CHECKPOINT = "Sigurdur/icechat"

# Both objects are loaded once at import time and shared by every chat request.
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
def streaming_respond(question, history):
    """Stream the model's answer to ``question`` as a growing string.

    The question is wrapped in the ``### Question: / ### Answer:`` prompt
    template, generation runs on a background thread, and every time the
    streamer produces a new piece of text the accumulated answer so far is
    yielded.

    Args:
        question: The user's latest message.
        history: Previous turns supplied by the caller. It is accepted so the
            signature matches what ``gr.ChatInterface`` passes in, but it is
            not used: each question is answered without conversation context.

    Yields:
        The answer text generated so far; each yielded value extends the
        previous one.
    """
    prompt = f"### Question:\n{question}\n\n### Answer:\n"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # skip_prompt keeps the prompt template out of the streamed text; the
    # timeout bounds how long the streamer waits for the next piece of text.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": 100,
        # temperature only takes effect when sampling is enabled; without
        # do_sample=True decoding is greedy and temperature is ignored.
        "do_sample": True,
        "temperature": 0.7,
        "num_beams": 1,
    }

    # generate() blocks until it is done, so it runs on a worker thread while
    # this generator consumes the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    answer = ""
    for chunk in streamer:
        answer += chunk
        yield answer

    # The stream is exhausted, so generation is finishing; reap the thread.
    worker.join()
# Build the chat UI around the streaming handler and start the web server.
demo = gr.ChatInterface(fn=streaming_respond)
demo.launch()