import gradio as gr
from transformers import GPT2Tokenizer
from thirdai import bolt, licensing

# Activate the ThirdAI license before loading the model.
licensing.activate("7511CC-0E24D7-69439D-5D6CBA-33AAFD-V3")

# The GPT-2 tokenizer encodes prompts and decodes generated tokens.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = bolt.GenerativeModel.load("./generative.model")


def generate(prompt, beam_width, temperature):
    # Encode the prompt and stream generated chunks back to the UI as they arrive.
    prompt = tokenizer.encode(prompt)
    stream = model.streaming_generation(
        input_tokens=prompt,
        prediction_chunk_size=2,
        max_predictions=80,
        beam_width=beam_width,
        temperature=temperature if temperature > 0 else None,
    )
    for res in stream:
        yield tokenizer.decode(res)


with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt", autofocus=True)
    output = gr.TextArea(label="Output")
    beam_width = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Beam Width")
    temperature = gr.Slider(
        minimum=0,
        maximum=3,
        step=0.1,
        value=1.2,
        label="Temperature (0 means temperature isn't used)",
    )

    # Generation can be triggered by pressing Enter in the prompt box or by clicking the button.
    prompt.submit(generate, inputs=[prompt, beam_width, temperature], outputs=[output])
    btn = gr.Button(value="Generate")
    btn.click(generate, inputs=[prompt, beam_width, temperature], outputs=[output])
    gr.ClearButton(components=[prompt, output])

if __name__ == "__main__":
    demo.queue()
    demo.launch()