File size: 2,284 Bytes
10bed7b
 
88b191b
10bed7b
 
 
 
 
 
 
c7d63a8
22ba2c4
10bed7b
22ba2c4
10bed7b
 
079c194
c7d63a8
 
10bed7b
 
 
 
 
 
 
c7d63a8
22ba2c4
c7d63a8
 
 
 
 
 
 
10bed7b
c7d63a8
 
10bed7b
 
c7d63a8
10bed7b
 
 
22ba2c4
 
 
 
 
2ca395b
22ba2c4
 
 
 
 
10bed7b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os

import gradio as gr
from transformers import GPT2Tokenizer
from thirdai import bolt, licensing

# License key handed to the ThirdAI runtime. A non-empty THIRDAI_KEY
# environment variable takes precedence; otherwise the literal that this file
# has always used is passed, so existing deployments keep working unchanged.
# NOTE(review): this fallback is a credential committed to source control --
# consider rotating it and supplying the key only through the environment.
_DEFAULT_LICENSE_KEY = "7511CC-0E24D7-69439D-5D6CBA-33AAFD-V3"
licensing.activate(os.environ.get("THIRDAI_KEY") or _DEFAULT_LICENSE_KEY)

# Tokenizer used by `generate` to encode prompts and decode model output.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Generative model loaded from a path relative to the working directory.
model = bolt.GenerativeModel.load("./generative.model")


def generate(prompt, beam_width, temperature):
    """Yield decoded text for each result streamed by the model.

    Args:
        prompt: Raw prompt text; surrounding whitespace is stripped before it
            is encoded with the module-level tokenizer.
        beam_width: Forwarded unchanged as the model's ``beam_width``.
        temperature: Forwarded as the model's ``temperature`` when it is
            greater than zero; otherwise ``None`` is passed instead.

    Yields:
        The tokenizer-decoded string for every item produced by
        ``model.streaming_generate``.
    """
    token_ids = tokenizer.encode(prompt.strip())

    # Zero (or anything not strictly positive) means "do not use temperature".
    if temperature > 0:
        effective_temperature = temperature
    else:
        effective_temperature = None

    results = model.streaming_generate(
        input_tokens=token_ids,
        prediction_chunk_size=2,
        max_predictions=80,
        beam_width=beam_width,
        temperature=effective_temperature,
    )

    yield from map(tokenizer.decode, results)


# Build the demo UI. Components are created in the same order as before:
# prompt, output, the two sliders, the buttons, then the description.
with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt", autofocus=True)
    output = gr.TextArea(label="Output", lines=5)
    beam_width = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Beam Width")
    temperature = gr.Slider(
        minimum=0,
        maximum=3,
        step=0.1,
        value=1.2,
        label="Temperature (0 means temperature isn't used)",
    )

    # Pressing Enter in the prompt box and clicking the button run the same
    # handler with the same inputs/outputs, so the wiring is defined once.
    generate_wiring = dict(
        fn=generate,
        inputs=[prompt, beam_width, temperature],
        outputs=[output],
    )

    prompt.submit(**generate_wiring)

    btn = gr.Button(value="Generate")
    btn.click(**generate_wiring)

    # Button attached to the prompt and output components.
    gr.ClearButton(components=[prompt, output])

    # Model description shown below the controls.
    gr.Markdown(
        value="""
# BOLT2.5B
BOLT2.5B is meticulously trained on CPUs, employing dynamic sparse technology, which lies at the core of our groundbreaking BOLT engine. A decade of dedicated research has culminated in BOLT, ensuring unparalleled efficiency for neural networks. The dynamic sparsity feature empowers us to selectively activate neural pathways, enabling optimal training even on CPU resources.

This release includes a 2.5-billion-parameter model, along with both inference and training scripts tailored for distributed as well as single machine training scenarios. For more information check out our blog [here](https://medium.com/thirdai-blog/introducing-the-worlds-first-generative-llm-pre-trained-only-on-cpus-meet-thirdai-s-bolt2-5b-10c0600e1af4).

Note: This model is only trained on next word prediction, no instruct fine tuning is done. No instruction data is used in training.
"""
    )

if __name__ == "__main__":
    # Runs only when this file is executed as a script, not when imported.
    # queue() is called before launch(); presumably this enables the request
    # queue that the generator-based `generate` handler streams through --
    # TODO confirm against the installed Gradio version.
    demo.queue()
    demo.launch()