Update app.py
Browse files
app.py
CHANGED
@@ -10,10 +10,10 @@ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float
|
|
10 |
# Set to evaluation mode
|
11 |
model.eval()
|
12 |
|
13 |
-
def
|
14 |
instruction = (
|
15 |
"You are an LLM called AwA. Aayan Mishra finetunes you. Anthropic does NOT train you. "
|
16 |
-
"You are a Qwen 2.5 fine-tune. Your purpose is
|
17 |
"Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
|
18 |
f"### Instruction:\n{message}\n\n### Response:"
|
19 |
)
|
@@ -21,26 +21,27 @@ def generate_response(message, history):
|
|
21 |
inputs = tokenizer(instruction, return_tensors="pt")
|
22 |
|
23 |
with torch.no_grad():
|
24 |
-
|
|
|
25 |
**inputs,
|
26 |
max_new_tokens=1000,
|
27 |
num_return_sequences=1,
|
28 |
temperature=0.7,
|
29 |
top_p=0.9,
|
30 |
-
do_sample=True
|
|
|
31 |
)
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
return response
|
37 |
|
38 |
iface = gr.ChatInterface(
|
39 |
-
|
40 |
chatbot=gr.Chatbot(height=600, type="messages"),
|
41 |
textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
|
42 |
title="AwA-1.5B 🔎 - Experimental",
|
43 |
-
description="Chat with AwA (Answers with Athena). Please note that since AwA is
|
44 |
theme="ocean",
|
45 |
examples=[
|
46 |
"How can CRISPR help us Humans?",
|
|
|
10 |
# Set to evaluation mode
|
11 |
model.eval()
|
12 |
|
13 |
+
def generate_response_stream(message, history):
    """Generate AwA's reply to ``message`` and yield it as progressively longer text.

    This is the callback handed to ``gr.ChatInterface``. For a generator
    callback, every yielded value replaces the text currently shown in the
    chat bubble, so each yield carries the *whole reply so far*, not just
    the newest fragment.

    Args:
        message: The user's latest chat message; it is inserted into the
            instruction prompt verbatim.
        history: Previous conversation turns supplied by the chat UI. It is
            accepted for interface compatibility but is not used when
            building the prompt.

    Yields:
        str: The decoded reply, growing by (at most) one token per yield.
    """
    instruction = (
        "You are an LLM called AwA. Aayan Mishra finetunes you. Anthropic does NOT train you. "
        "You are a Qwen 2.5 fine-tune. Your purpose is to help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )

    inputs = tokenizer(instruction, return_tensors="pt")
    # A causal LM returns the prompt tokens followed by the newly generated
    # ones; remember where the prompt ends so it is never echoed to the user.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # NOTE: this call blocks until the full sequence has been produced.
        # True token-by-token streaming would need a streamer object (e.g.
        # transformers.TextIteratorStreamer) with generate() running in a
        # background thread; the loop below only replays the finished result.
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=1000,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    new_token_ids = generated_ids[0][prompt_length:]

    # Decode growing prefixes instead of single ids: decoding ids one at a
    # time loses spacing and corrupts characters that span several tokens.
    previous_text = None
    for end in range(1, len(new_token_ids) + 1):
        partial_text = tokenizer.decode(new_token_ids[:end], skip_special_tokens=True)
        # Skip steps that add nothing visible (e.g. a trailing special token).
        if partial_text != previous_text:
            previous_text = partial_text
            yield partial_text
|
|
|
38 |
|
39 |
iface = gr.ChatInterface(
|
40 |
+
fn=generate_response_stream,
|
41 |
chatbot=gr.Chatbot(height=600, type="messages"),
|
42 |
textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
|
43 |
title="AwA-1.5B 🔎 - Experimental",
|
44 |
+
description="Chat with AwA (Answers with Athena). Please note that since AwA is an experimental model, some outputs may not be accurate/expected!",
|
45 |
theme="ocean",
|
46 |
examples=[
|
47 |
"How can CRISPR help us Humans?",
|