Spestly committed on
Commit
32260ac
·
verified ·
1 Parent(s): 21b2574

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -10,10 +10,10 @@ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float
10
  # Set to evaluation mode
11
  model.eval()
12
 
13
def generate_response(message, history):
    """Generate a single (non-streaming) chat response from the AwA model.

    Args:
        message: The user's latest message (str), injected into the prompt.
        history: Prior chat turns supplied by gr.ChatInterface; unused here
            because the prompt template is single-turn.

    Returns:
        The model's reply text with the prompt/instruction scaffolding stripped.
    """
    # Alpaca-style single-turn prompt; the "### Response:" sentinel lets us
    # strip the echoed prompt from the decoded output below.
    instruction = (
        "You are an LLM called AwA. Aayan Mishra finetunes you. Anthropic does NOT train you. "
        # "is to help" (not "is the help") — matches the corrected prompt wording.
        "You are a Qwen 2.5 fine-tune. Your purpose is to help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )

    inputs = tokenizer(instruction, return_tensors="pt")

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # generate() output includes the prompt tokens; keep only the text
    # after the final "### Response:" marker.
    response = response.split("### Response:")[-1].strip()

    return response
37
 
38
  iface = gr.ChatInterface(
39
- generate_response,
40
  chatbot=gr.Chatbot(height=600, type="messages"),
41
  textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
42
  title="AwA-1.5B 🔎 - Experimental",
43
- description="Chat with AwA (Answers with Athena). Please note that since AwA is and experimental model, some outputs may not be accurate/expected!",
44
  theme="ocean",
45
  examples=[
46
  "How can CRISPR help us Humans?",
 
10
  # Set to evaluation mode
11
  model.eval()
12
 
13
def generate_response_stream(message, history):
    """Generate a chat response from the AwA model, streamed to Gradio.

    Args:
        message: The user's latest message (str), injected into the prompt.
        history: Prior chat turns supplied by gr.ChatInterface; unused here
            because the prompt template is single-turn.

    Yields:
        Progressively longer cumulative response text. Gradio's ChatInterface
        replaces the displayed message with each yielded value, so each yield
        must contain the full response-so-far, not a single-token fragment.
    """
    instruction = (
        "You are an LLM called AwA. Aayan Mishra finetunes you. Anthropic does NOT train you. "
        "You are a Qwen 2.5 fine-tune. Your purpose is to help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )

    inputs = tokenizer(instruction, return_tensors="pt")

    # Inference only — no gradients needed.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=1000,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    # generate() output begins with the prompt tokens — skip them so the
    # instruction scaffolding is never streamed back to the user.
    prompt_len = inputs["input_ids"].shape[1]
    new_token_ids = generated_ids[0][prompt_len:]

    # Decode a growing prefix each step: re-decoding the whole prefix keeps
    # multi-byte / BPE-merged tokens intact (decoding one id at a time can
    # emit broken characters), and yields the cumulative text Gradio expects.
    for end in range(1, len(new_token_ids) + 1):
        yield tokenizer.decode(new_token_ids[:end], skip_special_tokens=True)
 
38
 
39
  iface = gr.ChatInterface(
40
+ fn=generate_response_stream,
41
  chatbot=gr.Chatbot(height=600, type="messages"),
42
  textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
43
  title="AwA-1.5B 🔎 - Experimental",
44
+ description="Chat with AwA (Answers with Athena). Please note that since AwA is an experimental model, some outputs may not be accurate/expected!",
45
  theme="ocean",
46
  examples=[
47
  "How can CRISPR help us Humans?",