AwA-0.5B

Running

App Files Files Community

Spestly committed 24 days ago

Commit

32260ac

verified ·

1 Parent(s): 21b2574

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -10

app.py CHANGED Viewed

@@ -10,10 +10,10 @@ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float
 # Set to evaluation mode
 model.eval()
-def generate_response(message, history):
     instruction = (
         "You are an LLM called AwA. Aayan Mishra finetunes you. Anthropic does NOT train you. "
-        "You are a Qwen 2.5 fine-tune. Your purpose is the help the user accomplish their request to the best of your abilities. "
         "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
         f"### Instruction:\n{message}\n\n### Response:"
     )
@@ -21,26 +21,27 @@ def generate_response(message, history):
     inputs = tokenizer(instruction, return_tensors="pt")
     with torch.no_grad():
-        outputs = model.generate(
             **inputs,
             max_new_tokens=1000,
             num_return_sequences=1,
             temperature=0.7,
             top_p=0.9,
-            do_sample=True
         )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = response.split("### Response:")[-1].strip()
-    return response
 iface = gr.ChatInterface(
-    generate_response,
     chatbot=gr.Chatbot(height=600, type="messages"),
     textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
     title="AwA-1.5B 🔎 - Experimental",
-    description="Chat with AwA (Answers with Athena). Please note that since AwA is and experimental model, some outputs may not be accurate/expected!",
     theme="ocean",
     examples=[
         "How can CRISPR help us Humans?",

 # Set to evaluation mode
 model.eval()
def generate_response_stream(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Builds a single-turn instruction prompt, runs sampling-based generation
    in a background thread, and yields the reply text accumulated so far
    each time new text is decoded.

    Args:
        message: The user's latest message; inserted into the prompt.
        history: Earlier chat turns passed in by the chat interface. Not
            used: the prompt is built from ``message`` alone.

    Yields:
        The reply generated so far (prompt excluded). Each yielded value
        contains all previously yielded text plus the newly decoded part.

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the
            worker thread, after the stream has been closed.
    """
    # Local imports keep this function self-contained with respect to the
    # module's import block.
    from threading import Thread

    from transformers import TextIteratorStreamer

    instruction = (
        "You are an LLM called AwA. Aayan Mishra finetunes you. Anthropic does NOT train you. "
        "You are a Qwen 2.5 fine-tune. Your purpose is to help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )
    inputs = tokenizer(instruction, return_tensors="pt")

    # skip_prompt drops the echoed instruction; the streamer decodes
    # incrementally so multi-token characters are not emitted half-formed.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    errors = []

    def _generate():
        try:
            # Entered inside the worker because grad mode is per-thread.
            with torch.no_grad():
                model.generate(
                    **inputs,
                    max_new_tokens=1000,
                    num_return_sequences=1,
                    temperature=0.7,
                    top_p=0.9,
                    do_sample=True,
                    streamer=streamer,
                )
        except Exception as exc:
            errors.append(exc)
            # Close the stream so the consuming loop below cannot block
            # forever on a generation that died.
            streamer.end()

    worker = Thread(target=_generate, daemon=True)
    worker.start()

    # Yield the cumulative text, not the individual fragment, so the chat
    # window shows the full reply so far.
    partial_response = ""
    for new_text in streamer:
        partial_response += new_text
        yield partial_response

    worker.join()
    if errors:
        raise errors[0]
 iface = gr.ChatInterface(
+    fn=generate_response_stream,
     chatbot=gr.Chatbot(height=600, type="messages"),
     textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
     title="AwA-1.5B 🔎 - Experimental",
+    description="Chat with AwA (Answers with Athena). Please note that since AwA is an experimental model, some outputs may not be accurate/expected!",
     theme="ocean",
     examples=[
         "How can CRISPR help us Humans?",