Omarrran committed
Commit 5cb3d63 · verified · 1 Parent(s): 1bf92d5

Update README.md

Files changed (1):
  1. README.md +75 -9
README.md CHANGED
@@ -57,7 +57,7 @@ from unsloth import FastLanguageModel
  from unsloth.chat_templates import get_chat_template
  from peft import PeftModel

- model_name = "unsloth/Phi-4"
+ model_name = "Omarrran/lora_model"
  max_seq_length = 2048
  load_in_4bit = True

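Note on this hunk: `model_name` now points at the fine-tuned repo `Omarrran/lora_model` instead of the `unsloth/Phi-4` base model. If that repo holds only a PEFT adapter rather than merged weights, the adapter can also be attached explicitly with the `PeftModel` import the README already carries; a minimal sketch, assuming `Omarrran/lora_model` is a standard PEFT adapter trained on top of `unsloth/Phi-4`:

```python
from unsloth import FastLanguageModel
from peft import PeftModel

# Load the original base model (as in the previous revision of the README)
base_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-4",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Attach the fine-tuned LoRA weights on top of the base model
model = PeftModel.from_pretrained(base_model, "Omarrran/lora_model")
```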
@@ -109,21 +109,87 @@ To interact with the fine-tuned model using **Gradio**, use:

  ```python
  import gradio as gr
+ import torch
+ from unsloth import FastLanguageModel
+ from unsloth.chat_templates import get_chat_template
+ from peft import PeftModel

- def chat_with_model(user_input):
-     inputs = tokenizer(user_input, return_tensors="pt")
-     output = model.generate(**inputs, max_length=200)
-     response = tokenizer.decode(output[0], skip_special_tokens=True)
-     return response
+ # Load the Base Model with Unsloth
+ model_name = "Omarrran/lora_model"  # Change this if needed
+ max_seq_length = 2048
+ load_in_4bit = True  # Use 4-bit quantization to save memory
+
+ # Load model and tokenizer
+ base_model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=max_seq_length,
+     load_in_4bit=load_in_4bit
+ )
+
+ # Apply LoRA Adapter
+ model = FastLanguageModel.get_peft_model(
+     base_model,
+     r=16,
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                     "gate_proj", "up_proj", "down_proj"],
+     lora_alpha=16,
+     lora_dropout=0,
+     bias="none",
+     use_gradient_checkpointing="unsloth"
+ )
+
+ # Apply Chat Formatting Template
+ tokenizer = get_chat_template(tokenizer, chat_template="phi-4")

+ # Chat Function
+ def chat_with_model(user_input):
+     try:
+         inputs = tokenizer(user_input, return_tensors="pt")
+         output = model.generate(**inputs, max_length=200)
+         response = tokenizer.decode(output[0], skip_special_tokens=True)
+         return response
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ # Define Gradio Interface
+ description = """
+ ### 🧠 Phi-4 Conversational AI Chatbot
+ This chatbot is powered by **Unsloth's Phi-4 model**, optimized with **LoRA fine-tuning**.
+
+ #### 🔹 Features:
+ ✅ **Lightweight LoRA adapter for efficiency**
+ ✅ **Supports long-context conversations (2048 tokens)**
+ ✅ **Optimized with 4-bit quantization for fast inference**
+
+ #### 🔹 Example Questions:
+ - "What is the capital of France?"
+ - "Tell me a joke!"
+ - "Explain black holes in simple terms."
+ """
+
+ examples = [
+     "Hello, how are you?",
+     "What is the capital of France?",
+     "Tell me a joke!",
+     "What is quantum physics?",
+     "Translate 'Hello' to French."
+ ]
+
+ # Launch Gradio UI
  demo = gr.Interface(
      fn=chat_with_model,
+     inputs=gr.Textbox(label="Your Message", placeholder="Type something here..."),
      outputs=gr.Textbox(label="Chatbot's Response"),
+     title="🔹 HNM_Phi_4_finetuned",
+     description=description,
+     examples=examples,
+     allow_flagging="never"
  )

- demo.launch()
+ if __name__ == "__main__":
+     demo.launch()
+
+
  ```

  ## 📌 Conclusion
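A caveat on the committed `chat_with_model`: it tokenizes the raw string, leaves the tensors on CPU (a 4-bit model normally sits on the GPU), and never uses the phi-4 chat template attached just above it. A possible variant, not part of this commit, assuming a CUDA GPU and Unsloth's `FastLanguageModel.for_inference` helper:

```python
FastLanguageModel.for_inference(model)  # enable Unsloth's faster generation path

def chat_with_model(user_input):
    try:
        # Format the prompt with the phi-4 chat template instead of raw text
        messages = [{"role": "user", "content": user_input}]
        input_ids = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        output = model.generate(input_ids=input_ids, max_new_tokens=200)
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"
```

Here `max_new_tokens` stands in for `max_length` so the 200-token budget applies to the generated reply rather than to prompt plus reply.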
 
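Once the script is running, the interface can also be exercised programmatically; a small usage sketch with `gradio_client`, assuming the default local URL and the single default endpoint that `gr.Interface` exposes:

```python
from gradio_client import Client

# Assumes the demo is running locally on Gradio's default port
client = Client("http://127.0.0.1:7860/")
print(client.predict("What is the capital of France?", api_name="/predict"))
```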