Omarrran committed
Commit 5cb3d63 · verified · 1 Parent(s): 1bf92d5

Update README.md

Files changed (1):
  1. README.md +75 -9
README.md CHANGED
@@ -57,7 +57,7 @@ from unsloth import FastLanguageModel
  from unsloth.chat_templates import get_chat_template
  from peft import PeftModel

- model_name = "unsloth/Phi-4"
+ model_name = "Omarrran/lora_model"
  max_seq_length = 2048
  load_in_4bit = True

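Note on this hunk: `model_name` now points at the fine-tuned repo `Omarrran/lora_model` instead of the `unsloth/Phi-4` base model. If that repo holds only a PEFT adapter rather than merged weights, the adapter can also be attached explicitly with the `PeftModel` import the README already carries; a minimal sketch, assuming `Omarrran/lora_model` is a standard PEFT adapter trained on top of `unsloth/Phi-4`:

```python
from unsloth import FastLanguageModel
from peft import PeftModel

# Load the original base model (as in the previous revision of the README)
base_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-4",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Attach the fine-tuned LoRA weights on top of the base model
model = PeftModel.from_pretrained(base_model, "Omarrran/lora_model")
```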
@@ -109,21 +109,87 @@ To interact with the fine-tuned model using **Gradio**, use:

  ```python
  import gradio as gr
+ import torch
+ from unsloth import FastLanguageModel
+ from unsloth.chat_templates import get_chat_template
+ from peft import PeftModel

- def chat_with_model(user_input):
-     inputs = tokenizer(user_input, return_tensors="pt")
-     output = model.generate(**inputs, max_length=200)
-     response = tokenizer.decode(output[0], skip_special_tokens=True)
-     return response
+ # Load the Base Model with Unsloth
+ model_name = "Omarrran/lora_model"  # Change this if needed
+ max_seq_length = 2048
+ load_in_4bit = True  # Use 4-bit quantization to save memory
+
+ # Load model and tokenizer
+ base_model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name=model_name,
+     max_seq_length=max_seq_length,
+     load_in_4bit=load_in_4bit
+ )
+
+ # Apply LoRA Adapter
+ model = FastLanguageModel.get_peft_model(
+     base_model,
+     r=16,
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
+                     "gate_proj", "up_proj", "down_proj"],
+     lora_alpha=16,
+     lora_dropout=0,
+     bias="none",
+     use_gradient_checkpointing="unsloth"
+ )
+
+ # Apply Chat Formatting Template
+ tokenizer = get_chat_template(tokenizer, chat_template="phi-4")

+ # Chat Function
+ def chat_with_model(user_input):
+     try:
+         inputs = tokenizer(user_input, return_tensors="pt")
+         output = model.generate(**inputs, max_length=200)
+         response = tokenizer.decode(output[0], skip_special_tokens=True)
+         return response
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ # Define Gradio Interface
+ description = """
+ ### 🧠 Phi-4 Conversational AI Chatbot
+ This chatbot is powered by **Unsloth's Phi-4 model**, optimized with **LoRA fine-tuning**.
+
+ #### 🔹 Features:
+ ✅ **Lightweight LoRA adapter for efficiency**
+ ✅ **Supports long-context conversations (2048 tokens)**
+ ✅ **Optimized with 4-bit quantization for fast inference**
+
+ #### 🔹 Example Questions:
+ - "What is the capital of France?"
+ - "Tell me a joke!"
+ - "Explain black holes in simple terms."
+ """
+
+ examples = [
+     "Hello, how are you?",
+     "What is the capital of France?",
+     "Tell me a joke!",
+     "What is quantum physics?",
+     "Translate 'Hello' to French."
+ ]
+
+ # Launch Gradio UI
  demo = gr.Interface(
      fn=chat_with_model,
+     inputs=gr.Textbox(label="Your Message", placeholder="Type something here..."),
      outputs=gr.Textbox(label="Chatbot's Response"),
+     title="🔹 HNM_Phi_4_finetuned",
+     description=description,
+     examples=examples,
+     allow_flagging="never"
  )

- demo.launch()
+ if __name__ == "__main__":
+     demo.launch()
+
+
  ```

  ## 📌 Conclusion
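A caveat on the committed `chat_with_model`: it tokenizes the raw string, leaves the tensors on CPU (a 4-bit model normally sits on the GPU), and never uses the phi-4 chat template attached just above it. A possible variant, not part of this commit, assuming a CUDA GPU and Unsloth's `FastLanguageModel.for_inference` helper:

```python
FastLanguageModel.for_inference(model)  # enable Unsloth's faster generation path

def chat_with_model(user_input):
    try:
        # Format the prompt with the phi-4 chat template instead of raw text
        messages = [{"role": "user", "content": user_input}]
        input_ids = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        output = model.generate(input_ids=input_ids, max_new_tokens=200)
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"
```

Here `max_new_tokens` stands in for `max_length` so the 200-token budget applies to the generated reply rather than to prompt plus reply.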
 
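Once the script is running, the interface can also be exercised programmatically; a small usage sketch with `gradio_client`, assuming the default local URL and the single default endpoint that `gr.Interface` exposes:

```python
from gradio_client import Client

# Assumes the demo is running locally on Gradio's default port
client = Client("http://127.0.0.1:7860/")
print(client.predict("What is the capital of France?", api_name="/predict"))
```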