add --base-model-id
app.py CHANGED
@@ -14,9 +14,13 @@ from apscheduler.schedulers.background import BackgroundScheduler
 
 CONVERSION_SCRIPT = "convert_lora_to_gguf.py"
 
-def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token: gr.OAuthToken | None):
+def process_model(peft_model_id: str, base_model_id: str, q_method: str, private_repo, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error("You must be logged in to use GGUF-my-lora")
+
+    # validate the oauth token
+    whoami(oauth_token.token)
+
     model_name = peft_model_id.split('/')[-1]
     gguf_output_name = f"{model_name}-{q_method.lower()}.gguf"
 
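The new whoami() call gives the handler a cheap fail-fast check before any download or conversion work starts. A minimal sketch of that check in isolation, assuming huggingface_hub is installed; the helper name below is illustrative, not part of app.py:

# Minimal sketch of the token check, assuming huggingface_hub is installed.
# whoami() raises if the token is rejected, which is what makes it usable as
# a cheap validation step before any long-running conversion work.
import gradio as gr
from huggingface_hub import whoami

def validate_token(oauth_token: gr.OAuthToken | None) -> None:
    if oauth_token is None or oauth_token.token is None:
        raise gr.Error("You must be logged in to use GGUF-my-lora")
    whoami(oauth_token.token)  # raises (e.g. an HTTPError) if the token is invalid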
@@ -62,7 +66,7 @@ def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token:
     if not os.path.exists(adapter_config_dir):
         raise Exception('adapter_config.json not found. Please ensure the selected repo is a PEFT LoRA model.<br/><br/>If you are converting a model (not a LoRA adapter), please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-repo" target="_blank" style="text-decoration:underline">GGUF-my-repo</a> instead.')
 
-    result = subprocess.run([
+    cmd = [
         "python",
         f"llama.cpp/{CONVERSION_SCRIPT}",
         local_dir,
@@ -70,7 +74,12 @@ def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token:
         q_method.lower(),
         "--outfile",
         gguf_output_path,
-    ], shell=False, capture_output=True)
+    ]
+    if base_model_id:
+        cmd.extend(["--base-model-id", base_model_id])
+    print("cmd", cmd)
+
+    result = subprocess.run(cmd, shell=False, capture_output=True)
     print(result)
     if result.returncode != 0:
         raise Exception(f"Error converting to GGUF {q_method}: {result.stderr}")
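The refactor splits the old inline subprocess.run([...]) into building a cmd list first, so the optional --base-model-id flag can be appended conditionally. A standalone sketch of that assembly with placeholder paths and repository ids; the --outtype flag name is an assumption, since the diff context cuts off just before it:

# Standalone sketch of the command assembly above; the adapter directory,
# output path, and base model id are placeholder values.
import shlex

CONVERSION_SCRIPT = "convert_lora_to_gguf.py"
local_dir = "downloads/my-lora-adapter"          # placeholder
gguf_output_path = "my-lora-adapter-q8_0.gguf"   # placeholder
q_method = "Q8_0"
base_model_id = "org/base-model"                 # placeholder; may be empty

cmd = [
    "python",
    f"llama.cpp/{CONVERSION_SCRIPT}",
    local_dir,
    "--outtype",        # assumed flag name; not visible in the diff context
    q_method.lower(),
    "--outfile",
    gguf_output_path,
]
if base_model_id:
    cmd.extend(["--base-model-id", base_model_id])

print(shlex.join(cmd))  # preview of the equivalent shell invocation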
@@ -148,6 +157,12 @@ with gr.Blocks(css=css) as demo:
         search_type="model",
     )
 
+    base_model_id = HuggingfaceHubSearch(
+        label="Base model repository (Optional)",
+        placeholder="If empty, we will use the value from adapter_config.json",
+        search_type="model",
+    )
+
     q_method = gr.Dropdown(
         ["F32", "F16", "Q8_0"],
         label="Quantization Method",
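The placeholder text points at a fallback: if the field is left empty, the base model comes from the adapter's own metadata. A hedged sketch of that lookup, assuming the standard PEFT adapter_config.json key base_model_name_or_path; resolve_base_model is an illustrative helper, not part of app.py:

# Hedged sketch of the fallback described by the placeholder text. PEFT
# adapters record their base model in adapter_config.json under
# "base_model_name_or_path".
import json

def resolve_base_model(base_model_id: str, adapter_config_path: str) -> str:
    if base_model_id:
        return base_model_id
    with open(adapter_config_path) as f:
        return json.load(f)["base_model_name_or_path"]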
@@ -167,6 +182,7 @@ with gr.Blocks(css=css) as demo:
         fn=process_model,
         inputs=[
             peft_model_id,
+            base_model_id,
             q_method,
             private_repo,
         ],
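The new input has to occupy the same slot in inputs=[...] as in the process_model signature, because Gradio passes the listed component values to the handler positionally (the OAuth token is injected separately by Gradio's login support). A stripped-down wiring sketch with plain stand-in components, not the app's actual layout:

# Stripped-down wiring sketch: component values are passed positionally, so
# base_model_id occupies the same slot in inputs=[...] as in the handler
# signature. The components here are simple stand-ins, not the app's real ones.
import gradio as gr

def process_model(peft_model_id, base_model_id, q_method, private_repo):
    return f"{peft_model_id} + {base_model_id or 'adapter_config.json default'} -> {q_method}"

with gr.Blocks() as demo:
    peft_model_id = gr.Textbox(label="PEFT LoRA repository")
    base_model_id = gr.Textbox(label="Base model repository (Optional)")
    q_method = gr.Dropdown(["F32", "F16", "Q8_0"], label="Quantization Method")
    private_repo = gr.Checkbox(label="Private Repo")
    output = gr.Textbox(label="Result")
    gr.Button("Submit").click(
        fn=process_model,
        inputs=[peft_model_id, base_model_id, q_method, private_repo],
        outputs=output,
    )

# demo.launch()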