How to load a checkpoint merge from CivitAI as a Hugging Face model?
Sorry for the noob question. I have been researching this for hours now but have been unable to make much progress.
I am trying to load this checkpoint merge on CivitAI as a Huggingface Model: https://civitai.com/models/989221/illustration-juaner-ghibli-style-2d-illustration-model-flux
Then load this Huggingface Model in a Space and generate an image.
Can you please give me some pointers on how to do this?
I tried the following steps but received errors:
- installed the diffusers library from Github to my mac
- downloaded the model's safetensors file from civitai
- Tried to convert it to diffusers format, but both of the following commands gave errors:
Command: python scripts/convert_flux_to_diffusers.py --checkpoint_path "
Command: scripts/convert_original_stable_diffusion_to_diffusers.py --checkpoint_path "
- I also tried using the StableDiffusionXLPipeline.from_single_file function as per the code below, but then got this error:
Code:
import spaces
import os
import gradio as gr
import torch
from diffusers import StableDiffusionXLPipeline, AutoencoderKL, DDIMScheduler
# Hugging Face access token, read from the HF_TOKEN environment variable
# (os.getenv returns None when the variable is not set).
HF_TOKEN = os.getenv("HF_TOKEN")
# NOTE(review): this writes the token value itself to stdout/logs; consider removing.
print(HF_TOKEN)
# --------------------------------------------------------------------
# 1) MODEL + PIPELINE SETUP
# --------------------------------------------------------------------
# Hub URL of the single-file .safetensors checkpoint handed to from_single_file below.
base_model = "https://huggingface.co/soulfulmachine/Ghibli/blob/main/IllustrationJuanerGhibli_v20.safetensors"
# vae_model_path = "./models/sdxl_vae.safetensors"
device = "cuda" # or "cpu" if you don't have a GPU
# DDIM scheduler built with explicit settings; it is passed to the pipeline
# below through the `scheduler=` argument.
noise_scheduler = DDIMScheduler(
num_train_timesteps=1000,
beta_start=0.00085,
beta_end=0.012,
beta_schedule="scaled_linear",
clip_sample=False,
set_alpha_to_one=False,
steps_offset=1,
)
# Optional separate VAE (currently disabled):
# vae = AutoencoderKL.from_single_file(vae_model_path).to(dtype=torch.float16)
# Build the SDXL pipeline from the single checkpoint file in float16 and move it to `device`.
# NOTE(review): this assumes the checkpoint is an SDXL checkpoint that contains
# text-encoder weights -- confirm against the model card.
pipe = StableDiffusionXLPipeline.from_single_file(
base_model,
torch_dtype=torch.float16,
scheduler=noise_scheduler,
# vae=vae,
# NOTE(review): newer diffusers releases appear to expect `token=` instead of
# `use_auth_token=` -- confirm against the installed version.
use_auth_token=HF_TOKEN,
use_safetensors=True,
add_watermarker=False,
load_safety_checker=False
).to(device)
# --------------------------------------------------------------------
# 2) INFERENCE FUNCTION
# --------------------------------------------------------------------
@spaces.GPU
def generate_image(prompt, negative_prompt, steps, scale, width, height):
    """Run the module-level SDXL pipeline and return the images it produces.

    ``steps`` is forwarded as ``num_inference_steps`` and ``scale`` as
    ``guidance_scale``; ``prompt``, ``negative_prompt``, ``width`` and
    ``height`` are forwarded under their own names. Two images are
    requested per prompt.
    """
    generation_kwargs = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_images_per_prompt": 2,
        "guidance_scale": scale,
        "width": width,
        "height": height,
        "num_inference_steps": steps,
    }
    result = pipe(**generation_kwargs)
    # The pipeline output's ``images`` is returned unchanged so the caller
    # (a Gradio gallery, per the original author) can show all of them.
    return result.images
# --------------------------------------------------------------------
# 3) BUILD GRADIO INTERFACE
# --------------------------------------------------------------------
<Gradio code is truncated>
Error:
IllustrationJuanerGhibli_v20.safetensors: 0%| | 0.00/11.9G [00:00<?, ?B/s]
IllustrationJuanerGhibli_v20.safetensors: 2%|β | 241M/11.9G [00:01<00:49, 233MB/s]
IllustrationJuanerGhibli_v20.safetensors: 10%|β | 1.23G/11.9G [00:02<00:15, 670MB/s]
IllustrationJuanerGhibli_v20.safetensors: 24%|βββ | 2.85G/11.9G [00:03<00:08, 1.10GB/s]
IllustrationJuanerGhibli_v20.safetensors: 38%|ββββ | 4.49G/11.9G [00:04<00:05, 1.31GB/s]
IllustrationJuanerGhibli_v20.safetensors: 52%|ββββββ | 6.21G/11.9G [00:05<00:03, 1.46GB/s]
IllustrationJuanerGhibli_v20.safetensors: 65%|βββββββ | 7.73G/11.9G [00:06<00:02, 1.48GB/s]
IllustrationJuanerGhibli_v20.safetensors: 79%|ββββββββ | 9.37G/11.9G [00:07<00:01, 1.53GB/s]
IllustrationJuanerGhibli_v20.safetensors: 93%|ββββββββββ| 11.0G/11.9G [00:08<00:00, 1.56GB/s]
IllustrationJuanerGhibli_v20.safetensors: 100%|ββββββββββ| 11.9G/11.9G [00:08<00:00, 1.39GB/s]
model_index.json: 0%| | 0.00/536 [00:00<?, ?B/s]
model_index.json: 100%|ββββββββββ| 536/536 [00:00<00:00, 3.34MB/s]
scheduler/scheduler_config.json: 0%| | 0.00/273 [00:00<?, ?B/s]
scheduler/scheduler_config.json: 100%|ββββββββββ| 273/273 [00:00<00:00, 1.78MB/s]
text_encoder/config.json: 0%| | 0.00/613 [00:00<?, ?B/s]
text_encoder/config.json: 100%|ββββββββββ| 613/613 [00:00<00:00, 1.97MB/s]
text_encoder_2/config.json: 0%| | 0.00/782 [00:00<?, ?B/s]
text_encoder_2/config.json: 100%|ββββββββββ| 782/782 [00:00<00:00, 5.12MB/s]
(…)t_encoder_2/model.safetensors.index.json: 0%| | 0.00/19.9k [00:00<?, ?B/s]
(…)t_encoder_2/model.safetensors.index.json: 100%|██████████| 19.9k/19.9k [00:00<00:00, 77.5MB/s]
tokenizer/merges.txt: 0%| | 0.00/525k [00:00<?, ?B/s]
tokenizer/merges.txt: 100%|ββββββββββ| 525k/525k [00:00<00:00, 44.5MB/s]
tokenizer/special_tokens_map.json: 0%| | 0.00/588 [00:00<?, ?B/s]
tokenizer/special_tokens_map.json: 100%|ββββββββββ| 588/588 [00:00<00:00, 2.30MB/s]
tokenizer/tokenizer_config.json: 0%| | 0.00/705 [00:00<?, ?B/s]
tokenizer/tokenizer_config.json: 100%|ββββββββββ| 705/705 [00:00<00:00, 2.79MB/s]
tokenizer/vocab.json: 0%| | 0.00/1.06M [00:00<?, ?B/s]
tokenizer/vocab.json: 100%|ββββββββββ| 1.06M/1.06M [00:00<00:00, 23.1MB/s]
tokenizer_2/special_tokens_map.json: 0%| | 0.00/2.54k [00:00<?, ?B/s]
tokenizer_2/special_tokens_map.json: 100%|ββββββββββ| 2.54k/2.54k [00:00<00:00, 15.9MB/s]
spiece.model: 0%| | 0.00/792k [00:00<?, ?B/s]
spiece.model: 100%|ββββββββββ| 792k/792k [00:00<00:00, 186MB/s]
tokenizer_2/tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]
tokenizer_2/tokenizer.json: 100%|ββββββββββ| 2.42M/2.42M [00:00<00:00, 29.5MB/s]
tokenizer_2/tokenizer_config.json: 0%| | 0.00/20.8k [00:00<?, ?B/s]
tokenizer_2/tokenizer_config.json: 100%|ββββββββββ| 20.8k/20.8k [00:00<00:00, 66.7MB/s]
transformer/config.json: 0%| | 0.00/378 [00:00<?, ?B/s]
transformer/config.json: 100%|ββββββββββ| 378/378 [00:00<00:00, 1.94MB/s]
(…)ion_pytorch_model.safetensors.index.json: 0%| | 0.00/121k [00:00<?, ?B/s]
(…)ion_pytorch_model.safetensors.index.json: 100%|██████████| 121k/121k [00:00<00:00, 90.0MB/s]
vae/config.json: 0%| | 0.00/820 [00:00<?, ?B/s]
vae/config.json: 100%|ββββββββββ| 820/820 [00:00<00:00, 4.90MB/s]
Loading pipeline components...: 0%| | 0/6 [00:00<?, ?it/s]
Loading pipeline components...: 17%|ββ | 1/6 [00:00<00:00, 3211.57it/s]
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/diffusers/loaders/single_file.py", line 495, in from_single_file
loaded_sub_model = load_single_file_sub_model(
File "/usr/local/lib/python3.10/site-packages/diffusers/loaders/single_file.py", line 168, in load_single_file_sub_model
raise SingleFileComponentError(
diffusers.loaders.single_file_utils.SingleFileComponentError: Failed to load CLIPTextModel. Weights for this component appear to be missing in the checkpoint.
The model is FLUX, not SDXL, and the checkpoint file contains only the transformer weights, so it has to be loaded in a special way. You should be able to load it as follows.
import spaces
import os
import gradio as gr
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
# Hugging Face access token, read from the HF_TOKEN environment variable
# (os.getenv returns None when the variable is not set).
HF_TOKEN = os.getenv("HF_TOKEN")
# NOTE(review): this writes the token value itself to stdout/logs; consider removing.
print(HF_TOKEN)
# --------------------------------------------------------------------
# 1) MODEL + PIPELINE SETUP
# --------------------------------------------------------------------
# Hub URL of the single-file .safetensors checkpoint (transformer weights).
base_model = "https://huggingface.co/soulfulmachine/Ghibli/blob/main/IllustrationJuanerGhibli_v20.safetensors"
# Repo id used both as `config=` for the transformer and as the source of the
# rest of the pipeline.
diffusers_base_model = "camenduru/FLUX.1-dev-diffusers"
device = "cuda" # or "cpu" if you don't have a GPU
# Load the FLUX transformer (DiT) from a single .safetensors file.
# The URL is first normalised: "/resolve/main/" becomes "/blob/main/" and a
# trailing "?download=true" is dropped.
base_model = base_model.replace("/resolve/main/", "/blob/main/").replace("?download=true", "")
transformer = FluxTransformer2DModel.from_single_file(base_model, subfolder="transformer", torch_dtype=torch.bfloat16, config=diffusers_base_model, token=HF_TOKEN)
# Load the FLUX model with already loaded transformer, then move it to `device`.
# NOTE(review): `diffusers_base_model` is a repo id rather than a checkpoint
# URL/path; the traceback reported later in this thread shows from_single_file
# rejecting it, and the follow-up switches this call to from_pretrained.
pipe = FluxPipeline.from_single_file(
diffusers_base_model,
transformer=transformer,
torch_dtype=torch.bfloat16,
token=HF_TOKEN,
).to(device)
# --------------------------------------------------------------------
# 2) INFERENCE FUNCTION
# --------------------------------------------------------------------
@spaces.GPU
def generate_image(prompt, negative_prompt, steps, scale, width, height):
    """Run the module-level FLUX pipeline and return the images it produces.

    ``steps`` is forwarded as ``num_inference_steps`` and ``scale`` as
    ``guidance_scale``; ``prompt``, ``width`` and ``height`` are forwarded
    under their own names. Two images are requested per prompt.
    ``negative_prompt`` is accepted but is not passed to the pipeline.
    """
    generation_kwargs = {
        "prompt": prompt,
        "num_images_per_prompt": 2,
        "guidance_scale": scale,
        "width": width,
        "height": height,
        "num_inference_steps": steps,
    }
    result = pipe(**generation_kwargs)
    # The pipeline output's ``images`` is returned unchanged so the caller
    # (a Gradio gallery, per the original author) can show all of them.
    return result.images
# --------------------------------------------------------------------
# 3) BUILD GRADIO INTERFACE
# --------------------------------------------------------------------
<Gradio code is truncated>
Thank you for the response! Really appreciate all your work porting models!!
I tried your code above but am getting a different error now. I tried various approaches but could not get it to work. Could you please take a look when you have some time?
runtime error
Exit code: 1. Reason: 1:15, 156MB/s][A
IllustrationJuanerGhibli_v20.safetensors: 11%|β | 1.27G/11.9G [00:02<00:14, 714MB/s][A
IllustrationJuanerGhibli_v20.safetensors: 23%|βββ | 2.79G/11.9G [00:03<00:08, 1.08GB/s][A
IllustrationJuanerGhibli_v20.safetensors: 35%|ββββ | 4.20G/11.9G [00:04<00:06, 1.21GB/s][A
IllustrationJuanerGhibli_v20.safetensors: 47%|βββββ | 5.60G/11.9G [00:05<00:04, 1.27GB/s][A
IllustrationJuanerGhibli_v20.safetensors: 60%|ββββββ | 7.14G/11.9G [00:06<00:03, 1.36GB/s][A
IllustrationJuanerGhibli_v20.safetensors: 74%|ββββββββ | 8.85G/11.9G [00:07<00:02, 1.45GB/s][A
IllustrationJuanerGhibli_v20.safetensors: 88%|βββββββββ | 10.5G/11.9G [00:08<00:00, 1.51GB/s][A
IllustrationJuanerGhibli_v20.safetensors: 100%|ββββββββββ| 11.9G/11.9G [00:08<00:00, 1.34GB/s]
transformer/config.json: 0%| | 0.00/378 [00:00<?, ?B/s][A
transformer/config.json: 100%|ββββββββββ| 378/378 [00:00<00:00, 2.86MB/s]
Traceback (most recent call last):
File "/home/user/app/app.py", line 23, in <module>
pipe = FluxPipeline.from_single_file(
File "/usr/local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/diffusers/loaders/single_file.py", line 378, in from_single_file
checkpoint = load_single_file_checkpoint(
File "/usr/local/lib/python3.10/site-packages/diffusers/loaders/single_file_utils.py", line 386, in load_single_file_checkpoint
repo_id, weights_name = _extract_repo_id_and_weights_name(pretrained_model_link_or_path)
File "/usr/local/lib/python3.10/site-packages/diffusers/loaders/single_file_utils.py", line 340, in _extract_repo_id_and_weights_name
raise ValueError("Invalid `pretrained_model_name_or_path` provided. Please set it to a valid URL.")
ValueError: Invalid `pretrained_model_name_or_path` provided. Please set it to a valid URL.
My full updated code:
import spaces
import os
import gradio as gr
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
# Hugging Face access token, read from the HF_TOKEN environment variable
# (os.getenv returns None when the variable is not set).
HF_TOKEN = os.getenv("HF_TOKEN")
# --------------------------------------------------------------------
# 1) MODEL + PIPELINE SETUP
# --------------------------------------------------------------------
# Hub URL of the single-file .safetensors checkpoint (transformer weights).
base_model = "https://huggingface.co/soulfulmachine/Ghibli/blob/main/IllustrationJuanerGhibli_v20.safetensors"
# Repo id used both as `config=` for the transformer and as the source of the
# rest of the pipeline.
diffusers_base_model = "camenduru/FLUX.1-dev-diffusers"
device = "cuda" # or "cpu" if you don't have a GPU
# Load the FLUX transformer (DiT) from a single .safetensors file.
# The URL is first normalised: "/resolve/main/" becomes "/blob/main/" and a
# trailing "?download=true" is dropped.
base_model = base_model.replace("/resolve/main/", "/blob/main/").replace("?download=true", "")
transformer = FluxTransformer2DModel.from_single_file(base_model, subfolder="transformer", torch_dtype=torch.bfloat16, config=diffusers_base_model, token=HF_TOKEN)
# Load the FLUX model with already loaded transformer, then move it to `device`.
# NOTE(review): this is the call the traceback above points at --
# `diffusers_base_model` is a repo id, and from_single_file raises
# "Invalid `pretrained_model_name_or_path` provided" for it.
pipe = FluxPipeline.from_single_file(
diffusers_base_model,
transformer=transformer,
torch_dtype=torch.bfloat16,
token=HF_TOKEN,
).to(device)
# --------------------------------------------------------------------
# 2) INFERENCE FUNCTION
# --------------------------------------------------------------------
@spaces.GPU
def generate_image(prompt, negative_prompt, steps, scale, width, height):
    """Run the module-level FLUX pipeline and return the images it produces.

    ``steps`` is forwarded as ``num_inference_steps`` and ``scale`` as
    ``guidance_scale``; ``prompt``, ``width`` and ``height`` are forwarded
    under their own names. Two images are requested per prompt.
    ``negative_prompt`` is part of the signature (the UI wiring supplies six
    inputs) but is not passed to the pipeline.
    """
    generation_kwargs = {
        "prompt": prompt,
        "num_images_per_prompt": 2,
        "guidance_scale": scale,
        "width": width,
        "height": height,
        "num_inference_steps": steps,
    }
    result = pipe(**generation_kwargs)
    # The pipeline output's ``images`` is returned unchanged so the Gradio
    # gallery can show all of them.
    return result.images
# --------------------------------------------------------------------
# 3) BUILD GRADIO INTERFACE
# --------------------------------------------------------------------
def build_demo():
    """Build the Gradio Blocks UI for the image generator.

    The layout is: a title and short instructions, a row with the prompt and
    negative-prompt text boxes, a row with the inference-steps and guidance
    sliders, a row with the width and height sliders, a "Generate Image"
    button and a gallery for the results. Clicking the button calls
    ``generate_image`` with the six inputs in signature order and sends its
    return value to the gallery.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Gradio Demo")
        gr.Markdown(
            "Enter your **prompt**, optional **negative prompt**, and adjust the generation parameters below."
        )
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="e.g. A futuristic Asian utopian city, cinematic lighting..."
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                placeholder="e.g. lowres, bad anatomy, blurred..."
            )
        with gr.Row():
            steps = gr.Slider(
                minimum=1,
                maximum=100,
                value=30,
                step=1,
                label="Number of Inference Steps"
            )
            scale = gr.Slider(
                minimum=1.0,
                maximum=20.0,
                value=7.5,
                step=0.1,
                label="Guidance Scale (CFG)"
            )
        with gr.Row():
            width = gr.Slider(
                minimum=64,
                maximum=2048,
                value=1024,
                step=64,
                label="Width"
            )
            height = gr.Slider(
                minimum=64,
                maximum=2048,
                value=1024,
                step=64,
                label="Height"
            )
        generate_button = gr.Button("Generate Image")
        # The two-column grid is requested through the constructor: the
        # chained ``.style(grid=[2], height="auto")`` helper was removed in
        # Gradio 4 and fails there with an AttributeError.
        gallery = gr.Gallery(
            label="Generated Images",
            show_label=False,
            columns=2,
        )
        # Wire up the button
        generate_button.click(
            fn=generate_image,
            inputs=[prompt, negative_prompt, steps, scale, width, height],
            outputs=gallery
        )
    return demo
# --------------------------------------------------------------------
# 4) LAUNCH THE DEMO
# --------------------------------------------------------------------
if __name__ == "__main__":
    # Build the UI, then serve it on every network interface at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo_app = build_demo()
    demo_app.launch(**launch_options)
Sorry, I made a mistake! The base pipeline has to be loaded with `from_pretrained` instead of `from_single_file`:
# Load the FLUX model with already loaded transformer
#pipe = FluxPipeline.from_single_file(
pipe = FluxPipeline.from_pretrained(
Thank you. I updated as you suggested. I also had to add the following to my requirements.txt file:
accelerate
sentencepiece
I was then able to generate images with the Flux checkpoint file. Thank you so much !!!
Here is the final code:
import spaces
import os
import gradio as gr
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel
# Hugging Face access token taken from the environment (None when HF_TOKEN is unset).
HF_TOKEN = os.getenv("HF_TOKEN")
# --------------------------------------------------------------------
# 1) MODEL + PIPELINE SETUP
# --------------------------------------------------------------------
# Single-file checkpoint holding the fine-tuned transformer weights.
base_model = "https://huggingface.co/soulfulmachine/Ghibli/blob/main/IllustrationJuanerGhibli_v20.safetensors"
# Diffusers-format repo that supplies the transformer config and every other
# pipeline component.
diffusers_base_model = "camenduru/FLUX.1-dev-diffusers"
device = "cuda"  # or "cpu" if you don't have a GPU

# Normalise the checkpoint link: use the "/blob/main/" form and drop any
# "?download=true" suffix.
base_model = base_model.replace("/resolve/main/", "/blob/main/")
base_model = base_model.replace("?download=true", "")

# Transformer (DiT) weights come from the single .safetensors file.
transformer = FluxTransformer2DModel.from_single_file(
    base_model,
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
    config=diffusers_base_model,
    token=HF_TOKEN,
)

# The rest of the pipeline comes from the diffusers repo, with the transformer
# loaded above passed in; the assembled pipeline is then moved to `device`.
pipe = FluxPipeline.from_pretrained(
    diffusers_base_model,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN,
)
pipe = pipe.to(device)
# --------------------------------------------------------------------
# 2) INFERENCE FUNCTION
# --------------------------------------------------------------------
@spaces.GPU
def generate_image(prompt, negative_prompt, steps, scale, width, height):
    """Run the module-level FLUX pipeline and return the images it produces.

    ``steps`` is forwarded as ``num_inference_steps`` and ``scale`` as
    ``guidance_scale``; ``prompt``, ``width`` and ``height`` are forwarded
    under their own names. Two images are requested per prompt.
    ``negative_prompt`` is part of the signature (the UI wiring supplies six
    inputs) but is not passed to the pipeline.
    """
    generation_kwargs = {
        "prompt": prompt,
        "num_images_per_prompt": 2,
        "guidance_scale": scale,
        "width": width,
        "height": height,
        "num_inference_steps": steps,
    }
    result = pipe(**generation_kwargs)
    # The pipeline output's ``images`` is returned unchanged so the Gradio
    # gallery can show all of them.
    return result.images
# --------------------------------------------------------------------
# 3) BUILD GRADIO INTERFACE
# --------------------------------------------------------------------
def build_demo():
    """Assemble and return the Gradio Blocks interface (without launching it).

    The page contains a heading and instructions, a row of prompt /
    negative-prompt text boxes, a row of steps / guidance sliders, a row of
    width / height sliders, a "Generate Image" button and a results gallery.
    The button's click handler is ``generate_image``; it receives the six
    input components in signature order and writes to the gallery.
    """

    def _dimension_slider(label):
        # Width and height use the same range, default and step size.
        return gr.Slider(minimum=64, maximum=2048, value=1024, step=64, label=label)

    with gr.Blocks() as demo:
        gr.Markdown("## Gradio Demo")
        gr.Markdown(
            "Enter your **prompt**, optional **negative prompt**, and adjust the generation parameters below."
        )

        with gr.Row():
            prompt_box = gr.Textbox(
                label="Prompt",
                placeholder="e.g. A futuristic Asian utopian city, cinematic lighting...",
            )
            negative_box = gr.Textbox(
                label="Negative Prompt",
                placeholder="e.g. lowres, bad anatomy, blurred...",
            )

        with gr.Row():
            steps_slider = gr.Slider(
                minimum=1, maximum=100, value=30, step=1,
                label="Number of Inference Steps",
            )
            scale_slider = gr.Slider(
                minimum=1.0, maximum=20.0, value=7.5, step=0.1,
                label="Guidance Scale (CFG)",
            )

        with gr.Row():
            width_slider = _dimension_slider("Width")
            height_slider = _dimension_slider("Height")

        run_button = gr.Button("Generate Image")
        results = gr.Gallery(label="Generated Images", show_label=False)

        # Input order must match generate_image's positional parameters.
        handler_inputs = [
            prompt_box,
            negative_box,
            steps_slider,
            scale_slider,
            width_slider,
            height_slider,
        ]
        run_button.click(fn=generate_image, inputs=handler_inputs, outputs=results)

    return demo
# --------------------------------------------------------------------
# 4) LAUNCH THE DEMO
# --------------------------------------------------------------------
if __name__ == "__main__":
    # Build the UI, then serve it on every network interface at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo_app = build_demo()
    demo_app.launch(**launch_options)