File size: 3,497 Bytes
80ac042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d53e6bc
80ac042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de552b3
80ac042
 
de552b3
80ac042
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
---
library_name: diffusers
license: other
license_name: flux-1-dev-non-commercial-license
license_link: LICENSE.md
---

> [!NOTE]
> This repository contains the NF4-quantized checkpoints (`transformer` and `text_encoder_2`) of [`black-forest-labs/FLUX.1-Depth-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev). Please adhere to the original model's license!

<details>
  <summary>Code</summary>

```py
# !pip install git+https://github.com/asomoza/image_gen_aux.git
from diffusers import DiffusionPipeline, FluxControlPipeline, FluxTransformer2DModel
import torch
from transformers import T5EncoderModel
from image_gen_aux import DepthPreprocessor
from diffusers.utils import load_image
import fire


def load_pipeline(four_bit=False):
    """Assemble a ``FluxControlPipeline`` for depth-conditioned generation.

    The FLUX.1-dev pipeline is loaded first and then converted with
    ``FluxControlPipeline.from_pipe``, overriding selected components.

    Args:
        four_bit: When true, ``transformer`` and ``text_encoder_2`` are taken
            from the ``sayakpaul/FLUX.1-Depth-dev-nf4`` repository. Otherwise
            only the transformer is overridden, using revision ``refs/pr/1``
            of ``black-forest-labs/FLUX.1-Depth-dev``.

    Returns:
        The control pipeline, with model CPU offload enabled.
    """
    dtype = torch.bfloat16
    base_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype)

    if not four_bit:
        # Only the transformer is swapped; everything else comes from the base pipeline.
        overrides = {
            "transformer": FluxTransformer2DModel.from_pretrained(
                "black-forest-labs/FLUX.1-Depth-dev",
                subfolder="transformer",
                revision="refs/pr/1",
                torch_dtype=dtype,
            ),
        }
    else:
        print("Using four bit.")
        nf4_repo = "sayakpaul/FLUX.1-Depth-dev-nf4"
        nf4_transformer = FluxTransformer2DModel.from_pretrained(
            nf4_repo, subfolder="transformer", torch_dtype=dtype
        )
        nf4_text_encoder = T5EncoderModel.from_pretrained(
            nf4_repo, subfolder="text_encoder_2", torch_dtype=dtype
        )
        overrides = {"transformer": nf4_transformer, "text_encoder_2": nf4_text_encoder}

    control_pipe = FluxControlPipeline.from_pipe(base_pipe, **overrides, torch_dtype=dtype)
    control_pipe.enable_model_cpu_offload()
    return control_pipe

@torch.no_grad()
def get_depth(control_image):
    """Turn ``control_image`` into a depth-map conditioning image.

    Loads ``DepthPreprocessor`` from ``LiheYoung/depth-anything-large-hf``,
    applies it to the input, and returns the first element of the result
    converted to RGB. Runs with gradient tracking disabled.
    """
    depth_processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
    first_output = depth_processor(control_image)[0]
    return first_output.convert("RGB")

def load_conditions():
    """Return the example ``(prompt, control_image)`` pair used by ``main``.

    The control image is the robot picture from the Hugging Face
    documentation-images dataset, passed through ``get_depth``.
    """
    source_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png"
    depth_image = get_depth(load_image(source_url))
    prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
    return prompt, depth_image


def main(four_bit: bool = False):
    """Generate one depth-conditioned image and save it as a PNG.

    Args:
        four_bit: Forwarded to ``load_pipeline``; also appends ``_4bit`` to
            the output file name when true.

    The image is written to the current working directory.
    """
    ckpt_id = "sayakpaul/FLUX.1-Depth-dev-nf4"
    pipe = load_pipeline(four_bit=four_bit)
    prompt, control_image = load_conditions()

    # Fixed CPU seed so repeated runs start from the same generator state.
    seeded_generator = torch.Generator("cpu").manual_seed(0)
    result = pipe(
        prompt=prompt,
        control_image=control_image,
        height=1024,
        width=1024,
        num_inference_steps=30,
        guidance_scale=10.0,
        max_sequence_length=512,
        generator=seeded_generator,
    )
    image = result.images[0]

    # NOTE(review): the name is derived from the NF4 repo id even when
    # four_bit is False — confirm that this is the intended naming.
    repo_name = ckpt_id.split("/")[-1]
    stem = "output_" + repo_name.replace(".", "_")
    if four_bit:
        stem += "_4bit"
    image.save(f"{stem}.png")


if __name__ == "__main__":
    # Hand `main` to python-fire so its `four_bit` parameter can be supplied
    # from the command line when this file is run as a script.
    fire.Fire(main)
```

</details>

## Outputs

<table>
    <thead>
        <tr>
            <th>Original</th>
            <th>NF4</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>
                <img src="./assets/output_FLUX_1-Depth-dev.png" alt="Original">
            </td>
            <td>
                <img src="./assets/output_FLUX_1-Depth-dev_4bit.png" alt="NF4">
            </td>
        </tr>
    </tbody>
</table>