johnowhitaker committed on
Commit
b4e0431
·
1 Parent(s): 678d9e1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch, torchvision
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+ from PIL import Image, ImageColor
6
+ from diffusers import DDPMPipeline
7
+ from diffusers import DDIMScheduler
8
+
9
# Pick the compute device once: the original referenced `device` without ever
# defining it, which raised NameError as soon as the module was imported.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained pipeline and move it to the chosen device
pipeline_name = 'johnowhitaker/sd-class-wikiart-from-bedrooms'
image_pipe = DDPMPipeline.from_pretrained(pipeline_name).to(device)

# Set up the scheduler used by the custom sampling loop (40 inference steps)
scheduler = DDIMScheduler.from_pretrained(pipeline_name)
scheduler.set_timesteps(num_inference_steps=40)
16
+
17
def color_loss(images, target_color=(0.1, 0.9, 0.5)):
    """Return the mean absolute distance between `images` and a target color.

    `target_color` is an (R, G, B) triple; it is mapped with `* 2 - 1` so that
    it lives on the same (-1, 1) scale the images are compared on. The default
    (0.1, 0.9, 0.5) is a light teal.
    """
    # Build the color on the same device as the images and rescale it to (-1, 1)
    rgb = torch.tensor(target_color).to(images.device)
    # Add batch / height / width axes so it broadcasts against (b, c, h, w)
    rgb = (rgb * 2 - 1)[None, :, None, None]
    # Average absolute per-pixel deviation from the target color
    return (images - rgb).abs().mean()
24
+
25
+
26
def generate(color, guidance_loss_scale):
    """Sample one image from the pipeline while steering it towards `color`.

    Args:
        color: Color specifier accepted by `ImageColor.getcolor` (the UI
            supplies a hex string such as "#BB2266").
        guidance_loss_scale: Multiplier applied to the color loss before its
            gradient is taken; larger values steer the sample more strongly.

    Returns:
        A PIL image built from the final sample.
    """
    # ImageColor returns 0-255 integer channels, whereas color_loss expects
    # channels in (0, 1): rescale, and keep only R, G, B in case an alpha
    # channel is present in the specifier.
    rgb = ImageColor.getcolor(color, "RGB")[:3]
    target_color = [channel / 255 for channel in rgb]

    # Initial random x - just one image but you could add a 'num_images'
    # argument/input to give the user control. It is placed on whatever device
    # the pipeline lives on rather than relying on a separate global.
    x = torch.randn(1, 3, 256, 256).to(image_pipe.device)

    # Our custom sampling loop (plain iteration: tqdm was never imported and
    # the loop index was unused)
    for t in scheduler.timesteps:

        # Prep the model input
        model_input = scheduler.scale_model_input(x, t)

        # Predict the noise residual
        with torch.no_grad():
            noise_pred = image_pipe.unet(model_input, t)["sample"]

        # Set requires grad on x (shortcut method - we're doing this AFTER the
        # unet, so the gradient does not flow back through the model)
        x = x.detach().requires_grad_()

        # Get the predicted x0
        x0 = scheduler.step(noise_pred, t, x).pred_original_sample

        # Calculate the guidance loss
        loss = color_loss(x0, target_color) * guidance_loss_scale

        # Negative gradient: the direction that lowers the loss
        cond_grad = -torch.autograd.grad(loss, x)[0]

        # Modify x based on this gradient
        x = x.detach() + cond_grad

        # Now step with the scheduler
        x = scheduler.step(noise_pred, t, x).prev_sample

    # Return the final output as an image (or image grid if there is more than
    # one image); map (-1, 1) back to (0, 1) and then to 8-bit values
    grid = torchvision.utils.make_grid(x, nrow=4)
    im = grid.permute(1, 2, 0).cpu().clip(-1, 1) * 0.5 + 0.5
    return Image.fromarray(np.array(im * 255).astype(np.uint8))
66
+
67
+
68
# UI inputs, in the same order as generate()'s parameters
inputs = [
    # The default needs the leading '#': generate() passes this string to
    # ImageColor.getcolor, which does not accept a bare 'RRGGBB' specifier
    gr.ColorPicker(label="color", value='#55FFAA'),  # Add any inputs you need here
    gr.Slider(label="guidance_scale", minimum=1, maximum=100, value=30),
]
# Single image output showing the generated sample
outputs = gr.Image(label="result")
73
+
74
# Wire generate() up to the Gradio UI
demo = gr.Interface(
    fn=generate,
    inputs=inputs,
    outputs=outputs,
    examples=[
        # Each example row needs one value per input component
        # (color, guidance scale) to get people started
        ["#BB2266", 30],
    ],
)

if __name__ == "__main__":
    demo.launch()