adamelliotfields committed on
Commit
1128e78
·
verified ·
1 Parent(s): 17fa6fa

Performance improvements

Browse files
Files changed (1) hide show
  1. generate.py +71 -56
generate.py CHANGED
@@ -19,6 +19,17 @@ from diffusers import (
19
  )
20
  from diffusers.models import AutoencoderTiny
21
 
 
 
 
 
 
 
 
 
 
 
 
22
  # some models use the deprecated CLIPFeatureExtractor class
23
  # should use CLIPImageProcessor instead
24
  filterwarnings("ignore", category=FutureWarning, module="transformers")
@@ -32,18 +43,13 @@ class Loader:
32
  cls._instance = super(Loader, cls).__new__(cls)
33
  cls._instance.cpu = torch.device("cpu")
34
  cls._instance.gpu = torch.device("cuda")
35
- cls._instance.model_cpu = None
36
- cls._instance.model_gpu = None
37
  return cls._instance
38
 
39
  def load(self, model, scheduler, karras):
40
- SPACES_ZERO_GPU = (
41
- environ.get("SPACES_ZERO_GPU", "").lower() == "true"
42
- or environ.get("SPACES_ZERO_GPU", "") == "1"
43
- )
44
  model_lower = model.lower()
45
 
46
- scheduler_map = {
47
  "DEIS 2M": DEISMultistepScheduler,
48
  "DPM++ 2M": DPMSolverMultistepScheduler,
49
  "DPM2 a": KDPM2AncestralDiscreteScheduler,
@@ -59,63 +65,63 @@ class Loader:
59
  "beta_schedule": "scaled_linear",
60
  "timestep_spacing": "leading",
61
  "steps_offset": 1,
 
62
  }
63
 
64
- if self.model_gpu is not None:
65
- same_model = self.model_gpu.config._name_or_path.lower() == model_lower
66
- same_scheduler = isinstance(self.model_gpu.scheduler, scheduler_map[scheduler])
67
- same_karras = (
68
- not hasattr(self.model_gpu.scheduler.config, "use_karras_sigmas")
69
- or self.model_gpu.scheduler.config.use_karras_sigmas == karras
70
- )
71
- if same_model and same_scheduler and same_karras:
72
- return self.model_gpu
73
-
74
- if karras:
75
- scheduler_kwargs["use_karras_sigmas"] = True
76
-
77
  if scheduler == "PNDM" or scheduler == "Euler a":
78
  del scheduler_kwargs["use_karras_sigmas"]
79
 
80
- variant = (
81
- None
82
- if model_lower in ["sg161222/realistic_vision_v5.1_novae", "prompthero/openjourney-v4"]
83
- else "fp16"
84
- )
85
-
86
- pipeline_kwargs = {
87
  "pretrained_model_name_or_path": model_lower,
88
  "requires_safety_checker": False,
89
  "safety_checker": None,
90
- "scheduler": scheduler_map[scheduler](**scheduler_kwargs),
91
- "torch_dtype": torch.float16,
92
- "variant": variant,
93
  "use_safetensors": True,
94
- "vae": AutoencoderTiny.from_pretrained(
95
- "madebyollin/taesd",
96
- torch_dtype=torch.float16,
97
- use_safetensors=True,
98
- ),
99
  }
100
 
101
- scheduler_cls = scheduler_map[scheduler]
102
- pipeline_kwargs["scheduler"] = scheduler_cls(**scheduler_kwargs)
103
-
104
- # in ZeroGPU we always start fresh
105
- if SPACES_ZERO_GPU:
106
- self.model_gpu = None
107
- self.model_cpu = None
 
 
108
 
109
- if self.model_gpu is not None:
110
- model_gpu_name = self.model_gpu.config._name_or_path
111
- self.model_cpu = self.model_gpu.to(self.cpu, silence_dtype_warnings=True)
112
- self.model_gpu = None
113
- torch.cuda.empty_cache()
114
- print(f"Moved {model_gpu_name} to CPU ✓")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- self.model_gpu = StableDiffusionPipeline.from_pretrained(**pipeline_kwargs).to(self.gpu)
117
- print(f"Moved {model_lower} to GPU ✓")
118
- return self.model_gpu
119
 
120
 
121
  # prepare prompts for Compel
@@ -153,12 +159,16 @@ def generate(
153
  model="lykon/dreamshaper-8",
154
  scheduler="DEIS 2M",
155
  aspect_ratio="1:1",
156
- guidance_scale=7,
157
  inference_steps=30,
158
  karras=True,
159
  num_images=1,
160
  increment_seed=True,
 
161
  ):
 
 
 
162
  # image dimensions
163
  aspect_ratios = {
164
  "16:9": (640, 360),
@@ -178,8 +188,8 @@ def generate(
178
  tokenizer=pipe.tokenizer,
179
  text_encoder=pipe.text_encoder,
180
  truncate_long_prompts=False,
181
- device=pipe.device.type,
182
- dtype_for_device_getter=lambda _: torch.float16,
183
  )
184
 
185
  neg_prompt = join_prompt(negative_prompt)
@@ -192,7 +202,9 @@ def generate(
192
  images = []
193
 
194
  for i in range(num_images):
195
- generator = torch.Generator(device=pipe.device.type).manual_seed(current_seed)
 
 
196
  all_positive_prompts = parse_prompt(positive_prompt)
197
  prompt_index = i % len(all_positive_prompts)
198
  pos_prompt = all_positive_prompts[prompt_index]
@@ -210,10 +222,13 @@ def generate(
210
  guidance_scale=guidance_scale,
211
  generator=generator,
212
  )
213
-
214
  images.append((result.images[0], str(current_seed)))
215
 
216
  if increment_seed:
217
  current_seed += 1
218
 
 
 
 
 
219
  return images
 
19
  )
20
  from diffusers.models import AutoencoderTiny
21
 
22
+ ZERO_GPU = (
23
+ environ.get("SPACES_ZERO_GPU", "").lower() == "true"
24
+ or environ.get("SPACES_ZERO_GPU", "") == "1"
25
+ )
26
+
27
+ TORCH_DTYPE = (
28
+ torch.bfloat16
29
+ if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
30
+ else torch.float16
31
+ )
32
+
33
  # some models use the deprecated CLIPFeatureExtractor class
34
  # should use CLIPImageProcessor instead
35
  filterwarnings("ignore", category=FutureWarning, module="transformers")
 
43
  cls._instance = super(Loader, cls).__new__(cls)
44
  cls._instance.cpu = torch.device("cpu")
45
  cls._instance.gpu = torch.device("cuda")
46
+ cls._instance.pipe = None
 
47
  return cls._instance
48
 
49
  def load(self, model, scheduler, karras):
 
 
 
 
50
  model_lower = model.lower()
51
 
52
+ schedulers = {
53
  "DEIS 2M": DEISMultistepScheduler,
54
  "DPM++ 2M": DPMSolverMultistepScheduler,
55
  "DPM2 a": KDPM2AncestralDiscreteScheduler,
 
65
  "beta_schedule": "scaled_linear",
66
  "timestep_spacing": "leading",
67
  "steps_offset": 1,
68
+ "use_karras_sigmas": karras,
69
  }
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  if scheduler == "PNDM" or scheduler == "Euler a":
72
  del scheduler_kwargs["use_karras_sigmas"]
73
 
74
+ pipe_kwargs = {
 
 
 
 
 
 
75
  "pretrained_model_name_or_path": model_lower,
76
  "requires_safety_checker": False,
77
  "safety_checker": None,
78
+ "scheduler": schedulers[scheduler](**scheduler_kwargs),
79
+ "torch_dtype": TORCH_DTYPE,
 
80
  "use_safetensors": True,
 
 
 
 
 
81
  }
82
 
83
+ # already loaded
84
+ if self.pipe is not None:
85
+ model_name = self.pipe.config._name_or_path
86
+ same_model = model_name.lower() == model_lower
87
+ same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
88
+ same_karras = (
89
+ not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
90
+ or self.pipe.scheduler.config.use_karras_sigmas == karras
91
+ )
92
 
93
+ if same_model:
94
+ if not same_scheduler:
95
+ print(f"Swapping scheduler to {scheduler}...")
96
+ elif not same_karras:
97
+ print(f"{'Enabling' if karras else 'Disabling'} Karras sigmas...")
98
+ elif not (same_scheduler and same_karras):
99
+ self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
100
+ return self.pipe
101
+ else:
102
+ print(f"Unloading {model_name.lower()}...")
103
+ self.pipe = None
104
+ torch.cuda.empty_cache()
105
+
106
+ # no fp16 available
107
+ if not ZERO_GPU and model_lower not in [
108
+ "sg161222/realistic_vision_v5.1_novae",
109
+ "prompthero/openjourney-v4",
110
+ "linaqruf/anything-v3-1",
111
+ ]:
112
+ pipe_kwargs["variant"] = "fp16"
113
+
114
+ # uses special VAE
115
+ if model_lower not in ["linaqruf/anything-v3-1"]:
116
+ pipe_kwargs["vae"] = AutoencoderTiny.from_pretrained(
117
+ "madebyollin/taesd",
118
+ torch_dtype=TORCH_DTYPE,
119
+ use_safetensors=True,
120
+ )
121
 
122
+ print(f"Loading {model_lower}...")
123
+ self.pipe = StableDiffusionPipeline.from_pretrained(**pipe_kwargs).to(self.gpu)
124
+ return self.pipe
125
 
126
 
127
  # prepare prompts for Compel
 
159
  model="lykon/dreamshaper-8",
160
  scheduler="DEIS 2M",
161
  aspect_ratio="1:1",
162
+ guidance_scale=7.5,
163
  inference_steps=30,
164
  karras=True,
165
  num_images=1,
166
  increment_seed=True,
167
+ Error=Exception,
168
  ):
169
+ if not torch.cuda.is_available():
170
+ raise Error("CUDA not available")
171
+
172
  # image dimensions
173
  aspect_ratios = {
174
  "16:9": (640, 360),
 
188
  tokenizer=pipe.tokenizer,
189
  text_encoder=pipe.text_encoder,
190
  truncate_long_prompts=False,
191
+ device=pipe.device,
192
+ dtype_for_device_getter=lambda _: TORCH_DTYPE,
193
  )
194
 
195
  neg_prompt = join_prompt(negative_prompt)
 
202
  images = []
203
 
204
  for i in range(num_images):
205
+ generator = torch.Generator(device=pipe.device).manual_seed(current_seed)
206
+
207
+ # run the prompt for this iteration
208
  all_positive_prompts = parse_prompt(positive_prompt)
209
  prompt_index = i % len(all_positive_prompts)
210
  pos_prompt = all_positive_prompts[prompt_index]
 
222
  guidance_scale=guidance_scale,
223
  generator=generator,
224
  )
 
225
  images.append((result.images[0], str(current_seed)))
226
 
227
  if increment_seed:
228
  current_seed += 1
229
 
230
+ if ZERO_GPU:
231
+ # spaces always start fresh
232
+ loader.pipe = None
233
+
234
  return images