radames committed on
Commit
dd9c27c
·
1 Parent(s): 3e47535

make it work with OSX Mac m1/m2/m3

Browse files
Files changed (3) hide show
  1. README.md +14 -10
  2. app-img2img.py +38 -15
  3. app-txt2img.py +30 -7
README.md CHANGED
@@ -17,37 +17,42 @@ You need a webcam to run this demo. 🤗
17
 
18
  You need CUDA and Python
19
  `TIMEOUT`: limit user session timeout
20
- `SAFETY_CHECKER`: disabled if you want NSFW filter off
21
  `MAX_QUEUE_SIZE`: limit number of users on current app instance
22
 
23
  ### image to image
 
24
  ```bash
25
- python -m venv venv
26
- source venv/bin/activate
27
- pip install -r requirements.txt
28
  uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload
29
  ```
30
 
31
  ### text to image
32
 
33
  ```bash
34
- python -m venv venv
35
- source venv/bin/activate
36
- pip install -r requirements.txt
37
  uvicorn "app-txt2img:app" --host 0.0.0.0 --port 7860 --reload
38
  ```
 
39
  or with environment variables
 
40
  ```bash
41
  TIMEOUT=120 SAFETY_CHECKER=True MAX_QUEUE_SIZE=4 uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload
42
  ```
43
 
44
- If you're running locally and want to test it on Mobile Safari, the webserver needs to be served over HTTPS.
45
 
46
  ```bash
47
  openssl req -newkey rsa:4096 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem
48
  uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload --log-level info --ssl-certfile=certificate.pem --ssl-keyfile=key.pem
49
  ```
 
50
  ## Docker
 
51
  You need NVIDIA Container Toolkit for Docker
52
 
53
  ```bash
@@ -62,8 +67,7 @@ docker run -ti -e TIMEOUT=0 -e SAFETY_CHECKER=False -p 7860:7860 --gpus all lcm-
62
  ```
63
 
64
  # Demo on Hugging Face
65
- https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model
66
 
 
67
 
68
  https://github.com/radames/Real-Time-Latent-Consistency-Model/assets/102277/c4003ac5-e7ff-44c0-97d3-464bb659de70
69
-
 
17
 
18
  You need CUDA and Python
19
  `TIMEOUT`: limit user session timeout
20
+ `SAFETY_CHECKER`: disabled if you want NSFW filter off
21
  `MAX_QUEUE_SIZE`: limit number of users on current app instance
22
 
23
  ### image to image
24
+
25
  ```bash
26
+ python -m venv venv
27
+ source venv/bin/activate
28
+ pip3 install -r requirements.txt
29
  uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload
30
  ```
31
 
32
  ### text to image
33
 
34
  ```bash
35
+ python -m venv venv
36
+ source venv/bin/activate
37
+ pip3 install -r requirements.txt
38
  uvicorn "app-txt2img:app" --host 0.0.0.0 --port 7860 --reload
39
  ```
40
+
41
  or with environment variables
42
+
43
  ```bash
44
  TIMEOUT=120 SAFETY_CHECKER=True MAX_QUEUE_SIZE=4 uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload
45
  ```
46
 
47
+ If you're running locally and want to test it on Mobile Safari, the webserver needs to be served over HTTPS.
48
 
49
  ```bash
50
  openssl req -newkey rsa:4096 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem
51
  uvicorn "app-img2img:app" --host 0.0.0.0 --port 7860 --reload --log-level info --ssl-certfile=certificate.pem --ssl-keyfile=key.pem
52
  ```
53
+
54
  ## Docker
55
+
56
  You need NVIDIA Container Toolkit for Docker
57
 
58
  ```bash
 
67
  ```
68
 
69
  # Demo on Hugging Face
 
70
 
71
+ https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model
72
 
73
  https://github.com/radames/Real-Time-Latent-Consistency-Model/assets/102277/c4003ac5-e7ff-44c0-97d3-464bb659de70
 
app-img2img.py CHANGED
@@ -19,6 +19,8 @@ import io
19
  import uuid
20
  import os
21
  import time
 
 
22
 
23
  MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
24
  TIMEOUT = float(os.environ.get("TIMEOUT", 0))
@@ -28,6 +30,17 @@ print(f"TIMEOUT: {TIMEOUT}")
28
  print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
29
  print(f"MAX_QUEUE_SIZE: {MAX_QUEUE_SIZE}")
30
 
 
 
 
 
 
 
 
 
 
 
 
31
  if SAFETY_CHECKER == "True":
32
  pipe = DiffusionPipeline.from_pretrained(
33
  "SimianLuo/LCM_Dreamshaper_v7",
@@ -41,19 +54,28 @@ else:
41
  custom_pipeline="latent_consistency_img2img.py",
42
  custom_revision="main",
43
  )
44
- #TODO try to use tiny VAE
45
  # pipe.vae = AutoencoderTiny.from_pretrained(
46
  # "madebyollin/taesd", torch_dtype=torch.float16, use_safetensors=True
47
  # )
48
  pipe.set_progress_bar_config(disable=True)
49
- pipe.to(torch_device="cuda", torch_dtype=torch.float16)
50
  pipe.unet.to(memory_format=torch.channels_last)
51
- pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
52
- compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, truncate_long_prompts=False)
 
 
 
 
 
 
 
 
 
 
 
53
  user_queue_map = {}
54
 
55
- # for torch.compile
56
- pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
57
 
58
  def predict(input_image, prompt, guidance_scale=8.0, strength=0.5, seed=2159232):
59
  generator = torch.manual_seed(seed)
@@ -112,9 +134,7 @@ async def websocket_endpoint(websocket: WebSocket):
112
  await websocket.send_json(
113
  {"status": "success", "message": "Connected", "userId": uid}
114
  )
115
- user_queue_map[uid] = {
116
- "queue": asyncio.Queue()
117
- }
118
  await websocket.send_json(
119
  {"status": "start", "message": "Start Streaming", "userId": uid}
120
  )
@@ -155,7 +175,13 @@ async def stream(user_id: uuid.UUID):
155
  if input_image is None:
156
  continue
157
 
158
- image = predict(input_image, params.prompt, params.guidance_scale, params.strength, params.seed)
 
 
 
 
 
 
159
  if image is None:
160
  continue
161
  frame_data = io.BytesIO()
@@ -194,10 +220,7 @@ async def handle_websocket_data(websocket: WebSocket, user_id: uuid.UUID):
194
  queue.get_nowait()
195
  except asyncio.QueueEmpty:
196
  continue
197
- await queue.put({
198
- "image": pil_image,
199
- "params": params
200
- })
201
  if TIMEOUT > 0 and time.time() - last_time > TIMEOUT:
202
  await websocket.send_json(
203
  {
@@ -214,4 +237,4 @@ async def handle_websocket_data(websocket: WebSocket, user_id: uuid.UUID):
214
  traceback.print_exc()
215
 
216
 
217
- app.mount("/", StaticFiles(directory="img2img", html=True), name="public")
 
19
  import uuid
20
  import os
21
  import time
22
+ import psutil
23
+
24
 
25
  MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
26
  TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 
30
  print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
31
  print(f"MAX_QUEUE_SIZE: {MAX_QUEUE_SIZE}")
32
 
33
+ # check if the MPS backend is available (macOS only, on M1/M2/M3 chips)
34
+ mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
35
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
+ torch_device = device
37
+ torch_dtype = torch.float16
38
+
39
+ if mps_available:
40
+ device = torch.device("mps")
41
+ torch_device = "cpu"
42
+ torch_dtype = torch.float32
43
+
44
  if SAFETY_CHECKER == "True":
45
  pipe = DiffusionPipeline.from_pretrained(
46
  "SimianLuo/LCM_Dreamshaper_v7",
 
54
  custom_pipeline="latent_consistency_img2img.py",
55
  custom_revision="main",
56
  )
57
+ # TODO try to use tiny VAE
58
  # pipe.vae = AutoencoderTiny.from_pretrained(
59
  # "madebyollin/taesd", torch_dtype=torch.float16, use_safetensors=True
60
  # )
61
  pipe.set_progress_bar_config(disable=True)
62
+ pipe.to(torch_device=torch_device, torch_dtype=torch_dtype).to(device)
63
  pipe.unet.to(memory_format=torch.channels_last)
64
+
65
+ if psutil.virtual_memory().total < 64 * 1024**3:
66
+ pipe.enable_attention_slicing()
67
+
68
+ if not mps_available:
69
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
70
+ pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
71
+
72
+ compel_proc = Compel(
73
+ tokenizer=pipe.tokenizer,
74
+ text_encoder=pipe.text_encoder,
75
+ truncate_long_prompts=False,
76
+ )
77
  user_queue_map = {}
78
 
 
 
79
 
80
  def predict(input_image, prompt, guidance_scale=8.0, strength=0.5, seed=2159232):
81
  generator = torch.manual_seed(seed)
 
134
  await websocket.send_json(
135
  {"status": "success", "message": "Connected", "userId": uid}
136
  )
137
+ user_queue_map[uid] = {"queue": asyncio.Queue()}
 
 
138
  await websocket.send_json(
139
  {"status": "start", "message": "Start Streaming", "userId": uid}
140
  )
 
175
  if input_image is None:
176
  continue
177
 
178
+ image = predict(
179
+ input_image,
180
+ params.prompt,
181
+ params.guidance_scale,
182
+ params.strength,
183
+ params.seed,
184
+ )
185
  if image is None:
186
  continue
187
  frame_data = io.BytesIO()
 
220
  queue.get_nowait()
221
  except asyncio.QueueEmpty:
222
  continue
223
+ await queue.put({"image": pil_image, "params": params})
 
 
 
224
  if TIMEOUT > 0 and time.time() - last_time > TIMEOUT:
225
  await websocket.send_json(
226
  {
 
237
  traceback.print_exc()
238
 
239
 
240
+ app.mount("/", StaticFiles(directory="img2img", html=True), name="public")
app-txt2img.py CHANGED
@@ -19,6 +19,8 @@ import io
19
  import uuid
20
  import os
21
  import time
 
 
22
 
23
  MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
24
  TIMEOUT = float(os.environ.get("TIMEOUT", 0))
@@ -28,6 +30,17 @@ print(f"TIMEOUT: {TIMEOUT}")
28
  print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
29
  print(f"MAX_QUEUE_SIZE: {MAX_QUEUE_SIZE}")
30
 
 
 
 
 
 
 
 
 
 
 
 
31
  if SAFETY_CHECKER == "True":
32
  pipe = DiffusionPipeline.from_pretrained(
33
  "SimianLuo/LCM_Dreamshaper_v7",
@@ -42,17 +55,27 @@ else:
42
  custom_revision="main",
43
  )
44
  pipe.vae = AutoencoderTiny.from_pretrained(
45
- "madebyollin/taesd", torch_dtype=torch.float16, use_safetensors=True
46
  )
47
  pipe.set_progress_bar_config(disable=True)
48
- pipe.to(torch_device="cuda", torch_dtype=torch.float16)
49
  pipe.unet.to(memory_format=torch.channels_last)
50
- compel_proc = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, truncate_long_prompts=False)
51
- pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
 
 
 
 
 
 
 
 
 
 
 
 
52
  user_queue_map = {}
53
 
54
- # warmup trigger compilation
55
- pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
56
 
57
  def predict(prompt, guidance_scale=8.0, seed=2159232):
58
  generator = torch.manual_seed(seed)
@@ -148,7 +171,7 @@ async def stream(user_id: uuid.UUID):
148
  params = await queue.get()
149
  if params is None:
150
  continue
151
-
152
  image = predict(params.prompt, params.guidance_scale, params.seed)
153
  if image is None:
154
  continue
 
19
  import uuid
20
  import os
21
  import time
22
+ import psutil
23
+
24
 
25
  MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
26
  TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 
30
  print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
31
  print(f"MAX_QUEUE_SIZE: {MAX_QUEUE_SIZE}")
32
 
33
+ # check if the MPS backend is available (macOS only, on M1/M2/M3 chips)
34
+ mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
35
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
+ torch_device = device
37
+ torch_dtype = torch.float16
38
+
39
+ if mps_available:
40
+ device = torch.device("mps")
41
+ torch_device = "cpu"
42
+ torch_dtype = torch.float32
43
+
44
  if SAFETY_CHECKER == "True":
45
  pipe = DiffusionPipeline.from_pretrained(
46
  "SimianLuo/LCM_Dreamshaper_v7",
 
55
  custom_revision="main",
56
  )
57
  pipe.vae = AutoencoderTiny.from_pretrained(
58
+ "madebyollin/taesd", torch_dtype=torch_dtype, use_safetensors=True
59
  )
60
  pipe.set_progress_bar_config(disable=True)
61
+ pipe.to(torch_device=torch_device, torch_dtype=torch_dtype).to(device)
62
  pipe.unet.to(memory_format=torch.channels_last)
63
+
64
+ # enable attention slicing when the machine has less than 64GB of RAM (total memory read via psutil)
65
+ if psutil.virtual_memory().total < 64 * 1024**3:
66
+ pipe.enable_attention_slicing()
67
+
68
+ if not mps_available:
69
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
70
+ pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
71
+
72
+ compel_proc = Compel(
73
+ tokenizer=pipe.tokenizer,
74
+ text_encoder=pipe.text_encoder,
75
+ truncate_long_prompts=False,
76
+ )
77
  user_queue_map = {}
78
 
 
 
79
 
80
  def predict(prompt, guidance_scale=8.0, seed=2159232):
81
  generator = torch.manual_seed(seed)
 
171
  params = await queue.get()
172
  if params is None:
173
  continue
174
+
175
  image = predict(params.prompt, params.guidance_scale, params.seed)
176
  if image is None:
177
  continue