Spaces:
Running
on
L40S
Running
on
L40S
ryanzhangfan
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -19,6 +19,14 @@ import torch
|
|
19 |
from emu3.mllm.processing_emu3 import Emu3Processor
|
20 |
import spaces
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Install flash attention, skipping CUDA build if necessary
|
24 |
subprocess.run(
|
@@ -170,7 +178,7 @@ def chat(history, user_input, user_image):
|
|
170 |
# Use Emu3-Chat for vision-language understanding
|
171 |
response = vision_language_understanding(user_image, user_input)
|
172 |
# Append the user input and response to the history
|
173 |
-
history = history + [(user_image
|
174 |
else:
|
175 |
# Use Emu3-Gen for image generation
|
176 |
generated_image = generate_image(user_input)
|
|
|
19 |
from emu3.mllm.processing_emu3 import Emu3Processor
|
20 |
import spaces
|
21 |
|
22 |
+
import io
|
23 |
+
import base64
|
24 |
+
|
25 |
+
def image2str(image) -> str:
    """Return an HTML snippet that embeds *image* inline as a data URI.

    The image is serialized to WEBP in memory, base64-encoded, and wrapped
    in a left-floated ``<div>`` containing an ``<img>`` tag.

    Args:
        image: Any object exposing ``save(fp, format=...)`` that writes the
            encoded image bytes to the given file-like object (presumably a
            PIL image -- confirm against the caller).

    Returns:
        The HTML string with the image embedded as a base64 data URI.
    """
    buf = io.BytesIO()
    image.save(buf, format="WEBP")
    i_str = base64.b64encode(buf.getvalue()).decode("ascii")
    # The declared MIME type must match the encoded payload (WEBP, not PNG),
    # and the data-URI syntax has no whitespace after the "base64," marker.
    return f'<div style="float:left"><img src="data:image/webp;base64,{i_str}"></div>'
|
30 |
|
31 |
# Install flash attention, skipping CUDA build if necessary
|
32 |
subprocess.run(
|
|
|
178 |
# Use Emu3-Chat for vision-language understanding
|
179 |
response = vision_language_understanding(user_image, user_input)
|
180 |
# Append the user input and response to the history
|
181 |
+
history = history + [(image2str(user_image) + user_input, response)]
|
182 |
else:
|
183 |
# Use Emu3-Gen for image generation
|
184 |
generated_image = generate_image(user_input)
|