File size: 6,031 Bytes
cfccdc2
d77a781
596d353
d77a781
0006a92
586ddad
 
 
cfccdc2
a072a82
e6d1b4c
a072a82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec9fc50
 
 
 
 
 
 
0eaa580
c2305a3
 
 
 
 
36c3ba1
 
c2305a3
 
 
 
 
 
 
 
 
36c3ba1
c2305a3
 
 
 
 
 
 
 
a072a82
b2f39c8
a072a82
9a1d007
 
a072a82
 
 
 
 
 
 
 
 
6ef67fd
a072a82
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import gradio as gr
import numpy as np
# from edict_functions import EDICT_editing
from PIL import Image
from utils import Endpoint, get_token
from io import BytesIO
import requests


endpoint = Endpoint()

def local_edict(x, source_text, edit_text,
         edit_strength, guidance_scale,
          steps=50, mix_weight=0.93, ):
    x = Image.fromarray(x)
    return_im =  EDICT_editing(x,
                         source_text,
                         edit_text,
                  steps=steps,
                  mix_weight=mix_weight,
                  init_image_strength=edit_strength,
                  guidance_scale=guidance_scale
                              )[0]
    return np.array(return_im)

def encode_image(image):
    buffered = BytesIO()
    image.save(buffered, format="JPEG", quality=95)
    buffered.seek(0)

    return buffered



def decode_image(img_obj):
    img = Image.open(img_obj).convert("RGB")
    return img

def edict(x, source_text, edit_text,
         edit_strength, guidance_scale,
          steps=50, mix_weight=0.93, ):

    url = endpoint.url
    url = url + "/api/edit"
    headers = {### Misc.

        "User-Agent": "EDICT HuggingFace Space",
        "Auth-Token": get_token(),
    }

    data = {
        "source_text": source_text,
        "edit_text": edit_text,
        "edit_strength": edit_strength,
        "guidance_scale": guidance_scale,
    }

    image = encode_image(Image.fromarray(x))
    files = {"image": image}  

    response = requests.post(url, data=data, files=files, headers=headers)
    
    if response.status_code == 200:
        return np.array(decode_image(BytesIO(response.content)))
    else:
        return "Error: " + response.text   
    # x = decode_image(response)
    # return np.array(x)

examples = [
        ['square_ims/american_gothic.jpg', 'A painting of two people frowning', 'A painting of two people smiling', 0.5, 3],
        ['square_ims/colloseum.jpg', 'An old ruined building', 'A new modern office building', 0.8, 3],
    ]


examples.append(['square_ims/scream.jpg', 'A painting of someone screaming', 'A painting of an alien', 0.5, 3])
examples.append(['square_ims/yosemite.jpg', 'Granite forest valley', 'Granite desert valley', 0.8, 3])
examples.append(['square_ims/einstein.jpg', 'Mouth open', 'Mouth closed', 0.8, 3])
examples.append(['square_ims/einstein.jpg', 'A man', 'A man in K.I.S.S. facepaint', 0.8, 3])
"""
examples.extend([
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Chinese New Year cupcake', 0.8, 3],
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Union Jack cupcake', 0.8, 3],
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Nigerian flag cupcake', 0.8, 3],
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Santa Claus cupcake', 0.8, 3],
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'An Easter cupcake', 0.8, 3],
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A hedgehog cupcake', 0.8, 3],
        ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A rose cupcake', 0.8, 3],
    ])
"""

for dog_i in [1, 2]:
    for breed in ['Golden Retriever', 'Chihuahua', 'Dalmatian']:
        examples.append([f'square_ims/imagenet_dog_{dog_i}.jpg', 'A dog', f'A {breed}', 0.8, 3])


description = """
**We have disabled image uploading from March 22. 2023.**

**Please try examples provided below.**

A gradio demo for [EDICT](https://arxiv.org/abs/2211.12446) (CVPR23)
"""
# description = gr.Markdown(description)

article = """

### Prompting Style

As with many text-to-image methods, the prompting style of EDICT can make a big difference. When in doubt, experiment! Some guidance:
* Parallel *Original Description* and *Edit Description* construction as much as possible. Inserting/editing single words often is enough to affect a change while maintaining a lot of the original structure
* Words that will affect the entire setting (e.g. "A photo of " vs. "A painting of") can make a big difference. Playing around with them can help a lot

### Parameters
Both `edit_strength` and `guidance_scale` have similar properties qualitatively: the higher the value the more the image will change. We suggest
* Increasing/decreasing `edit_strength` first, particularly to alter/preserve more of the original structure/content
* Then changing `guidance_scale` to make the change in the edited region more or less pronounced.

Usually we find changing `edit_strength` to be enough, but feel free to play around (and report any interesting results)!

### Misc.

Having difficulty coming up with a caption? Try [BLIP](https://huggingface.co/spaces/Salesforce/BLIP2) to automatically generate one!

As with most StableDiffusion approaches, faces/text are often problematic to render, especially if they're small. Having these in the foreground will help keep them cleaner.

A returned black image means that the [Safety Checker](https://huggingface.co/CompVis/stable-diffusion-safety-checker) triggered on the photo. This happens in odd cases sometimes (it often rejects
the huggingface logo or variations), but we need to keep it in for obvious reasons.
"""
# article = gr.Markdown(description)

iface = gr.Interface(fn=edict, inputs=[gr.Image(interactive=False),
                                       gr.Textbox(label="Original Description", interactive=False),
                                       gr.Textbox(label="Edit Description", interactive=False),
                                       # 50, # gr.Slider(5, 50, value=20, step=1),
                                       # 0.93, # gr.Slider(0.5, 1, value=0.7, step=0.05),
                                       gr.Slider(0.0, 1, value=0.8, step=0.05),
                                       gr.Slider(0, 10, value=3, step=0.5),
                                      ],
                     examples = examples,
                     outputs="image",
                     description=description,
                     article=article,
                     cache_examples=True
                    )
iface.launch()