File size: 9,438 Bytes
2eb58d1
199e379
84f6f2e
 
 
 
 
 
2eb58d1
 
0098e32
 
84f6f2e
a17b7c9
84f6f2e
 
 
 
 
 
 
 
e9f0715
 
 
 
 
 
 
744a1fc
199e379
2eb58d1
4e01792
744a1fc
2eb58d1
0098e32
 
744a1fc
0098e32
3ea6729
 
744a1fc
40e4bc2
744a1fc
40e4bc2
d303ce7
744a1fc
 
 
 
 
 
 
 
 
 
 
2eb58d1
b2ef087
 
 
84f6f2e
 
 
2eb58d1
fd00b5e
e4c75aa
f03818c
63f5381
850abe8
 
fd00b5e
523a420
63f5381
c0ceaaf
fd00b5e
 
 
 
 
 
63f5381
0098e32
 
 
ae24367
0098e32
7e5566d
0098e32
63f5381
0098e32
 
 
 
7e5566d
850abe8
 
 
 
 
 
fd00b5e
744a1fc
 
fd00b5e
 
4df993c
 
744a1fc
 
ae24367
3ea6729
b2ef087
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
import random
import os
import io, base64
from PIL import Image
import numpy
import shortuuid

# One remote interface per text-to-image backend; each is later invoked like a
# function by the matching text2image_* wrapper. The generator is consumed left
# to right, so the Spaces are loaded in the same order as the names are listed.
latent, rudalle, diffusion, vqgan = (
    gr.Interface.load(f"spaces/multimodalart/{space_name}")
    for space_name in ("latentdiffusion", "rudalle", "diffusion", "vqgan")
)

def text2image_latent(text,steps,width,height,images,diversity):
    """Generate images with the ``latent`` interface and save them as PNG files.

    All arguments are forwarded positionally, unchanged, to ``latent``. The
    second element of its result is iterated; the first item of every entry is
    treated as a base64 string (optionally prefixed with a PNG data-URI
    header), decoded, opened with PIL and written to ``./tmp`` under a random
    ``shortuuid`` file name.

    Args:
        text: Prompt forwarded to the interface.
        steps: Forwarded as the second positional argument.
        width: Forwarded as the third positional argument.
        height: Forwarded as the fourth positional argument.
        images: Forwarded as the fifth positional argument.
        diversity: Forwarded as the sixth positional argument.

    Returns:
        list[str]: Paths of the saved PNG files, in the order the images were
        received.
    """
    results = latent(text, steps, width, height, images, diversity)
    encoded_entries = results[1]

    # The output directory is loop-invariant: create it once, up front.
    # exist_ok=True already covers the "directory is there" case, so no
    # separate os.path.exists() check is needed.
    temp_dir = './tmp'
    os.makedirs(temp_dir, exist_ok=True)

    image_paths = []
    for image in encoded_entries:
        # Drop the data-URI header so that only the base64 payload remains.
        image_str = image[0].replace("data:image/png;base64,","")
        decoded_bytes = base64.decodebytes(image_str.encode("utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        # Build the path once and reuse it for both saving and returning.
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        img.save(image_path)
        image_paths.append(image_path)
    return image_paths
  
def text2image_rudalle(text,aspect,model):
    """Call the ``rudalle`` interface and return its first output in a one-item list."""
    return [rudalle(text, aspect, model)[0]]

def text2image_vqgan(text,width,height,style,steps,flavor):
    """Call the ``vqgan`` interface and return its whole result in a one-item list."""
    return [vqgan(text, width, height, style, steps, flavor)]

def text2image_diffusion(text,steps_diff, images_diff, weight, clip):
    """Generate images with the ``diffusion`` interface and save them as PNG files.

    All arguments are forwarded positionally, unchanged, to ``diffusion``. Its
    result is iterated directly; every item is treated as a base64 string
    (optionally prefixed with a PNG data-URI header), decoded, opened with PIL
    and written to ``./tmp`` under a random ``shortuuid`` file name.

    Args:
        text: Prompt forwarded to the interface.
        steps_diff: Forwarded as the second positional argument.
        images_diff: Forwarded as the third positional argument.
        weight: Forwarded as the fourth positional argument.
        clip: Forwarded as the fifth positional argument.

    Returns:
        list[str]: Paths of the saved PNG files, in the order the images were
        received.
    """
    results = diffusion(text, steps_diff, images_diff, weight, clip)

    # The output directory is loop-invariant: create it once, up front.
    # exist_ok=True already covers the "directory is there" case, so no
    # separate os.path.exists() check is needed.
    temp_dir = './tmp'
    os.makedirs(temp_dir, exist_ok=True)

    # The former debug prints were removed: they wrote the complete base64
    # payload of every image to stdout on each request.
    image_paths = []
    for image in results:
        # Drop the data-URI header so that only the base64 payload remains.
        image_str = image.replace("data:image/png;base64,","")
        decoded_bytes = base64.decodebytes(image_str.encode("utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        # Build the path once and reuse it for both saving and returning.
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        img.save(image_path)
        image_paths.append(image_path)
    return image_paths

def text2image_dallemini(text):
    """Unimplemented stub: ignores ``text`` and returns ``None``."""
    return None

# Style mapping passed as `css=` to every "Generate Image" button below.
css_mt = {"margin-top": "1em"}

# HTML output component; it is not referenced anywhere else in the visible file.
empty = gr.outputs.HTML()    

# Page layout. Components are created in the order the statements run inside the
# nested `with` contexts, so statement order here defines the on-screen order.
with gr.Blocks() as mindseye:
    # Page header and introduction.
    # NOTE(review): the string ends with a closing </div> that has no matching
    # opening <div> - confirm whether that is intentional.
    gr.Markdown("<h1>MindsEye Lite <small><small>run multiple text-to-image models in one place</small></small></h1><p>MindsEye Lite orchestrates multiple text-to-image models in one Spaces. This work carries the spirit of <a href='https://multimodal.art/mindseye' target='_blank'>MindsEye Beta</a>, but with simplified versions of the models due to current hardware limitations of Spaces. MindsEye Lite was created by <a style='color: rgb(99, 102, 241);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a>, keep up with the <a style='color: rgb(99, 102, 241);' href='https://multimodal.art/news' target='_blank'>latest multimodal ai art news here</a>, join our <a href='https://discord.gg/FsDBTE5BNx'>Discord</a> and consider <a style='color: rgb(99, 102, 241);' href='https://www.patreon.com/multimodalart' target='_blank'>supporting us on Patreon</a></div></p>")
    # CSS injected through a Markdown component.
    # NOTE(review): `.svelte-9r19iu` looks like a generated class name; presumably
    # it targets the gallery grid (three columns) and may change between Gradio
    # versions - TODO confirm.
    gr.Markdown("<style>h1{margin-bottom:0em !important} .svelte-9r19iu > .grid {grid-template-columns: repeat(3,minmax(0,1fr));} </style>")
    # Prompt shared by all four model tabs: every click handler below receives it
    # as its first input.
    text = gr.inputs.Textbox(placeholder="Type your prompt to generate an image", label="Prompt - try adding increments to your prompt such as 'a painting of', 'in the style of Picasso'", default="A giant mecha robot in Rio de Janeiro, oil on canvas")
    with gr.Row():
        # Left column: one tab per model, each with its own controls and button.
        with gr.Column():
            with gr.Tabs():
                # Inputs consumed by text2image_latent.
                with gr.TabItem("Latent Diffusion"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/latentdiffusion' target='_blank'>Latent Diffusion</a> is the state of the art of open source text-to-image models, superb in text synthesis. Sometimes struggles with complex prompts")
                    steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1)
                    width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
                    height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
                    images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4)
                    diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0)
                    get_image_latent = gr.Button("Generate Image",css=css_mt)
                # Inputs consumed by text2image_vqgan.
                with gr.TabItem("VQGAN+CLIP"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/vqgan' target='_blank'>VQGAN+CLIP</a> is the most famous text-to-image generator. Can produce good artistic results")
                    width_vq = gr.inputs.Slider(label="Width", default=256, minimum=32, step=32, maximum=512)
                    height_vq= gr.inputs.Slider(label="Height", default=256, minimum=32, step=32, maximum=512)
                    style = gr.inputs.Dropdown(label="Style - Hyper Fast Results is fast but compromises a bit of the quality",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results")
                    steps_vq = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate. All styles that are not Hyper Fast need at least 200 steps",default=50,maximum=300,minimum=1,step=1)
                    # NOTE(review): the label mentions "the images below", but no such
                    # images are created in the visible layout; no default is set either.
                    flavor = gr.inputs.Dropdown(label="Flavor - pick a flavor for the style of the images, based on the images below",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu"])
                    get_image_vqgan = gr.Button("Generate Image",css=css_mt)
                # Inputs consumed by text2image_diffusion.
                with gr.TabItem("Guided Diffusion"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/diffusion' target='_blank'>Guided Diffusion</a> models produce superb quality results. V-Diffusion is its latest implementation")
                    steps_diff = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=40,maximum=80,minimum=1,step=1)
                    images_diff = gr.inputs.Slider(label="Number of images in parallel", default=2, maximum=4, minimum=1, step=1)
                    weight = gr.inputs.Slider(label="Weight - how closely the image should resemble the prompt", default=5, maximum=15, minimum=0, step=1)
                    clip = gr.inputs.Checkbox(label="CLIP Guided - improves coherence with complex prompts, makes it slower")
                    get_image_diffusion = gr.Button("Generate Image",css=css_mt)
                # Inputs consumed by text2image_rudalle.
                with gr.TabItem("ruDALLE"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/rudalle' target='_blank'>ruDALLE</a> is a replication of DALL-E 1 in the russian language. No worries, your prompts will be translated automatically to russian. In case you see an error, try again a few times")
                    aspect = gr.inputs.Radio(label="Aspect Ratio", choices=["Square", "Horizontal", "Vertical"],default="Square")
                    model = gr.inputs.Dropdown(label="Model", choices=["Surrealism","Realism", "Emoji"], default="Surrealism")
                    get_image_rudalle = gr.Button("Generate Image",css=css_mt)
        # Right column: a single gallery that all four handlers write to.
        with gr.Column():
            with gr.Tabs():
                # Disabled single-image output tab, kept as in the original:
                #with gr.TabItem("Image output"):
                #    image = gr.outputs.Image()
                with gr.TabItem("Gallery output"):
                    gallery = gr.Gallery(label="Individual images")
    # Footer: bias acknowledgment and image-ownership note.
    # NOTE(review): the user-facing text contains the typos "exarcbates" and
    # "Definetly"; left untouched here because it is a runtime string.
    with gr.Row():
        gr.Markdown("<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exarcbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on both the Imagenet dataset and in an undisclosed dataset by OpenAI.</div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definetly not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>") 
    # Event wiring: each button calls its wrapper with the shared prompt plus that
    # tab's controls, in the wrapper's positional parameter order, and sends the
    # returned value to the shared gallery.
    get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
    get_image_rudalle.click(text2image_rudalle, inputs=[text,aspect,model], outputs=gallery)
    get_image_vqgan.click(text2image_vqgan, inputs=[text,width_vq,height_vq,style,steps_vq,flavor],outputs=gallery)
    get_image_diffusion.click(text2image_diffusion, inputs=[text, steps_diff, images_diff, weight, clip],outputs=gallery)
# Start the app at import time, with request queueing explicitly turned off.
mindseye.launch(enable_queue=False)