XiaoYun Zhang
committed on
Commit
·
9c54c90
1
Parent(s):
750c19e
clean up
Browse files- .gitattributes +2 -0
- .gitignore +2 -0
- cat.png → Asset/cat.png +0 -0
- autoencoder_kl.ckpt → Checkpoint/autoencoder_kl.ckpt +0 -0
- clip_encoder.ckpt → Checkpoint/clip_encoder.ckpt +0 -0
- ddim_v_sampler.ckpt → Checkpoint/ddim_v_sampler.ckpt +0 -0
- ClipTokenizer.cs +59 -0
- AutoencoderKL.cs → Model/AutoencoderKL.cs +0 -0
- ClipEnocder.cs → Model/ClipEnocder.cs +0 -0
- DDIMSampler.cs → Model/DDIMSampler.cs +0 -0
- DDPM.cs → Model/DDPM.cs +0 -0
- Program.cs +21 -21
- README.md +1 -1
- clip.csproj → StableDiffusionV2.csproj +22 -27
- merges.txt +0 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
36 |
cat.png filter=lfs diff=lfs merge=lfs -text
|
37 |
autoencoder_kl.ckpt filter=lfs diff=lfs merge=lfs -text
|
38 |
clip_encoder.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
36 |
cat.png filter=lfs diff=lfs merge=lfs -text
|
37 |
autoencoder_kl.ckpt filter=lfs diff=lfs merge=lfs -text
|
38 |
clip_encoder.ckpt filter=lfs diff=lfs merge=lfs -text
|
39 |
+
Checkpoint/ filter=lfs diff=lfs merge=lfs -text
|
40 |
+
Checkpoint/*.ckpt filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
@@ -215,3 +215,5 @@ AppPackages/
|
|
215 |
# End of core ignore list, below put you custom 'per project' settings (patterns or path)
|
216 |
#####
|
217 |
|
|
|
|
|
|
215 |
# End of core ignore list, below put you custom 'per project' settings (patterns or path)
|
216 |
#####
|
217 |
|
218 |
+
Output/
|
219 |
+
|
cat.png → Asset/cat.png
RENAMED
File without changes
|
autoencoder_kl.ckpt → Checkpoint/autoencoder_kl.ckpt
RENAMED
File without changes
|
clip_encoder.ckpt → Checkpoint/clip_encoder.ckpt
RENAMED
File without changes
|
ddim_v_sampler.ckpt → Checkpoint/ddim_v_sampler.ckpt
RENAMED
File without changes
|
ClipTokenizer.cs
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
using Microsoft.ML.Tokenizers;
|
2 |
+
using System;
|
3 |
+
using System.Collections.Generic;
|
4 |
+
using System.IO;
|
5 |
+
using System.Linq;
|
6 |
+
using System.Net;
|
7 |
+
using System.Net.Http;
|
8 |
+
using System.Text;
|
9 |
+
using System.Threading.Tasks;
|
10 |
+
|
11 |
+
namespace StableDiffusionV2
|
12 |
+
{
|
13 |
+
/// <summary>
/// Turns a text prompt into a fixed-length array of token ids using a
/// byte-pair-encoding tokenizer built from a vocab file and a merges file.
/// </summary>
internal class ClipTokenizer
{
    private const string VocabUrl = "https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/vocab.json";
    private const string MergesUrl = "https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/merges.txt";

    private readonly Tokenizer _tokenizer;
    private readonly int _startToken;
    private readonly int _endToken;

    /// <summary>
    /// Creates the tokenizer. When either <paramref name="vocabPath"/> or
    /// <paramref name="mergesPath"/> does not exist, both files are downloaded
    /// and written to those paths first.
    /// </summary>
    /// <param name="vocabPath">Path of the BPE vocabulary file (vocab.json).</param>
    /// <param name="mergesPath">Path of the BPE merges file (merges.txt).</param>
    /// <param name="startToken">Id placed at the first position of every token array.</param>
    /// <param name="endToken">Id placed at the last position of every token array.</param>
    /// <exception cref="HttpRequestException">A required file is missing and its download failed.</exception>
    public ClipTokenizer(string vocabPath, string mergesPath, int startToken = 49406, int endToken = 49407)
    {
        if (!File.Exists(vocabPath) || !File.Exists(mergesPath))
        {
            using (var client = new HttpClient())
            {
                Console.WriteLine("download vocab.json and merges.txt");
                Download(client, VocabUrl, vocabPath);
                Download(client, MergesUrl, mergesPath);
            }
        }

        _tokenizer = new Tokenizer(new Bpe(vocabPath, mergesPath, endOfWordSuffix: "</w>"));
        _startToken = startToken;
        _endToken = endToken;
    }

    /// <summary>
    /// Encodes <paramref name="text"/> into exactly <paramref name="maxTokens"/> ids laid out as
    /// start token, prompt ids, zero padding, end token.
    /// </summary>
    /// <param name="text">Prompt to encode.</param>
    /// <param name="maxTokens">Length of the returned array; must be at least 2.</param>
    /// <returns>An array of length <paramref name="maxTokens"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxTokens"/> is smaller than 2.</exception>
    public int[] Tokenize(string text, int maxTokens = 77)
    {
        if (maxTokens < 2)
        {
            throw new ArgumentOutOfRangeException(nameof(maxTokens), maxTokens, "maxTokens must leave room for the start and end tokens.");
        }

        var res = _tokenizer.Encode(text);

        // Prompts that encode to more ids than fit are truncated; previously this
        // case threw because the padding count became negative.
        var kept = Math.Min(res.Ids.Count, maxTokens - 2);

        // A fresh int[] is zero-filled, which supplies the 0 padding.
        // NOTE(review): the end token is kept at the last position, after the padding,
        // exactly as before. Reference CLIP tokenizers usually put it right after the
        // prompt ids - confirm which layout the encoder checkpoint expects.
        var tokens = new int[maxTokens];
        tokens[0] = _startToken;
        var position = 1;
        foreach (var id in res.Ids.Take(kept))
        {
            tokens[position++] = id;
        }

        tokens[maxTokens - 1] = _endToken;
        return tokens;
    }

    // Fetches the whole payload before touching the disk, so a failed download
    // cannot leave an empty file that would pass the File.Exists check next time.
    private static void Download(HttpClient client, string url, string path)
    {
        // The constructor cannot await; GetAwaiter().GetResult() surfaces the
        // original exception instead of an AggregateException.
        var content = client.GetStringAsync(url).GetAwaiter().GetResult();
        File.WriteAllText(path, content);
    }
}
|
59 |
+
}
|
AutoencoderKL.cs → Model/AutoencoderKL.cs
RENAMED
File without changes
|
ClipEnocder.cs → Model/ClipEnocder.cs
RENAMED
File without changes
|
DDIMSampler.cs → Model/DDIMSampler.cs
RENAMED
File without changes
|
DDPM.cs → Model/DDPM.cs
RENAMED
File without changes
|
Program.cs
CHANGED
@@ -1,48 +1,48 @@
|
|
1 |
-
using
|
2 |
-
using Microsoft.ML.Tokenizers;
|
3 |
using System;
|
4 |
-
using System.Collections.Generic;
|
5 |
using System.IO;
|
6 |
-
using System.Linq;
|
7 |
using TorchSharp;
|
8 |
|
9 |
var batch = 1;
|
10 |
-
var
|
11 |
-
|
12 |
-
var start_token = 49406;
|
13 |
-
var end_token = 49407;
|
14 |
var prompt = "a wild cute green cat";
|
15 |
-
var
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
tokenTensor = tokenTensor.repeat(batch, 1);
|
20 |
-
var unconditional_tokenTensor = torch.tensor(uncontional_tokens.ToArray(), dtype: torch.ScalarType.Int64, device: device);
|
21 |
unconditional_tokenTensor = unconditional_tokenTensor.repeat(batch, 1);
|
22 |
|
23 |
-
torchvision.io.DefaultImager = new torchvision.io.SkiaImager();
|
24 |
-
var device = TorchSharp.torch.device("cuda:0");
|
25 |
var clipEncoder = new ClipEncoder("clip_encoder.ckpt", device);
|
26 |
var img = torch.randn(batch, 4, 64, 64, dtype: torch.ScalarType.Float32, device: device);
|
27 |
-
var t = torch.full(new[]{batch, 1L}, value: batch, dtype: torch.ScalarType.Int32, device: device);
|
28 |
var condition = clipEncoder.Forward(tokenTensor);
|
29 |
var unconditional_condition = clipEncoder.Forward(unconditional_tokenTensor);
|
30 |
-
|
31 |
clipEncoder.Dispose();
|
|
|
32 |
var ddpm = new DDPM("ddim_v_sampler.ckpt", device);
|
33 |
var ddimSampler = new DDIMSampler(ddpm);
|
34 |
var ddim_steps = 50;
|
35 |
img = ddimSampler.Sample(img, condition, unconditional_condition, ddim_steps);
|
36 |
ddpm.Dispose();
|
37 |
|
38 |
-
var
|
39 |
-
var decoded_images =
|
40 |
decoded_images = torch.clamp((decoded_images + 1.0) / 2.0, 0.0, 1.0);
|
41 |
|
42 |
-
|
43 |
for(int i = 0; i!= batch; ++i)
|
44 |
{
|
|
|
45 |
var image = decoded_images[i];
|
46 |
image = (image * 255.0).to(torch.ScalarType.Byte).cpu();
|
47 |
-
torchvision.io.write_image(image,
|
|
|
|
|
48 |
}
|
|
|
1 |
+
using StableDiffusionV2;
|
|
|
2 |
using System;
|
|
|
3 |
using System.IO;
|
|
|
4 |
using TorchSharp;
|
5 |
|
6 |
// Run configuration: batch size, CUDA device, image writer backend and prompt.
var batchSize = 1;
var device = torch.device("cuda:0");
torchvision.io.DefaultImager = new torchvision.io.SkiaImager();
var prompt = "a wild cute green cat";

// CreateDirectory does nothing when the folder is already there.
var outputDir = "Output";
Directory.CreateDirectory(outputDir);

// Tokenize the prompt and the empty (unconditional) prompt, then repeat each along the batch axis.
var tokenizer = new ClipTokenizer("vocab.json", "merges.txt");
var promptIds = tokenizer.Tokenize(prompt);
var emptyPromptIds = tokenizer.Tokenize("");
var promptTensor = torch.tensor(promptIds, dtype: torch.ScalarType.Int64, device: device);
var emptyPromptTensor = torch.tensor(emptyPromptIds, dtype: torch.ScalarType.Int64, device: device);
promptTensor = promptTensor.repeat(batchSize, 1);
emptyPromptTensor = emptyPromptTensor.repeat(batchSize, 1);

// Encode both token tensors; the starting noise of shape (batch, 4, 64, 64) is drawn in between.
var textEncoder = new ClipEncoder("clip_encoder.ckpt", device);
var latents = torch.randn(batchSize, 4, 64, 64, dtype: torch.ScalarType.Float32, device: device);
var promptCondition = textEncoder.Forward(promptTensor);
var emptyCondition = textEncoder.Forward(emptyPromptTensor);
// The encoder is released before the next checkpoint is loaded (presumably to free device memory).
textEncoder.Dispose();

// Run the DDIM sampler on the noise with both conditions.
var diffusion = new DDPM("ddim_v_sampler.ckpt", device);
var sampler = new DDIMSampler(diffusion);
var samplingSteps = 50;
latents = sampler.Sample(latents, promptCondition, emptyCondition, samplingSteps);
diffusion.Dispose();

// Decode the sampled result and map it through (x + 1) / 2, clamped to [0, 1].
var autoencoder = new AutoencoderKL("autoencoder_kl.ckpt", device);
var decoded = autoencoder.Forward(latents);
decoded = torch.clamp((decoded + 1.0) / 2.0, 0.0, 1.0);

// Write every image of the batch as Output/<index>.png.
for (var index = 0; index < batchSize; index++)
{
    var targetPath = Path.Join(outputDir, $"{index}.png");
    var pixels = (decoded[index] * 255.0).to(torch.ScalarType.Byte).cpu();
    torchvision.io.write_image(pixels, targetPath, torchvision.ImageFormat.Png);

    Console.WriteLine($"save image to {targetPath}, enjoy");
}
|
README.md
CHANGED
@@ -16,7 +16,7 @@ This project serves as a proof-of-concept purpose of bringing StableDiffusion mo
|
|
16 |
|
17 |
# Example output
|
18 |
|
19 |
-
![a wild cute green cat](cat.png)
|
20 |
*a wild cute green cat*
|
21 |
---
|
22 |
license: mit
|
|
|
16 |
|
17 |
# Example output
|
18 |
|
19 |
+
![a wild cute green cat](Asset/cat.png)
|
20 |
*a wild cute green cat*
|
21 |
---
|
22 |
license: mit
|
clip.csproj → StableDiffusionV2.csproj
RENAMED
@@ -1,27 +1,22 @@
|
|
1 |
-
<Project Sdk="Microsoft.NET.Sdk">
|
2 |
-
|
3 |
-
<PropertyGroup>
|
4 |
-
<OutputType>Exe</OutputType>
|
5 |
-
<TargetFramework>net6.0</TargetFramework>
|
6 |
-
<Nullable>enable</Nullable>
|
7 |
-
<Platform>x64</Platform>
|
8 |
-
<TorchVersion>0.99.3</TorchVersion>
|
9 |
-
</PropertyGroup>
|
10 |
-
|
11 |
-
<ItemGroup>
|
12 |
-
<PackageReference Include="Microsoft.ML" Version="2.0.1" />
|
13 |
-
<PackageReference Include="Microsoft.ML.Tokenizers" Version="0.20.1" />
|
14 |
-
<PackageReference Include="TorchVision" Version="$(TorchVersion)" />
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
24 |
-
</None>
|
25 |
-
</ItemGroup>
|
26 |
-
|
27 |
-
</Project>
|
|
|
1 |
+
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <Nullable>enable</Nullable>
    <Platform>x64</Platform>
    <!-- Single version shared by TorchVision and the TorchSharp CUDA runtime packages below. -->
    <TorchVersion>0.99.3</TorchVersion>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.ML" Version="2.0.1" />
    <PackageReference Include="Microsoft.ML.Tokenizers" Version="0.20.1" />
    <PackageReference Include="TorchVision" Version="$(TorchVersion)" />
    <!-- Pick the CUDA runtime package for the build host. $(OS) is 'Windows_NT' on Windows
         but 'Unix' (never 'Linux') elsewhere, so Linux is detected with IsOSPlatform. -->
    <PackageReference Condition="'$(OS)' == 'Windows_NT'" Include="TorchSharp-cuda-windows" Version="$(TorchVersion)" />
    <PackageReference Condition="$([MSBuild]::IsOSPlatform('Linux'))" Include="TorchSharp-cuda-linux" Version="$(TorchVersion)" />
    <!-- Copy everything under Checkpoint/ next to the built executable. -->
    <None Update="Checkpoint/**/*">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
  </ItemGroup>

</Project>
|
|
|
|
|
|
|
|
|
|
merges.txt
DELETED
The diff for this file is too large to render.
See raw diff
|
|
vocab.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|