Upload folder using huggingface_hub
Browse files- Untitled.ipynb +113 -57
- __pycache__/pipeline_waifu.cpython-311.pyc +0 -0
- pipeline_waifu.py +6 -3
Untitled.ipynb
CHANGED
@@ -2,61 +2,65 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "dca3239c-17d6-4284-a2cf-83237a55a7df",
|
7 |
"metadata": {},
|
8 |
"outputs": [
|
9 |
{
|
10 |
"data": {
|
11 |
"application/vnd.jupyter.widget-view+json": {
|
12 |
-
"model_id": "
|
13 |
"version_major": 2,
|
14 |
"version_minor": 0
|
15 |
},
|
16 |
"text/plain": [
|
17 |
-
"
|
18 |
]
|
19 |
},
|
20 |
"metadata": {},
|
21 |
"output_type": "display_data"
|
22 |
},
|
23 |
{
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
-
|
28 |
-
"\
|
29 |
-
"\
|
30 |
-
"
|
31 |
-
"
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"
|
41 |
-
"
|
42 |
-
"
|
43 |
-
"
|
44 |
-
"
|
45 |
-
"
|
46 |
-
"
|
47 |
-
"
|
48 |
-
"
|
49 |
-
"\
|
|
|
|
|
|
|
50 |
]
|
51 |
}
|
52 |
],
|
53 |
"source": [
|
54 |
"import torch\n",
|
55 |
"from diffusers import DiffusionPipeline\n",
|
|
|
56 |
"\n",
|
57 |
-
"pipe_id = \"
|
58 |
"variant = \"fp16\"\n",
|
59 |
-
"pipe =
|
60 |
" pipe_id, \n",
|
61 |
" variant=variant,\n",
|
62 |
" trust_remote_code=True\n",
|
@@ -67,37 +71,89 @@
|
|
67 |
},
|
68 |
{
|
69 |
"cell_type": "code",
|
70 |
-
"execution_count":
|
71 |
"id": "b6ebc579-0eb2-4828-89d5-b40f6d5e758e",
|
72 |
"metadata": {},
|
73 |
"outputs": [],
|
74 |
"source": [
|
75 |
-
"
|
76 |
-
"
|
77 |
-
"
|
78 |
-
"
|
79 |
-
"
|
80 |
-
"
|
81 |
-
"
|
82 |
-
"
|
83 |
-
"
|
84 |
-
"
|
85 |
-
"
|
86 |
-
"
|
87 |
-
"
|
88 |
-
" \
|
89 |
-
" \"
|
90 |
-
"
|
91 |
-
"
|
92 |
-
" \
|
93 |
-
" \
|
94 |
-
"
|
95 |
-
"
|
96 |
-
" \
|
97 |
-
"
|
98 |
-
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
}
|
102 |
],
|
103 |
"metadata": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
"id": "dca3239c-17d6-4284-a2cf-83237a55a7df",
|
7 |
"metadata": {},
|
8 |
"outputs": [
|
9 |
{
|
10 |
"data": {
|
11 |
"application/vnd.jupyter.widget-view+json": {
|
12 |
+
"model_id": "add6682df5934d75bee02c59c4507a43",
|
13 |
"version_major": 2,
|
14 |
"version_minor": 0
|
15 |
},
|
16 |
"text/plain": [
|
17 |
+
"Loading pipeline components...: 0%| | 0/5 [00:00<?, ?it/s]"
|
18 |
]
|
19 |
},
|
20 |
"metadata": {},
|
21 |
"output_type": "display_data"
|
22 |
},
|
23 |
{
|
24 |
+
"name": "stdout",
|
25 |
+
"output_type": "stream",
|
26 |
+
"text": [
|
27 |
+
"WaifuPipeline {\n",
|
28 |
+
" \"_class_name\": \"WaifuPipeline\",\n",
|
29 |
+
" \"_diffusers_version\": \"0.32.0.dev0\",\n",
|
30 |
+
" \"_name_or_path\": \"/home/recoilme/models/waifu-2b\",\n",
|
31 |
+
" \"scheduler\": [\n",
|
32 |
+
" \"diffusers\",\n",
|
33 |
+
" \"FlowMatchEulerDiscreteScheduler\"\n",
|
34 |
+
" ],\n",
|
35 |
+
" \"text_encoder\": [\n",
|
36 |
+
" \"transformers\",\n",
|
37 |
+
" \"XLMRobertaModel\"\n",
|
38 |
+
" ],\n",
|
39 |
+
" \"tokenizer\": [\n",
|
40 |
+
" \"transformers\",\n",
|
41 |
+
" \"XLMRobertaTokenizerFast\"\n",
|
42 |
+
" ],\n",
|
43 |
+
" \"transformer\": [\n",
|
44 |
+
" \"diffusers\",\n",
|
45 |
+
" \"SanaTransformer2DModel\"\n",
|
46 |
+
" ],\n",
|
47 |
+
" \"vae\": [\n",
|
48 |
+
" \"diffusers\",\n",
|
49 |
+
" \"AutoencoderKL\"\n",
|
50 |
+
" ]\n",
|
51 |
+
"}\n",
|
52 |
+
"\n"
|
53 |
]
|
54 |
}
|
55 |
],
|
56 |
"source": [
|
57 |
"import torch\n",
|
58 |
"from diffusers import DiffusionPipeline\n",
|
59 |
+
"from pipeline_waifu import WaifuPipeline\n",
|
60 |
"\n",
|
61 |
+
"pipe_id = \"/home/recoilme/models/waifu-2b\"\n",
|
62 |
"variant = \"fp16\"\n",
|
63 |
+
"pipe = WaifuPipeline.from_pretrained(\n",
|
64 |
" pipe_id, \n",
|
65 |
" variant=variant,\n",
|
66 |
" trust_remote_code=True\n",
|
|
|
71 |
},
|
72 |
{
|
73 |
"cell_type": "code",
|
74 |
+
"execution_count": 3,
|
75 |
"id": "b6ebc579-0eb2-4828-89d5-b40f6d5e758e",
|
76 |
"metadata": {},
|
77 |
"outputs": [],
|
78 |
"source": [
|
79 |
+
"def txt_embeds(prompt):\n",
|
80 |
+
" max_length = 512\n",
|
81 |
+
" select_index = [0] + list(range(-max_length + 1, 0))\n",
|
82 |
+
" \n",
|
83 |
+
" text_inputs = tokenizer(\n",
|
84 |
+
" prompt,\n",
|
85 |
+
" padding=\"max_length\",\n",
|
86 |
+
" max_length=max_length,\n",
|
87 |
+
" truncation=True,\n",
|
88 |
+
" add_special_tokens=True,\n",
|
89 |
+
" return_tensors=\"pt\",\n",
|
90 |
+
" )\n",
|
91 |
+
" text_input_ids = text_inputs.input_ids\n",
|
92 |
+
" prompt_attention_mask = text_inputs.attention_mask\n",
|
93 |
+
" device = \"cuda\"\n",
|
94 |
+
" prompt_attention_mask = prompt_attention_mask.to(device)\n",
|
95 |
+
" \n",
|
96 |
+
" prompt_embeds = text_encoder(input_ids=text_input_ids.to(device), attention_mask=prompt_attention_mask)[0][:, select_index]\n",
|
97 |
+
" #print(prompt_embeds.shape)\n",
|
98 |
+
" #prompt_embeds = prompt_embeds[0][:, select_index]\n",
|
99 |
+
" prompt_attention_mask = prompt_attention_mask[:, select_index]\n",
|
100 |
+
" #print(prompt_attention_mask.shape)\n",
|
101 |
+
" return prompt_embeds, prompt_attention_mask"
|
102 |
+
]
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"cell_type": "code",
|
106 |
+
"execution_count": 4,
|
107 |
+
"id": "3704e4a3-6855-4a32-a682-7ec307a668a0",
|
108 |
+
"metadata": {},
|
109 |
+
"outputs": [
|
110 |
+
{
|
111 |
+
"ename": "NameError",
|
112 |
+
"evalue": "name 'tokenizer' is not defined",
|
113 |
+
"output_type": "error",
|
114 |
+
"traceback": [
|
115 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
116 |
+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
117 |
+
"Cell \u001b[0;32mIn[4], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m prompt \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mA young girl with long brown hair and brown eyes is standing against a brick wall, wearing a white shirt with a black sailor collar and a black pleated skirt. She is smiling and looking directly at the viewer, with sunlight filtering through the green vines behind her\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m#prompt = 'нарядная новогодняя елка, красивые игрушки, звезда сверху, огоньки, на тёмном фоне' \u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m prompt_embeds, prompt_attention_mask \u001b[38;5;241m=\u001b[39m \u001b[43mtxt_embeds\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m negative_prompt_embeds, negative_prompt_attention_mask\u001b[38;5;241m=\u001b[39mtxt_embeds(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(prompt_embeds\u001b[38;5;241m.\u001b[39mshape)\n",
|
118 |
+
"Cell \u001b[0;32mIn[3], line 5\u001b[0m, in \u001b[0;36mtxt_embeds\u001b[0;34m(prompt)\u001b[0m\n\u001b[1;32m 2\u001b[0m max_length \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m512\u001b[39m\n\u001b[1;32m 3\u001b[0m select_index \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m-\u001b[39mmax_length \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m0\u001b[39m))\n\u001b[0;32m----> 5\u001b[0m text_inputs \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m(\n\u001b[1;32m 6\u001b[0m prompt,\n\u001b[1;32m 7\u001b[0m padding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_length\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 8\u001b[0m max_length\u001b[38;5;241m=\u001b[39mmax_length,\n\u001b[1;32m 9\u001b[0m truncation\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 10\u001b[0m add_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 11\u001b[0m return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m text_input_ids \u001b[38;5;241m=\u001b[39m text_inputs\u001b[38;5;241m.\u001b[39minput_ids\n\u001b[1;32m 14\u001b[0m prompt_attention_mask \u001b[38;5;241m=\u001b[39m text_inputs\u001b[38;5;241m.\u001b[39mattention_mask\n",
|
119 |
+
"\u001b[0;31mNameError\u001b[0m: name 'tokenizer' is not defined"
|
120 |
+
]
|
121 |
+
}
|
122 |
+
],
|
123 |
+
"source": [
|
124 |
+
"prompt = '1girl, solo, animal ears, bow, teeth, jacket, tail, open mouth, brown hair, orange background, bowtie, orange nails, simple background, cat ears, orange eyes, blue bow, animal ear fluff, cat tail, looking at viewer, upper body, shirt, school uniform, hood, striped bow, striped, white shirt, black jacket, blue bowtie, fingernails, long sleeves, cat girl, bangs, fangs, collared shirt, striped bowtie, short hair, tongue, hoodie, sharp teeth, facial mark, claw pose'\n",
|
125 |
+
"prompt = 'A young girl with long brown hair and brown eyes is standing against a brick wall, wearing a white shirt with a black sailor collar and a black pleated skirt. She is smiling and looking directly at the viewer, with sunlight filtering through the green vines behind her'\n",
|
126 |
+
"#prompt = 'нарядная новогодняя елка, красивые игрушки, звезда сверху, огоньки, на тёмном фоне' \n",
|
127 |
+
"prompt_embeds, prompt_attention_mask = txt_embeds(prompt)\n",
|
128 |
+
"negative_prompt_embeds, negative_prompt_attention_mask=txt_embeds(\"\")\n",
|
129 |
+
"print(prompt_embeds.shape)\n",
|
130 |
+
"\n",
|
131 |
+
"image = pipe(\n",
|
132 |
+
" prompt = None,\n",
|
133 |
+
" negative_prompt = None,\n",
|
134 |
+
" use_resolution_binning = False,\n",
|
135 |
+
" prompt_embeds=prompt_embeds,\n",
|
136 |
+
" prompt_attention_mask= prompt_attention_mask,\n",
|
137 |
+
" negative_prompt_embeds = negative_prompt_embeds,\n",
|
138 |
+
" negative_prompt_attention_mask = negative_prompt_attention_mask,\n",
|
139 |
+
" height=768,\n",
|
140 |
+
" width=768,\n",
|
141 |
+
" guidance_scale=4.5,\n",
|
142 |
+
" num_inference_steps=24,\n",
|
143 |
+
" generator=torch.Generator(device=\"cuda\").manual_seed(42),\n",
|
144 |
+
")[0]\n",
|
145 |
+
"\n",
|
146 |
+
"for img in image:\n",
|
147 |
+
" img.show()"
|
148 |
]
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"cell_type": "code",
|
152 |
+
"execution_count": null,
|
153 |
+
"id": "1b82a561-b93b-4261-9bc7-fe867168bdd2",
|
154 |
+
"metadata": {},
|
155 |
+
"outputs": [],
|
156 |
+
"source": []
|
157 |
}
|
158 |
],
|
159 |
"metadata": {
|
__pycache__/pipeline_waifu.cpython-311.pyc
ADDED
Binary file (14.6 kB). View file
|
|
pipeline_waifu.py
CHANGED
@@ -14,6 +14,9 @@ from diffusers.models import AutoencoderKL
|
|
14 |
# Transformer
|
15 |
from diffusers import SanaTransformer2DModel
|
16 |
|
|
|
|
|
|
|
17 |
|
18 |
class WaifuPipeline(DiffusionPipeline):
|
19 |
r"""
|
@@ -38,7 +41,7 @@ class WaifuPipeline(DiffusionPipeline):
|
|
38 |
)
|
39 |
|
40 |
self.vae_scale_factor = 8
|
41 |
-
self.image_processor = PixArtImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
42 |
|
43 |
@torch.no_grad()
|
44 |
def __call__(
|
@@ -64,7 +67,7 @@ class WaifuPipeline(DiffusionPipeline):
|
|
64 |
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
65 |
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
66 |
max_sequence_length: int = 512,
|
67 |
-
) -> Union[
|
68 |
"""
|
69 |
Function invoked when calling the pipeline for generation.
|
70 |
|
@@ -285,4 +288,4 @@ class WaifuPipeline(DiffusionPipeline):
|
|
285 |
if not return_dict:
|
286 |
return (image,)
|
287 |
|
288 |
-
return
|
|
|
14 |
# Transformer
|
15 |
from diffusers import SanaTransformer2DModel
|
16 |
|
17 |
+
import numpy as np
|
18 |
+
import PIL.Image
|
19 |
+
|
20 |
|
21 |
class WaifuPipeline(DiffusionPipeline):
|
22 |
r"""
|
|
|
41 |
)
|
42 |
|
43 |
self.vae_scale_factor = 8
|
44 |
+
#self.image_processor = PixArtImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
45 |
|
46 |
@torch.no_grad()
|
47 |
def __call__(
|
|
|
67 |
callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
|
68 |
callback_on_step_end_tensor_inputs: List[str] = ["latents"],
|
69 |
max_sequence_length: int = 512,
|
70 |
+
) -> Union[List[PIL.Image.Image], np.ndarray]:
|
71 |
"""
|
72 |
Function invoked when calling the pipeline for generation.
|
73 |
|
|
|
288 |
if not return_dict:
|
289 |
return (image,)
|
290 |
|
291 |
+
return Union[List[PIL.Image.Image], np.ndarray]
|