a-r-r-o-w HF staff committed on
Commit
f7f5744
·
verified ·
1 Parent(s): c24448a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +101 -0
README.md CHANGED
@@ -6,6 +6,107 @@ library_name: diffusers
6
 
7
  <!-- Provide a quick summary of what the model is/does. -->
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  ## Model Details
 
6
 
7
  <!-- Provide a quick summary of what the model is/does. -->
8
 
9
+ Script for creating the dummy random model:
10
+
11
+
12
+ ```python
13
+ import torch
14
+ from diffusers import HunyuanVideoTransformer3DModel, AutoencoderKLHunyuanVideo, FlowMatchEulerDiscreteScheduler, HunyuanVideoPipeline
15
+ from transformers import LlamaModel, LlamaTokenizerFast, CLIPTextModel, CLIPTokenizer, LlamaConfig, CLIPTextConfig
16
+
17
+ torch.manual_seed(0)
18
+ transformer = HunyuanVideoTransformer3DModel(
19
+ in_channels=4,
20
+ out_channels=4,
21
+ num_attention_heads=2,
22
+ attention_head_dim=10,
23
+ num_layers=1,
24
+ num_single_layers=1,
25
+ num_refiner_layers=1,
26
+ patch_size=1,
27
+ patch_size_t=1,
28
+ guidance_embeds=True,
29
+ text_embed_dim=16,
30
+ pooled_projection_dim=8,
31
+ rope_axes_dim=(2, 4, 4),
32
+ )
33
+
34
+ torch.manual_seed(0)
35
+ vae = AutoencoderKLHunyuanVideo(
36
+ in_channels=3,
37
+ out_channels=3,
38
+ latent_channels=4,
39
+ down_block_types=(
40
+ "HunyuanVideoDownBlock3D",
41
+ "HunyuanVideoDownBlock3D",
42
+ "HunyuanVideoDownBlock3D",
43
+ "HunyuanVideoDownBlock3D",
44
+ ),
45
+ up_block_types=(
46
+ "HunyuanVideoUpBlock3D",
47
+ "HunyuanVideoUpBlock3D",
48
+ "HunyuanVideoUpBlock3D",
49
+ "HunyuanVideoUpBlock3D",
50
+ ),
51
+ block_out_channels=(8, 8, 8, 8),
52
+ layers_per_block=1,
53
+ act_fn="silu",
54
+ norm_num_groups=4,
55
+ scaling_factor=0.476986,
56
+ spatial_compression_ratio=8,
57
+ temporal_compression_ratio=4,
58
+ mid_block_add_attention=True,
59
+ )
60
+
61
+ torch.manual_seed(0)
62
+ scheduler = FlowMatchEulerDiscreteScheduler(shift=7.0)
63
+
64
+ llama_text_encoder_config = LlamaConfig(
65
+ bos_token_id=0,
66
+ eos_token_id=2,
67
+ hidden_size=16,
68
+ intermediate_size=37,
69
+ layer_norm_eps=1e-05,
70
+ num_attention_heads=4,
71
+ num_hidden_layers=2,
72
+ pad_token_id=1,
73
+ vocab_size=1000,
74
+ hidden_act="gelu",
75
+ projection_dim=32,
76
+ )
77
+ clip_text_encoder_config = CLIPTextConfig(
78
+ bos_token_id=0,
79
+ eos_token_id=2,
80
+ hidden_size=8,
81
+ intermediate_size=37,
82
+ layer_norm_eps=1e-05,
83
+ num_attention_heads=4,
84
+ num_hidden_layers=2,
85
+ pad_token_id=1,
86
+ vocab_size=1000,
87
+ hidden_act="gelu",
88
+ projection_dim=32,
89
+ )
90
+
91
+ text_encoder = LlamaModel(llama_text_encoder_config)
92
+ tokenizer = LlamaTokenizerFast.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
93
+
94
+ torch.manual_seed(0)
95
+ text_encoder_2 = CLIPTextModel(clip_text_encoder_config)
96
+ tokenizer_2 = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")
97
+
98
+ pipe = HunyuanVideoPipeline(
99
+ transformer=transformer,
100
+ text_encoder=text_encoder,
101
+ tokenizer=tokenizer,
102
+ text_encoder_2=text_encoder_2,
103
+ tokenizer_2=tokenizer_2,
104
+ vae=vae,
105
+ scheduler=scheduler,
106
+ )
107
+
108
+ pipe.push_to_hub("hf-internal-testing/tiny-random-hunyuanvideo")
109
+ ```
110
 
111
 
112
  ## Model Details