TheDenk committed on
Commit
811f545
·
1 Parent(s): c66ae0f

base version

Browse files
Files changed (3) hide show
  1. README.md +57 -0
  2. config.json +30 -0
  3. diffusion_pytorch_model.safetensors +3 -0
README.md CHANGED
@@ -1,3 +1,60 @@
1
  ---
2
  license: apache-2.0
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ language:
4
+ - en
5
+ tags:
6
+ - cogvideox
7
+ - video-generation
8
+ - video-to-video
9
+ - controlnet
10
+ - diffusers
11
  ---
12
+
13
+ # CogVideoX-5b ControlNet Extension
14
+
15
+
16
+ #### (Warning) This is a raw version of the ControlNet. A better version will be published soon.
17
+
18
+ ### How to
19
+ Clone repo
20
+ ```bash
21
+ git clone https://github.com/TheDenk/cogvideox-controlnet.git
22
+ cd cogvideox-controlnet
23
+ ```
24
+
25
+ Create venv
26
+ ```bash
27
+ python -m venv venv
28
+ source venv/bin/activate
29
+ ```
30
+
31
+ Install requirements
32
+ ```bash
33
+ pip install -r requirements.txt
34
+ ```
35
+
36
+ ### Inference examples
37
+ #### Inference with cli
38
+ ```bash
39
+ python -m inference.cli_demo \
40
+ --video_path "resources/car.mp4" \
41
+ --prompt "The camera follows behind red car. Car is surrounded by a panoramic view of the vast, azure ocean. Seagulls soar overhead, and in the distance, a lighthouse stands sentinel, its beam cutting through the twilight. The scene captures a perfect blend of adventure and serenity, with the car symbolizing freedom on the open sea." \
42
+ --controlnet_type "canny" \
43
+ --base_model_path THUDM/CogVideoX-5b \
44
+ --controlnet_model_path TheDenk/cogvideox-5b-controlnet-canny-v1
45
+ ```
46
+
47
+ #### Inference with Gradio
48
+ ```bash
49
+ python -m inference.gradio_web_demo \
50
+ --controlnet_type "canny" \
51
+ --base_model_path THUDM/CogVideoX-5b \
52
+ --controlnet_model_path TheDenk/cogvideox-5b-controlnet-canny-v1
53
+ ```
54
+
55
+
56
+ ## Acknowledgements
57
+ Original code and models: [CogVideoX](https://github.com/THUDM/CogVideo/tree/main).
58
+
59
+ ## Contacts
60
+ <p>Issues should be raised directly in the repository. For professional support and recommendations, please contact <a>[email protected]</a>.</p>
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CogVideoXControlnet",
3
+ "_diffusers_version": "0.31.0.dev0",
4
+ "activation_fn": "gelu-approximate",
5
+ "attention_bias": true,
6
+ "attention_head_dim": 64,
7
+ "downscale_coef": 8,
8
+ "dropout": 0.0,
9
+ "flip_sin_to_cos": true,
10
+ "freq_shift": 0,
11
+ "in_channels": 3,
12
+ "max_text_seq_length": 226,
13
+ "norm_elementwise_affine": true,
14
+ "norm_eps": 1e-05,
15
+ "num_attention_heads": 30,
16
+ "num_layers": 12,
17
+ "out_proj_dim": 3072,
18
+ "patch_size": 2,
19
+ "sample_frames": 49,
20
+ "sample_height": 60,
21
+ "sample_width": 90,
22
+ "spatial_interpolation_scale": 1.875,
23
+ "temporal_compression_ratio": 4,
24
+ "temporal_interpolation_scale": 1.0,
25
+ "time_embed_dim": 512,
26
+ "timestep_activation_fn": "silu",
27
+ "use_learned_positional_embeddings": false,
28
+ "use_rotary_positional_embeddings": false,
29
+ "vae_channels": 16
30
+ }
diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab2f0465d31b0a153035f32e18c2d7c9b46413af59227eaea2cca69c8970c572
3
+ size 1506883000