---
# CogVideoX-2B (SAT) video-diffusion fine-tuning configuration.
# Top-level sections: model (diffusion engine), args (SAT training loop),
# data (dataset loader), deepspeed (runtime/optimizer/ZeRO settings).

model:
  scale_factor: 1.15258426
  disable_first_stage_autocast: true
  log_keys:
    - txt

  # Epsilon-prediction denoiser over a 1000-step discrete schedule.
  denoiser_config:
    target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
    params:
      num_idx: 1000
      quantize_c_noise: false
      weighting_config:
        target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
      scaling_config:
        target: sgm.modules.diffusionmodules.denoiser_scaling.VideoScaling
      discretization_config:
        target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
        params:
          shift_scale: 3.0

  # DiT backbone operating on temporally compressed video latents.
  network_config:
    target: dit_video_concat.DiffusionTransformer
    params:
      time_embed_dim: 512
      elementwise_affine: true
      num_frames: 49
      time_compressed_rate: 4
      latent_width: 90
      latent_height: 60
      num_layers: 30
      patch_size: 2
      in_channels: 16
      out_channels: 16
      hidden_size: 1920
      adm_in_channels: 256
      num_attention_heads: 30

      transformer_args:
        checkpoint_activations: true
        vocab_size: 1
        max_sequence_length: 64
        layernorm_order: pre
        skip_init: false
        model_parallel_size: 1
        is_decoder: false

      modules:
        pos_embed_config:
          target: dit_video_concat.Basic3DPositionEmbeddingMixin
          params:
            text_length: 226
            height_interpolation: 1.875
            width_interpolation: 1.875
        patch_embed_config:
          target: dit_video_concat.ImagePatchEmbeddingMixin
          params:
            text_hidden_size: 4096
        adaln_layer_config:
          target: dit_video_concat.AdaLNMixin
          params:
            qk_ln: true
        final_layer_config:
          target: dit_video_concat.FinalLayerMixin

  # Text conditioning via frozen T5-XXL; ucg_rate drops the prompt 10%
  # of the time for classifier-free guidance training.
  conditioner_config:
    target: sgm.modules.GeneralConditioner
    params:
      emb_models:
        - is_trainable: false
          input_key: txt
          ucg_rate: 0.1
          target: sgm.modules.encoders.modules.FrozenT5Embedder
          params:
            model_dir: /mnt/lustre/sichenyang.p/code/vla/CogVideo/sat/CogVideoX-2b-sat/t5-v1_1-xxl
            max_length: 226

  # 3D VAE that maps pixel video to/from the 16-channel latent space.
  first_stage_config:
    target: vae_modules.autoencoder.VideoAutoencoderInferenceWrapper
    params:
      cp_size: 1
      ckpt_path: /mnt/lustre/sichenyang.p/code/vla/CogVideo/sat/CogVideoX-2b-sat/vae/3d-vae.pt
      ignore_keys:
        - loss
      loss_config:
        target: torch.nn.Identity
      regularizer_config:
        target: vae_modules.regularizers.DiagonalGaussianRegularizer
      encoder_config:
        target: vae_modules.cp_enc_dec.ContextParallelEncoder3D
        params:
          double_z: true
          z_channels: 16
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 2
            - 4
          attn_resolutions: []
          num_res_blocks: 3
          dropout: 0.0
          gather_norm: true
      decoder_config:
        target: vae_modules.cp_enc_dec.ContextParallelDecoder3D
        params:
          double_z: true
          z_channels: 16
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 2
            - 4
          attn_resolutions: []
          num_res_blocks: 3
          dropout: 0.0
          # NOTE(review): encoder uses gather_norm: true while decoder uses
          # false — presumably intentional; confirm against vae_modules.
          gather_norm: false

  loss_fn_config:
    target: sgm.modules.diffusionmodules.loss.VideoDiffusionLoss
    params:
      offset_noise_level: 0
      sigma_sampler_config:
        target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
        params:
          uniform_sampling: true
          num_idx: 1000
          discretization_config:
            target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
            params:
              shift_scale: 3.0

  sampler_config:
    target: sgm.modules.diffusionmodules.sampling.VPSDEDPMPP2MSampler
    params:
      num_steps: 50
      verbose: true
      discretization_config:
        target: sgm.modules.diffusionmodules.discretizer.ZeroSNRDDPMDiscretization
        params:
          shift_scale: 3.0
      guider_config:
        target: sgm.modules.diffusionmodules.guiders.DynamicCFG
        params:
          scale: 6
          exp: 5
          num_steps: 50

# SAT training-loop arguments.
args:
  checkpoint_activations: true
  model_parallel_size: 1
  experiment_name: dense_exp_6layer_gating_0.00002lr_all_continue
  mode: finetune
  load: /mnt/petrelfs/sichenyang.p/code/vla/CogVideo/sat_scy/ckpts_2b_lora/dense_exp_6layer_gating_0.00002lr_all_continue-09-20-12-08
  no_load_rng: true
  train_iters: 100000
  eval_iters: 1
  eval_interval: 100
  eval_batch_size: 1
  save: ckpts_2b_lora
  save_interval: 1000
  log_interval: 20
  train_data:
    - /mnt/petrelfs/sichenyang.p/code/video_project/assets/data/mix_high_quality/vimeo+youtube+vecteezy+gen3.json
  valid_data:
    - /mnt/lustre/sichenyang.p/code/SD3_Vid/dataset_collection/data/gen3/all.json
  # Train/val/test split ratios, consumed as the string "1,0,0".
  split: 1,0,0
  num_workers: 8
  force_train: true
  only_log_video_latents: true

# Dataset loader: 480x720 clips, 8 fps, up to 49 frames.
data:
  target: data_video.PetrelDataset
  params:
    video_size:
      - 480
      - 720
    fps: 8
    max_num_frames: 49
    skip_frms_num: 3.0

# DeepSpeed runtime: ZeRO stage 2, fp16 with dynamic loss scaling,
# fused EMA-Adam optimizer.
deepspeed:
  train_micro_batch_size_per_gpu: 2
  gradient_accumulation_steps: 1
  steps_per_print: 50
  gradient_clipping: 0.1
  zero_optimization:
    stage: 2
    cpu_offload: false
    contiguous_gradients: false
    overlap_comm: true
    reduce_scatter: true
    reduce_bucket_size: 1000000000
    allgather_bucket_size: 1000000000
    load_from_fp32_weights: false
  zero_allow_untested_optimizer: true
  bf16:
    enabled: false
  fp16:
    enabled: true
    # loss_scale: 0 selects dynamic loss scaling in DeepSpeed.
    loss_scale: 0
    loss_scale_window: 400
    hysteresis: 2
    min_loss_scale: 1
  optimizer:
    type: sat.ops.FusedEmaAdam
    params:
      lr: 2.0e-05
      betas:
        - 0.9
        - 0.95
      eps: 1.0e-08
      weight_decay: 0.0001
  activation_checkpointing:
    partition_activations: false
    contiguous_memory_optimization: false
  wall_clock_breakdown: false