# test_model/config.yaml
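# Hydra-style training configuration (sections are instantiated from their
# `_target_` entries) for CTCNet audio-visual speech enhancement on LRS2,
# trained with PyTorch Lightning.

# Data: JSON manifests for the LRS2 tr/cv/tt splits; single target source,
# 16 kHz audio, 4-second training segments, no augmentation.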
datamodule:
  _target_: look2hear.datas.datamodule.DataModule
  batch_size: 1
  num_workers: 8
  pin_memory: true
  DataClass:
    _target_: look2hear.datas.datasets.waveform.WaveformDataClass
    train_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tr
    val_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/cv
    test_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tt
    n_src: 1
    task: enhancement
    sample_rate: 16000
    segment: 4
    is_drop: false
    normalize_audio: false
    augmentation: false
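# CTCNet separator: convolutional audio encoder/decoder, a 512-channel audio
# branch and a 64-channel visual branch fused via ConcatFusion, and a mask
# generator producing the single-speaker mask.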
audiomodel:
  _target_: look2hear.models.ctcnet.CTCNet
  encoder_type: ConvolutionalEncoder
  decoder_type: ConvolutionalDecoder
  audio_channels: 1
  audio_encoder_channels: 512
  audio_encoder_kernels: 21
  audio_encoder_strides: 10
  audio_in_channels: 512
  audio_out_channels: 512
  audio_kernel_size: 5
  audio_depth: 4
  audio_block_type: ConvNormAct
  audio_norm_type: gLN
  audio_act_type: PReLU
  audio_shared: true
  visual_encoder_channels: 512
  visual_in_channels: 64
  visual_out_channels: 64
  visual_kernel_size: 3
  visual_depth: 4
  visual_block_type: ConvNormAct
  visual_norm_type: BatchNorm1d
  visual_act_type: PReLU
  visual_shared: false
  fusion_type: ConcatFusion
  fusion_shared: false
  n_repeats: 3
  m_repeats: 13
  mask_types: MaskGenerator
  num_speakers: 1
  mask_kernel_size: 1
  mask_act: ReLU
  mask_RI_split: false
  mask_output_gate: false
  mask_dw_gate: false
  mask_direct: false
  mask_is2d: false
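# Visual front-end: ResNet video model initialized from a pretrained backbone
# checkpoint (kept frozen during training, see system.freeze_video_model).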
videomodel:
  _target_: look2hear.video_models.resnetmodel.ResNetVideoModel
  activation_type: PReLU
  pretrained: /home/likai/ssd/Look2hear/pretrain_zoo/frcnn_128_512.backbone.pth.tar
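# Optimizer for the audio model: AdamW, lr 1e-3, weight decay 0.1.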
audio_optimizer:
  _target_: torch.optim.AdamW
  lr: 0.001
  weight_decay: 0.1
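# LR scheduler: halve the learning rate after a patience of 10 without improvement.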
audio_scheduler:
  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
  mode: min
  factor: 0.5
  patience: 10
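# Training loss: permutation-invariant negative SI-SDR.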
audio_loss:
  _target_: look2hear.losses.pitwrapper.PITLossWrapper
  loss_func: look2hear.losses.snr.neg_sisdr
  pit: true
  mode: permutation-wise
  eval_func: min
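# Training system: single-speaker setup with the video model frozen;
# torch.compile disabled.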
system:
  _target_: look2hear.systems.single_speaker.SingleSpeaker
  freeze_video_model: true
  compile: false
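# Experiment output directory and run name.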
exp:
  dir: /home/likai/ssd/Look2hear/examples/CTCNet
  name: CTCNet-1
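# Keep the single best checkpoint by validation negative SI-SDR (lower is
# better), plus the last epoch.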
checkpoint:
  _target_: lightning.pytorch.callbacks.ModelCheckpoint
  dirpath: ${exp.dir}/${exp.name}/checkpoints
  monitor: val/neg_sisdr
  mode: min
  verbose: true
  save_top_k: 1
  save_last: true
  filename: '{epoch}-{val/neg_sisdr:.4f}'
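# Offline Weights & Biases logging under the experiment directory.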
logger:
  _target_: lightning.pytorch.loggers.WandbLogger
  name: ${exp.name}
  save_dir: ${exp.dir}/${exp.name}/logs
  offline: true
  project: Look2hear
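# Trainer: single CUDA device, bf16 mixed precision, 5 epochs, gradient
# clipping at 5.0; train/val batches limited to 0.1% of each split
# (consistent with a quick test run).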
trainer:
  _target_: lightning.pytorch.Trainer
  devices:
    - 0
  max_epochs: 5
  sync_batchnorm: true
  gradient_clip_val: 5.0
  default_root_dir: ${exp.dir}/${exp.name}/
  accelerator: cuda
  limit_train_batches: 0.001
  limit_val_batches: 0.001
  fast_dev_run: false
  precision: bf16-mixed
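# Minimal usage sketch (an assumption about how the `_target_` sections are
# consumed, not part of the original file): each block can be built with
# hydra.utils.instantiate after loading this file with OmegaConf.
#   from hydra.utils import instantiate
#   from omegaconf import OmegaConf
#   cfg = OmegaConf.load("config.yaml")
#   datamodule = instantiate(cfg.datamodule)  # DataModule (nested DataClass is instantiated recursively)
#   audiomodel = instantiate(cfg.audiomodel)  # CTCNet separator
#   trainer = instantiate(cfg.trainer)        # lightning.pytorch.Trainer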