# Data pipeline: the loader wrapper and the dataset class it is given.
# `_target_` is the dotted path of the class each mapping describes.
# NOTE(review): indentation was rebuilt from a flattened dump. Confirm that the
# dataset keys below belong under `DataClass` and not directly under `datamodule`.
datamodule:
  _target_: look2hear.datas.datamodule.DataModule
  batch_size: 1
  num_workers: 8
  pin_memory: true
  DataClass:
    _target_: look2hear.datas.datasets.waveform.WaveformDataClass
    # Absolute, machine-specific locations of the tr / cv / tt split folders.
    train_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tr
    val_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/cv
    test_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tt
    n_src: 1
    task: enhancement
    sample_rate: 16000  # presumably Hz; confirm against the dataset class
    segment: 4          # presumably seconds; confirm against the dataset class
    is_drop: false
    normalize_audio: false
    augmentation: false

# Separation network settings. All keys sit at one level and are grouped below
# by their name prefix only; the grouping comments carry no meaning of their own.
audiomodel:
  _target_: look2hear.models.ctcnet.CTCNet

  # Encoder / decoder selection.
  encoder_type: ConvolutionalEncoder
  decoder_type: ConvolutionalDecoder

  # Keys prefixed `audio_`.
  audio_channels: 1
  audio_encoder_channels: 512
  audio_encoder_kernels: 21
  audio_encoder_strides: 10
  audio_in_channels: 512
  audio_out_channels: 512
  audio_kernel_size: 5
  audio_depth: 4
  audio_block_type: ConvNormAct
  audio_norm_type: gLN
  audio_act_type: PReLU
  audio_shared: true

  # Keys prefixed `visual_`.
  visual_encoder_channels: 512
  visual_in_channels: 64
  visual_out_channels: 64
  visual_kernel_size: 3
  visual_depth: 4
  visual_block_type: ConvNormAct
  visual_norm_type: BatchNorm1d
  visual_act_type: PReLU
  visual_shared: false

  # Fusion and repeat counts.
  fusion_type: ConcatFusion
  fusion_shared: false
  n_repeats: 3
  m_repeats: 13

  # Mask settings.
  mask_types: MaskGenerator
  num_speakers: 1
  mask_kernel_size: 1
  mask_act: ReLU
  mask_RI_split: false
  mask_output_gate: false
  mask_dw_gate: false
  mask_direct: false
  mask_is2d: false

# Video model settings. `pretrained` is an absolute, machine-specific path to a
# weights archive.
videomodel:
  _target_: look2hear.video_models.resnetmodel.ResNetVideoModel
  activation_type: PReLU
  pretrained: /home/likai/ssd/Look2hear/pretrain_zoo/frcnn_128_512.backbone.pth.tar

# Optimizer: AdamW with learning rate 1e-3 and weight decay 0.1.
audio_optimizer:
  _target_: torch.optim.AdamW
  lr: 0.001
  weight_decay: 0.1

# LR schedule: halve the learning rate when the watched quantity has stopped
# decreasing (`mode: min`) for `patience` epochs. Which metric is watched is
# not set in this section.
audio_scheduler:
  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
  mode: min
  factor: 0.5
  patience: 10

# Training loss: a wrapper class configured with a loss function that is given
# as a dotted-path string.
# NOTE(review): the accepted values of `mode` and `eval_func` are defined by
# the wrapper class, which is not visible here; confirm them against it.
audio_loss:
  _target_: look2hear.losses.pitwrapper.PITLossWrapper
  loss_func: look2hear.losses.snr.neg_sisdr
  pit: true
  mode: permutation-wise
  eval_func: min

# Training system (the class named by `_target_`) and its two switches.
system:
  _target_: look2hear.systems.single_speaker.SingleSpeaker
  freeze_video_model: true
  compile: false

# Experiment identity. Other sections build their output paths from these two
# values through `${exp.dir}` and `${exp.name}`.
exp:
  dir: /home/likai/ssd/Look2hear/examples/CTCNet
  name: CTCNet-1

# Checkpoint callback: keeps the single best checkpoint (lowest
# `val/neg_sisdr`) plus the most recent one, under the experiment folder.
checkpoint:
  _target_: lightning.pytorch.callbacks.ModelCheckpoint
  dirpath: ${exp.dir}/${exp.name}/checkpoints
  monitor: val/neg_sisdr
  mode: min
  verbose: true
  save_top_k: 1
  save_last: true
  # The monitored metric name contains "/". With automatic metric-name
  # insertion that name is copied into the file name, and the slash turns part
  # of it into an extra sub-directory. The labels are therefore written out
  # explicitly and auto-insertion is switched off.
  filename: 'epoch={epoch}-neg_sisdr={val/neg_sisdr:.4f}'
  auto_insert_metric_name: false

# Experiment logger: Weights & Biases in offline mode, writing under the
# experiment folder and using the experiment name as the run name.
logger:
  _target_: lightning.pytorch.loggers.WandbLogger
  project: Look2hear
  name: ${exp.name}
  save_dir: ${exp.dir}/${exp.name}/logs
  offline: true

# Lightning Trainer settings.
trainer:
  _target_: lightning.pytorch.Trainer

  # Hardware and numerics: one CUDA device (index 0), bf16 mixed precision.
  accelerator: cuda
  devices: [0]
  precision: bf16-mixed
  sync_batchnorm: true

  # Run length. A float batch limit is a fraction of the available batches.
  # NOTE(review): 0.001 of the batches and 5 epochs look like smoke-test
  # values; confirm before using this file for a full training run.
  max_epochs: 5
  limit_train_batches: 0.001
  limit_val_batches: 0.001
  fast_dev_run: false

  # Optimisation safeguard.
  gradient_clip_val: 5.0

  # Output location.
  default_root_dir: ${exp.dir}/${exp.name}/

|