JusperLee committed on
Commit
833a42b
·
verified ·
1 Parent(s): c80de7a

Push model using huggingface_hub.

Browse files
Files changed (2) hide show
  1. best_model.bin +3 -0
  2. config.yaml +109 -0
best_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7023f9debdbd3e51ea534f4113bd982310de6912e5d106d2384f63f1a8f0282
3
+ size 24544610
config.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datamodule:
2
+ _target_: look2hear.datas.datamodule.DataModule
3
+ batch_size: 1
4
+ num_workers: 8
5
+ pin_memory: true
6
+ DataClass:
7
+ _target_: look2hear.datas.datasets.waveform.WaveformDataClass
8
+ train_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tr
9
+ val_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/cv
10
+ test_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tt
11
+ n_src: 1
12
+ task: enhancement
13
+ sample_rate: 16000
14
+ segment: 4
15
+ is_drop: false
16
+ normalize_audio: false
17
+ augmentation: false
18
+ audiomodel:
19
+ _target_: look2hear.models.ctcnet.CTCNet
20
+ encoder_type: ConvolutionalEncoder
21
+ decoder_type: ConvolutionalDecoder
22
+ audio_channels: 1
23
+ audio_encoder_channels: 512
24
+ audio_encoder_kernels: 21
25
+ audio_encoder_strides: 10
26
+ audio_in_channels: 512
27
+ audio_out_channels: 512
28
+ audio_kernel_size: 5
29
+ audio_depth: 4
30
+ audio_block_type: ConvNormAct
31
+ audio_norm_type: gLN
32
+ audio_act_type: PReLU
33
+ audio_shared: true
34
+ visual_encoder_channels: 512
35
+ visual_in_channels: 64
36
+ visual_out_channels: 64
37
+ visual_kernel_size: 3
38
+ visual_depth: 4
39
+ visual_block_type: ConvNormAct
40
+ visual_norm_type: BatchNorm1d
41
+ visual_act_type: PReLU
42
+ visual_shared: false
43
+ fusion_type: ConcatFusion
44
+ fusion_shared: false
45
+ n_repeats: 3
46
+ m_repeats: 13
47
+ mask_types: MaskGenerator
48
+ num_speakers: 1
49
+ mask_kernel_size: 1
50
+ mask_act: ReLU
51
+ mask_RI_split: false
52
+ mask_output_gate: false
53
+ mask_dw_gate: false
54
+ mask_direct: false
55
+ mask_is2d: false
56
+ videomodel:
57
+ _target_: look2hear.video_models.resnetmodel.ResNetVideoModel
58
+ activation_type: PReLU
59
+ pretrained: /home/likai/ssd/Look2hear/pretrain_zoo/frcnn_128_512.backbone.pth.tar
60
+ audio_optimizer:
61
+ _target_: torch.optim.AdamW
62
+ lr: 0.001
63
+ weight_decay: 0.1
64
+ audio_scheduler:
65
+ _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
66
+ mode: min
67
+ factor: 0.5
68
+ patience: 10
69
+ audio_loss:
70
+ _target_: look2hear.losses.pitwrapper.PITLossWrapper
71
+ loss_func: look2hear.losses.snr.neg_sisdr
72
+ pit: true
73
+ mode: permutation-wise
74
+ eval_func: min
75
+ system:
76
+ _target_: look2hear.systems.single_speaker.SingleSpeaker
77
+ freeze_video_model: true
78
+ compile: false
79
+ exp:
80
+ dir: /home/likai/ssd/Look2hear/examples/CTCNet
81
+ name: CTCNet-1
82
+ checkpoint:
83
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
84
+ dirpath: ${exp.dir}/${exp.name}/checkpoints
85
+ monitor: val/neg_sisdr
86
+ mode: min
87
+ verbose: true
88
+ save_top_k: 1
89
+ save_last: true
90
+ filename: '{epoch}-{val/neg_sisdr:.4f}'
91
+ logger:
92
+ _target_: lightning.pytorch.loggers.WandbLogger
93
+ name: ${exp.name}
94
+ save_dir: ${exp.dir}/${exp.name}/logs
95
+ offline: true
96
+ project: Look2hear
97
+ trainer:
98
+ _target_: lightning.pytorch.Trainer
99
+ devices:
100
+ - 0
101
+ max_epochs: 5
102
+ sync_batchnorm: true
103
+ gradient_clip_val: 5.0
104
+ default_root_dir: ${exp.dir}/${exp.name}/
105
+ accelerator: cuda
106
+ limit_train_batches: 0.001
107
+ limit_val_batches: 0.001
108
+ fast_dev_run: false
109
+ precision: bf16-mixed