Spaces:
Running
on
Zero
Running
on
Zero
#!/bin/bash
mode: 'inference'
use_cuda: 1 # 1 for True, 0 for False
num_gpu: 1
sampling_rate: 16000
network: "AV_MossFormer2_TSE_16K" # network type
checkpoint_dir: "checkpoints/AV_MossFormer2_TSE_16K"
input_path: "scp/video_samples.scp" # an input dir or input scp file
output_dir: "path_to_output_videos_tse" # output dir to store processed audio
# decode parameters
one_time_decode_length: 3 # maximum segment length for one-pass decoding (seconds); audio longer than this value will use segmented decoding
decode_window: 3 # one-pass decoding length
# Model-specific settings for target speaker extraction
network_reference:
  cue: lip
  backbone: resnet18
  emb_size: 256
network_audio:
  backbone: mossformer2
  encoder_kernel_size: 16
  encoder_out_nchannels: 512
  encoder_in_nchannels: 1
  masknet_numspks: 1
  masknet_chunksize: 250
  masknet_numlayers: 1
  masknet_norm: "ln"
  masknet_useextralinearlayer: False
  masknet_extraskipconnection: True
  intra_numlayers: 24
  intra_nhead: 8
  intra_dffn: 1024
  intra_dropout: 0
  intra_use_positional: True
  intra_norm_before: True