Jan Svec committed on
Commit
8ee5596
·
1 Parent(s): c61b774

Init commit

Browse files
embedding_model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0575cb64845e6b9a10db9bcb74d5ac32b326b8dc90352671d345e2ee3d0126a2
3
+ size 83316686
hyperparams.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ############################################################################
2
+ # Model:
3
+ # Author:
4
+ # ############################################################################
5
+
6
+ # Feature parameters
7
+ sample_rate: 16000
8
+ time_resolution: 0.01 # in seconds (e.g., 0.01 = 10 ms)
9
+ n_fft: 400
10
+ n_mels_vad: 40
11
+ batch_size: 512
12
+
13
+ # VAD parameters
14
+ cnn1_channels: 16
15
+ cnn2_channels: 32
16
+ cnn_kernelsize: (3, 3)
17
+ rnn_layers: 2
18
+ rnn_neurons: 32
19
+ rnn_bidirectional: True
20
+ dnn_blocks: 1
21
+ dnn_neurons: 16
22
+ output_neurons_vad: 1
23
+
24
+ # ECAPA_TDNN
25
+ n_mels_ecapa: 80
26
+ out_neurons_ecapa: 7205
27
+ emb_dim: 192
28
+
29
+ dataloader_opts:
30
+ batch_size: !ref <batch_size>
31
+
32
+ # VAD objects
33
+ compute_fbank_vad: !new:speechbrain.lobes.features.Fbank
34
+ sample_rate: !ref <sample_rate>
35
+ n_fft: !ref <n_fft>
36
+ n_mels: !ref <n_mels_vad>
37
+ hop_length: !ref <time_resolution> * 1000 # in ms
38
+
39
+ mean_var_norm_vad: !new:speechbrain.processing.features.InputNormalization
40
+ norm_type: sentence
41
+
42
+ cnn: !new:speechbrain.nnet.containers.Sequential
43
+ input_shape: [null, null, !ref <n_mels_vad>]
44
+ norm1: !name:speechbrain.nnet.normalization.LayerNorm
45
+ cnn1: !name:speechbrain.lobes.models.CRDNN.CNN_Block
46
+ channels: !ref <cnn1_channels>
47
+ kernel_size: !ref <cnn_kernelsize>
48
+ cnn2: !name:speechbrain.lobes.models.CRDNN.CNN_Block
49
+ channels: !ref <cnn2_channels>
50
+ kernel_size: !ref <cnn_kernelsize>
51
+
52
+ rnn: !new:speechbrain.nnet.RNN.GRU
53
+ input_shape: [null, null, 320]
54
+ hidden_size: !ref <rnn_neurons>
55
+ num_layers: !ref <rnn_layers>
56
+ bidirectional: !ref <rnn_bidirectional>
57
+
58
+ dnn: !new:speechbrain.nnet.containers.Sequential
59
+ input_shape: [null, null, !ref <rnn_neurons> * 2]
60
+ dnn1: !name:speechbrain.lobes.models.CRDNN.DNN_Block
61
+ neurons: !ref <dnn_neurons>
62
+ dnn2: !name:speechbrain.lobes.models.CRDNN.DNN_Block
63
+ neurons: !ref <dnn_neurons>
64
+ lin: !name:speechbrain.nnet.linear.Linear
65
+ n_neurons: !ref <output_neurons_vad>
66
+ bias: False
67
+ ##########################################################
68
+
69
+ # ECAPA_TDNN objects
70
+ compute_fbank_ecapa: !new:speechbrain.lobes.features.Fbank
71
+ n_mels: !ref <n_mels_ecapa>
72
+
73
+ mean_var_norm_ecapa: !new:speechbrain.processing.features.InputNormalization
74
+ norm_type: sentence
75
+ std_norm: False
76
+
77
+ embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
78
+ input_size: !ref <n_mels_ecapa>
79
+ channels: [1024, 1024, 1024, 1024, 3072]
80
+ kernel_sizes: [5, 3, 3, 3, 1]
81
+ dilations: [1, 2, 3, 4, 1]
82
+ attention_channels: 128
83
+ lin_neurons: 192
84
+
85
+ mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
86
+ norm_type: global
87
+ std_norm: False
88
+ #####################
89
+
90
+ vad: !new:torch.nn.ModuleList
91
+ - [!ref <cnn>, !ref <rnn>, !ref <dnn>]
92
+
93
+ #####################
94
+ modules:
95
+ compute_fbank_vad: !ref <compute_fbank_vad>
96
+ compute_fbank_ecapa: !ref <compute_fbank_ecapa>
97
+ cnn: !ref <cnn>
98
+ rnn: !ref <rnn>
99
+ dnn: !ref <dnn>
100
+ mean_var_norm_vad: !ref <mean_var_norm_vad>
101
+ mean_var_norm_ecapa: !ref <mean_var_norm_ecapa>
102
+ embedding_model: !ref <embedding_model>
103
+ mean_var_norm_emb: !ref <mean_var_norm_emb>
104
+
105
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
106
+ loadables:
107
+ vad: !ref <vad>
108
+ embedding_model: !ref <embedding_model>
109
+ mean_var_norm_vad: !ref <mean_var_norm_vad>
110
+ mean_var_norm_emb: !ref <mean_var_norm_emb>
mean_var_norm_emb.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd70225b05b37be64fc5a95e24395d804231d43f74b2e1e5a513db7b69b34c33
3
+ size 1921
mean_var_norm_vad.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705be69730a6bf9d029c5c93b8adacc680e3c635b01d514859bdfec51fc97760
3
+ size 1063
vad.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f378f95e8abae056ed46daee57884f96ac8f7057d42f92a74a491ebdeb3d7594
3
+ size 452671