Ali Safaya committed on
Commit
48358cd
·
1 Parent(s): f19b223

upload model

Browse files
.gitattributes CHANGED
@@ -31,3 +31,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
31
  *.zip filter=lfs diff=lfs merge=lfs -text
32
  *.zst filter=lfs diff=lfs merge=lfs -text
33
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
31
  *.zip filter=lfs diff=lfs merge=lfs -text
32
  *.zst filter=lfs diff=lfs merge=lfs -text
33
  *tfevents* filter=lfs diff=lfs merge=lfs -text
34
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "asafaya/hubert-large-arabic",
3
+ "speechbrain_interface": "EncoderASR",
4
+ "activation_dropout": 0.0,
5
+ "apply_spec_augment": true,
6
+ "architectures": [
7
+ "HubertModel"
8
+ ],
9
+ "attention_dropout": 0.1,
10
+ "bos_token_id": 1,
11
+ "classifier_proj_size": 256,
12
+ "conv_bias": true,
13
+ "conv_dim": [
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512
21
+ ],
22
+ "conv_kernel": [
23
+ 10,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 2,
29
+ 2
30
+ ],
31
+ "conv_stride": [
32
+ 5,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2
39
+ ],
40
+ "ctc_loss_reduction": "sum",
41
+ "ctc_zero_infinity": false,
42
+ "do_stable_layer_norm": true,
43
+ "eos_token_id": 2,
44
+ "feat_extract_activation": "gelu",
45
+ "feat_extract_dropout": 0.0,
46
+ "feat_extract_norm": "layer",
47
+ "feat_proj_dropout": 0.1,
48
+ "feat_proj_layer_norm": true,
49
+ "final_dropout": 0.0,
50
+ "gradient_checkpointing": false,
51
+ "hidden_act": "gelu",
52
+ "hidden_dropout": 0.1,
53
+ "hidden_size": 1024,
54
+ "initializer_range": 0.02,
55
+ "intermediate_size": 4096,
56
+ "layer_norm_eps": 1e-05,
57
+ "layerdrop": 0.1,
58
+ "mask_channel_length": 10,
59
+ "mask_channel_min_space": 1,
60
+ "mask_channel_other": 0.0,
61
+ "mask_channel_prob": 0.0,
62
+ "mask_channel_selection": "static",
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_min_space": 1,
69
+ "mask_time_other": 0.0,
70
+ "mask_time_prob": 0.075,
71
+ "mask_time_selection": "static",
72
+ "model_type": "hubert",
73
+ "num_attention_heads": 16,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "pad_token_id": 0,
79
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
80
+ "torch_dtype": "float32",
81
+ "transformers_version": "4.16.2",
82
+ "use_weighted_layer_sum": false,
83
+ "vocab_size": 500
84
+ }
hyperparams.yaml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Hugging Face Hub ID of the pretrained Arabic HuBERT-large encoder (loaded through the wav2vec2 interface).
3
+ wav2vec2_hub: asafaya/hubert-large-arabic
4
+
5
+ sample_rate: 16000
6
+
7
+ # Tokenizer parameters
8
+ token_type: unigram # ["unigram", "bpe", "char"]
9
+ character_coverage: 1.0
10
+
11
+ # Model parameters
12
+ activation: !name:torch.nn.GELU
13
+ wav2vec_output_dim: 1024
14
+ dnn_neurons: 1024
15
+ freeze_wav2vec: false
16
+ dropout: 0.2
17
+
18
+ # Outputs
19
+ output_neurons: 125 # token vocabulary size; blank index = 0, bos index = 1, eos index = 2
20
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
21
+
22
+ # Decoding parameters
23
+ # Make sure the bos and eos indices match the ones used by the tokenizer
24
+ blank_index: 0
25
+ bos_index: 1
26
+ eos_index: 2
27
+
28
+ enc: &id002 !new:speechbrain.nnet.containers.Sequential
29
+ input_shape: [null, null, 1024]
30
+ linear1: !name:speechbrain.nnet.linear.Linear
31
+ n_neurons: 1024
32
+ bias: true
33
+ bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
34
+ activation: !new:torch.nn.GELU
35
+ drop: !new:torch.nn.Dropout
36
+ p: 0.2
37
+ linear2: !name:speechbrain.nnet.linear.Linear
38
+ n_neurons: 1024
39
+ bias: true
40
+ bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
41
+ activation2: !new:torch.nn.GELU
42
+ drop2: !new:torch.nn.Dropout
43
+ p: 0.2
44
+ linear3: !name:speechbrain.nnet.linear.Linear
45
+ n_neurons: 1024
46
+ bias: true
47
+ bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
48
+ activation3: !new:torch.nn.GELU
49
+
50
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
51
+ source: asafaya/hubert-large-arabic
52
+ output_norm: true
53
+ freeze: false
54
+ save_path: wav2vec2_checkpoint
55
+
56
+ ctc_lin: !new:speechbrain.nnet.linear.Linear
57
+ input_size: 1024
58
+ n_neurons: 125
59
+
60
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
61
+ apply_log: true
62
+
63
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
64
+ blank_index: 0
65
+
66
+ modules:
67
+ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
68
+ wav2vec2: !ref <wav2vec2>
69
+ enc: !ref <enc>
70
+ ctc_lin: !ref <ctc_lin>
71
+
72
+ model: !new:torch.nn.ModuleList
73
+ - [!ref <enc>, !ref <ctc_lin>]
74
+
75
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
76
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
77
+ split_tokens: true
78
+
79
+ decoding_function: !name:speechbrain.decoders.ctc.ctc_greedy_decode
80
+ blank_id: !ref <blank_index>
81
+
82
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
83
+ loadables:
84
+ wav2vec2: !ref <wav2vec2>
85
+ model: !ref <model>
86
+ tokenizer: !ref <tokenizer>
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b0a97590671d8b8928824205c9746fed51c2d29f301aebcf254f9cf61795298
3
+ size 13164862
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
tokenizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218ec5dc7632d1f191bba36d9a499d883ffcf43a2c1dcaf025f130335672bf93
3
+ size 239537
wav2vec2.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb5ffad02a5bf771d3b845154de53dc131fa679c9456a3502c19c7c061fa0e5
3
+ size 1261933253