# HyperPyYAML configuration: wav2vec2 encoder + attentional GRU decoder,
# with a beam searcher and a Pretrainer that loads `model.ckpt` from
# `pretrained_path`.
#
# NOTE(review): every `!ref <...>` target below was reconstructed by matching
# it to a key defined in this file. Three targets (`wav2vec2_hub`,
# `wav2vec2_folder`, `dropout`) have no definition in this file and are
# flagged inline -- confirm them against the original recipe before use.

# Hparams NEEDED
HPARAMS_NEEDED: ["wav2vec_output_dim", "emb_size", "dec_neurons", "dec_layers",
                 "output_neurons", "log_softmax", "tokenizer"]
# Modules Needed
MODULES_NEEDED: ["encoder_w2v2", "embedding", "ctc_lin", "seq_lin"]

# Pretrain folder (HuggingFace)
pretrained_path: Porjaz/wav2vec2-aed-macedonian-asr

####################### Training Parameters ####################################

####################### Model Parameters #######################################
wav2vec_output_dim: 1024
emb_size: 128
dec_neurons: 1024
dec_layers: 1
output_neurons: 1000
# All four special-token indices are set to the same value (0).
blank_index: 0
bos_index: 0
eos_index: 0
unk_index: 0

# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 1.0
# valid_beam_size, ctc_window_size and temperature_lm are defined here but
# not referenced by any object in this file.
valid_beam_size: 10
test_beam_size: 10
using_eos_threshold: True
eos_threshold: 1.5
using_max_attn_shift: True
max_attn_shift: 300
temperature: 1.0
ctc_window_size: 200
temperature_lm: 1.25

# Scoring parameters
# Not referenced in this file: the only consumer would be the scorer, which is
# commented out in `test_search` below.
ctc_weight_decode: 0.0
coverage_penalty: 1.5
lm_weight: 0.0

# Wav2vec2 encoder
encoder_w2v2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
  # NOTE(review): `wav2vec2_hub` is not defined in this file; the original
  # reference target was lost. Define it or pass it as an override -- TODO confirm.
  source: !ref <wav2vec2_hub>
  output_norm: True
  freeze: False
  freeze_feature_extractor: True
  # NOTE(review): `wav2vec2_folder` is not defined in this file; the original
  # reference target was lost. Define it or pass it as an override -- TODO confirm.
  save_path: !ref <wav2vec2_folder>
  output_all_hiddens: False

embedding: !new:speechbrain.nnet.embedding.Embedding
  num_embeddings: !ref <output_neurons>
  embedding_dim: !ref <emb_size>

# Attention-based RNN decoder.
decoder: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
  enc_dim: !ref <wav2vec_output_dim>
  input_size: !ref <emb_size>
  rnn_type: gru
  attn_type: location
  hidden_size: !ref <dec_neurons>
  attn_dim: 512
  num_layers: !ref <dec_layers>
  scaling: 1.0
  channels: 10
  kernel_size: 100
  re_init: True
  # NOTE(review): `dropout` is not defined in this file; the original
  # reference target was lost. Define it or pass it as an override -- TODO confirm.
  dropout: !ref <dropout>

# Linear layer named for CTC; presumably fed by the encoder output, hence
# `wav2vec_output_dim` -- TODO confirm against the checkpoint shapes.
ctc_lin: !new:speechbrain.nnet.linear.Linear
  input_size: !ref <wav2vec_output_dim>
  n_neurons: !ref <output_neurons>

# Linear layer passed to the beam searcher as `linear`; presumably fed by the
# decoder, hence `dec_neurons` -- TODO confirm against the checkpoint shapes.
seq_lin: !new:speechbrain.nnet.linear.Linear
  input_size: !ref <dec_neurons>
  n_neurons: !ref <output_neurons>

log_softmax: !new:speechbrain.nnet.activations.Softmax
  apply_log: True

tokenizer: !new:sentencepiece.SentencePieceProcessor
  model_file: 1000_unigram.model

modules:
  encoder_w2v2: !ref <encoder_w2v2>
  embedding: !ref <embedding>
  decoder: !ref <decoder>
  ctc_lin: !ref <ctc_lin>
  seq_lin: !ref <seq_lin>

# Single container holding every trainable module; this is the object the
# pretrainer below loads `model.ckpt` into. The order of the list is assumed
# to match the order used when the checkpoint was saved -- TODO confirm.
model: !new:torch.nn.ModuleList
  - [!ref <encoder_w2v2>, !ref <embedding>, !ref <decoder>, !ref <ctc_lin>, !ref <seq_lin>]

############################## Decoding & optimiser ############################

test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
  embedding: !ref <embedding>
  decoder: !ref <decoder>
  linear: !ref <seq_lin>
  bos_index: !ref <bos_index>
  eos_index: !ref <eos_index>
  min_decode_ratio: !ref <min_decode_ratio>
  max_decode_ratio: !ref <max_decode_ratio>
  beam_size: !ref <test_beam_size>
  eos_threshold: !ref <eos_threshold>
  using_max_attn_shift: !ref <using_max_attn_shift>
  max_attn_shift: !ref <max_attn_shift>
  temperature: !ref <temperature>
  # Disabled in the original; no `scorer` object is defined in this file.
  # scorer: !ref <scorer>

############################## Logging and Pretrainer ##########################

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    model: !ref <model>
  paths:
    model: !ref <pretrained_path>/model.ckpt