|
--- |
|
tags: |
|
- espnet |
|
- audio |
|
- automatic-speech-recognition |
|
language: en |
|
datasets: |
|
- slue-voxceleb |
|
license: cc-by-4.0 |
|
--- |
|
|
|
## ESPnet2 ASR model |
|
|
|
### `espnet/sluevoxceleb_whisper_complex_slu` |
|
|
|
This model was trained by “siddhu001” using the slue-voxceleb recipe in [ESPnet](https://github.com/espnet/espnet/). |
|
|
|
### Demo: How to use in ESPnet2 |
|
|
|
Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html) if you haven't done that already. |
|
|
|
```bash |
|
cd espnet |
|
git checkout e23ef85f0b3116ad5c60d0833f186da0deec0734 |
|
pip install -e . |
|
cd egs2/slue-voxceleb/slu1_correct |
|
./run.sh --skip_data_prep false --skip_train true --download_model espnet/sluevoxceleb_whisper_complex_slu |
|
``` |
|
|
|
<!-- Generated by scripts/utils/show_asr_result.sh --> |
|
# RESULTS |
|
## Environments |
|
- date: `Sat Feb 10 19:24:27 CST 2024` |
|
- python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]` |
|
- espnet version: `espnet 202310` |
|
- pytorch version: `pytorch 2.1.0+cu121` |
|
- Git hash: `21d2105784e4da98397bf487b2550d4c6e16d40d` |
|
- Commit date: `Wed Jan 31 13:40:37 2024 -0600` |
|
|
|
## exp/slu_train_asr_whisper_weighted_0.0005_raw_en_word_sp |
|
### WER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|decode_asr_ctc0.3_beam10_slu_model_valid.acc.ave_10best/test|3530|144908|87.2|8.5|4.3|3.0|15.8|93.4| |
|
|decode_asr_slu_model_valid.acc.ave_10best/devel|1450|58104|81.2|11.1|7.6|5.3|24.1|94.6| |
|
|decode_asr_slu_model_valid.acc.ave_10best/test|3530|144908|79.5|12.3|8.2|5.8|26.3|96.1| |
|
|
|
### CER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|decode_asr_ctc0.3_beam10_slu_model_valid.acc.ave_10best/test|3530|647097|93.9|2.4|3.7|2.8|8.9|93.4| |
|
|decode_asr_slu_model_valid.acc.ave_10best/devel|1450|256305|89.6|3.5|6.9|4.7|15.2|94.6| |
|
|decode_asr_slu_model_valid.acc.ave_10best/test|3530|647097|88.6|3.8|7.6|5.2|16.6|96.1| |
|
|
|
### TER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
## exp/slu_train_asr_whisper_weighted_0.0005_raw_en_word_sp/decode_asr_ctc0.3_beam10_slu_model_valid.acc.ave_10best |
|
### WER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/devel|1451|58267|88.7|7.3|4.0|2.4|13.7|91.5| |
|
|
|
### CER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/devel|1451|256942|94.7|2.1|3.3|2.3|7.7|91.5| |
|
|
|
### TER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
## exp/slu_train_asr_whisper_weighted_0.0005_raw_en_word_sp/decode_asr_slu_model_valid.acc.ave_10best |
|
### WER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/devel|1451|58267|81.2|11.1|7.7|5.3|24.2|94.6| |
|
|
|
### CER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|org/devel|1451|256942|89.5|3.5|7.0|4.7|15.2|94.6| |
|
|
|
### TER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|
|
## ASR config |
|
|
|
<details><summary>expand</summary> |
|
|
|
``` |
|
config: conf/tuning/train_asr_whisper_weighted_0.0005.yaml |
|
print_config: false |
|
log_level: INFO |
|
drop_last_iter: false |
|
dry_run: false |
|
iterator_type: sequence |
|
valid_iterator_type: null |
|
output_dir: exp/slu_train_asr_whisper_weighted_0.0005_raw_en_word_sp |
|
ngpu: 1 |
|
seed: 2022 |
|
num_workers: 2 |
|
num_att_plot: 3 |
|
dist_backend: nccl |
|
dist_init_method: env:// |
|
dist_world_size: 4 |
|
dist_rank: 0 |
|
local_rank: 0 |
|
dist_master_addr: localhost |
|
dist_master_port: 53071 |
|
dist_launcher: null |
|
multiprocessing_distributed: true |
|
unused_parameters: false |
|
sharded_ddp: false |
|
cudnn_enabled: true |
|
cudnn_benchmark: false |
|
cudnn_deterministic: true |
|
collect_stats: false |
|
write_collected_feats: false |
|
max_epoch: 70 |
|
patience: null |
|
val_scheduler_criterion: |
|
- valid |
|
- loss |
|
early_stopping_criterion: |
|
- valid |
|
- loss |
|
- min |
|
best_model_criterion: |
|
- - valid |
|
- acc |
|
- max |
|
keep_nbest_models: 10 |
|
nbest_averaging_interval: 0 |
|
grad_clip: 5.0 |
|
grad_clip_type: 2.0 |
|
grad_noise: false |
|
accum_grad: 2 |
|
no_forward_run: false |
|
resume: true |
|
train_dtype: float32 |
|
use_amp: false |
|
log_interval: null |
|
use_matplotlib: true |
|
use_tensorboard: true |
|
create_graph_in_tensorboard: false |
|
use_wandb: false |
|
wandb_project: null |
|
wandb_id: null |
|
wandb_entity: null |
|
wandb_name: null |
|
wandb_model_log_interval: -1 |
|
detect_anomaly: false |
|
use_lora: false |
|
save_lora_only: true |
|
lora_conf: {} |
|
pretrain_path: null |
|
init_param: [] |
|
ignore_init_mismatch: false |
|
freeze_param: |
|
- encoder |
|
num_iters_per_epoch: null |
|
batch_size: 20 |
|
valid_batch_size: null |
|
batch_bins: 6000000 |
|
valid_batch_bins: null |
|
train_shape_file: |
|
- exp/slu_stats_raw_en_word_sp/train/speech_shape |
|
- exp/slu_stats_raw_en_word_sp/train/text_shape.word |
|
valid_shape_file: |
|
- exp/slu_stats_raw_en_word_sp/valid/speech_shape |
|
- exp/slu_stats_raw_en_word_sp/valid/text_shape.word |
|
batch_type: numel |
|
valid_batch_type: null |
|
fold_length: |
|
- 80000 |
|
- 150 |
|
sort_in_batch: descending |
|
shuffle_within_batch: false |
|
sort_batch: descending |
|
multiple_iterator: false |
|
chunk_length: 500 |
|
chunk_shift_ratio: 0.5 |
|
num_cache_chunks: 1024 |
|
chunk_excluded_key_prefixes: [] |
|
chunk_default_fs: null |
|
train_data_path_and_name_and_type: |
|
- - dump/raw/train_sp/wav.scp |
|
- speech |
|
- sound |
|
- - dump/raw/train_sp/text |
|
- text |
|
- text |
|
valid_data_path_and_name_and_type: |
|
- - dump/raw/devel/wav.scp |
|
- speech |
|
- sound |
|
- - dump/raw/devel/text |
|
- text |
|
- text |
|
allow_variable_data_keys: false |
|
max_cache_size: 0.0 |
|
max_cache_fd: 32 |
|
allow_multi_rates: false |
|
valid_max_cache_size: null |
|
exclude_weight_decay: false |
|
exclude_weight_decay_conf: {} |
|
optim: adam |
|
optim_conf: |
|
lr: 0.0005 |
|
weight_decay: 1.0e-06 |
|
scheduler: warmuplr |
|
scheduler_conf: |
|
warmup_steps: 5000 |
|
token_list: |
|
- <blank> |
|
- <unk> |
|
- ▁i |
|
- ▁and |
|
- '''' |
|
- s |
|
- ▁the |
|
- ▁a |
|
- ▁it |
|
- Neutral |
|
- ▁to |
|
- ▁you |
|
- ▁that |
|
- ▁of |
|
- ▁in |
|
- ▁was |
|
- ▁uh |
|
- ▁know |
|
- t |
|
- ▁so |
|
- ▁we |
|
- ▁he |
|
- ing |
|
- ▁um |
|
- ed |
|
- m |
|
- ▁like |
|
- ▁is |
|
- ▁but |
|
- Positive |
|
- y |
|
- ▁just |
|
- ▁they |
|
- re |
|
- ▁this |
|
- ▁for |
|
- ▁be |
|
- ▁my |
|
- er |
|
- ▁with |
|
- ▁on |
|
- ▁think |
|
- ▁p |
|
- ▁have |
|
- ▁she |
|
- e |
|
- ▁me |
|
- ▁really |
|
- ▁there |
|
- ▁what |
|
- ▁m |
|
- a |
|
- ▁do |
|
- ▁all |
|
- i |
|
- al |
|
- ve |
|
- c |
|
- ▁as |
|
- ▁about |
|
- ▁not |
|
- ▁t |
|
- n |
|
- ▁at |
|
- l |
|
- ▁had |
|
- ▁b |
|
- ▁when |
|
- ▁c |
|
- g |
|
- ar |
|
- ▁out |
|
- en |
|
- ▁s |
|
- ▁an |
|
- ▁people |
|
- or |
|
- an |
|
- d |
|
- o |
|
- ll |
|
- ▁are |
|
- in |
|
- ▁very |
|
- p |
|
- b |
|
- u |
|
- ▁because |
|
- es |
|
- ▁can |
|
- ▁don |
|
- ▁or |
|
- ▁up |
|
- it |
|
- ▁one |
|
- ly |
|
- ▁if |
|
- ▁f |
|
- st |
|
- ▁were |
|
- ▁mean |
|
- ▁d |
|
- ▁who |
|
- ▁then |
|
- ic |
|
- 'on' |
|
- ▁no |
|
- ▁go |
|
- ▁her |
|
- ▁g |
|
- ent |
|
- ▁st |
|
- ▁kind |
|
- ri |
|
- ▁would |
|
- ▁get |
|
- ▁e |
|
- le |
|
- at |
|
- r |
|
- ▁time |
|
- ▁w |
|
- ▁re |
|
- h |
|
- ▁from |
|
- ▁l |
|
- ▁said |
|
- ▁him |
|
- ▁how |
|
- v |
|
- ▁well |
|
- ▁h |
|
- ▁gonna |
|
- ▁lot |
|
- ▁see |
|
- f |
|
- ▁his |
|
- et |
|
- ion |
|
- ▁been |
|
- ▁great |
|
- ▁yeah |
|
- ▁love |
|
- ▁which |
|
- ▁got |
|
- k |
|
- ▁them |
|
- ▁way |
|
- id |
|
- ▁show |
|
- w |
|
- ▁some |
|
- ▁your |
|
- ▁did |
|
- ▁sort |
|
- ▁has |
|
- ▁things |
|
- ▁back |
|
- ▁where |
|
- ▁something |
|
- ir |
|
- ▁thing |
|
- ad |
|
- ▁su |
|
- ▁ch |
|
- ▁n |
|
- il |
|
- as |
|
- ▁j |
|
- ▁more |
|
- se |
|
- ▁say |
|
- ▁co |
|
- nd |
|
- ▁much |
|
- ▁always |
|
- ine |
|
- ▁r |
|
- ation |
|
- ur |
|
- ▁other |
|
- th |
|
- ▁ |
|
- ▁se |
|
- ▁now |
|
- ate |
|
- ▁doing |
|
- ▁work |
|
- ow |
|
- ▁could |
|
- ally |
|
- ▁these |
|
- Negative |
|
- ▁good |
|
- ▁any |
|
- ers |
|
- ce |
|
- ▁cause |
|
- ▁ex |
|
- ▁pro |
|
- ▁little |
|
- ▁actually |
|
- ▁into |
|
- ▁make |
|
- ▁first |
|
- ▁being |
|
- ra |
|
- ▁our |
|
- ▁al |
|
- ▁by |
|
- ▁film |
|
- ▁didn |
|
- ▁v |
|
- ct |
|
- ity |
|
- ch |
|
- un |
|
- ▁part |
|
- ▁de |
|
- ▁come |
|
- is |
|
- ie |
|
- ▁right |
|
- ▁o |
|
- ▁off |
|
- ol |
|
- ▁two |
|
- ▁never |
|
- ▁le |
|
- ot |
|
- ut |
|
- ▁movie |
|
- ▁play |
|
- ge |
|
- ies |
|
- el |
|
- ▁con |
|
- am |
|
- ▁going |
|
- ke |
|
- ▁want |
|
- im |
|
- ▁feel |
|
- ive |
|
- ro |
|
- ▁mo |
|
- ▁different |
|
- ck |
|
- ▁life |
|
- ist |
|
- ▁oh |
|
- all |
|
- ▁lo |
|
- ard |
|
- ▁went |
|
- and |
|
- ▁sh |
|
- ▁even |
|
- ry |
|
- ▁years |
|
- ▁look |
|
- ▁us |
|
- ant |
|
- ▁te |
|
- ▁k |
|
- ▁li |
|
- ▁happen |
|
- ure |
|
- ▁their |
|
- ▁those |
|
- ▁take |
|
- ment |
|
- ▁day |
|
- ble |
|
- ast |
|
- ▁every |
|
- um |
|
- ill |
|
- op |
|
- ▁thought |
|
- ou |
|
- us |
|
- ay |
|
- ▁th |
|
- ▁put |
|
- ▁story |
|
- ▁new |
|
- ▁down |
|
- ish |
|
- ▁big |
|
- ▁wanna |
|
- ▁ro |
|
- ▁also |
|
- ▁read |
|
- ▁around |
|
- ous |
|
- ▁through |
|
- red |
|
- ▁came |
|
- ▁character |
|
- ess |
|
- te |
|
- ver |
|
- ▁will |
|
- ag |
|
- ss |
|
- ▁fun |
|
- ▁over |
|
- ▁many |
|
- ▁bl |
|
- ▁cl |
|
- ▁man |
|
- ▁than |
|
- ▁pre |
|
- ▁world |
|
- ▁person |
|
- z |
|
- ▁sp |
|
- ven |
|
- ▁wanted |
|
- ▁bit |
|
- ▁before |
|
- ▁mar |
|
- one |
|
- ab |
|
- ▁en |
|
- ci |
|
- ▁set |
|
- ▁ha |
|
- ▁find |
|
- ul |
|
- ▁fi |
|
- ▁end |
|
- ▁un |
|
- ▁sc |
|
- ▁after |
|
- ind |
|
- ter |
|
- ▁working |
|
- ▁why |
|
- om |
|
- me |
|
- ▁such |
|
- ▁whole |
|
- ▁kinda |
|
- ne |
|
- ▁bo |
|
- x |
|
- ▁most |
|
- ▁ad |
|
- ▁guy |
|
- ▁spe |
|
- ars |
|
- ▁am |
|
- ful |
|
- ▁together |
|
- ▁let |
|
- ▁quite |
|
- ain |
|
- ▁everything |
|
- ▁made |
|
- ig |
|
- ▁old |
|
- able |
|
- ▁tr |
|
- ak |
|
- ▁fo |
|
- ▁po |
|
- ore |
|
- ice |
|
- ▁real |
|
- ▁knew |
|
- ▁hard |
|
- pp |
|
- age |
|
- ated |
|
- ▁same |
|
- ▁start |
|
- ▁ever |
|
- ning |
|
- ▁watch |
|
- art |
|
- ▁again |
|
- ▁here |
|
- are |
|
- ght |
|
- ong |
|
- ▁done |
|
- ▁only |
|
- ▁live |
|
- ▁wasn |
|
- ▁ho |
|
- ▁u |
|
- ▁maybe |
|
- ▁need |
|
- ▁everybody |
|
- ust |
|
- ans |
|
- ▁three |
|
- ▁having |
|
- ▁music |
|
- ack |
|
- ld |
|
- ▁trying |
|
- ▁guys |
|
- rou |
|
- ach |
|
- ving |
|
- ▁tell |
|
- ▁should |
|
- ff |
|
- ide |
|
- ▁four |
|
- ▁started |
|
- ▁com |
|
- ass |
|
- ▁long |
|
- ▁fe |
|
- ▁course |
|
- ▁called |
|
- ▁own |
|
- ress |
|
- ▁moment |
|
- ▁pl |
|
- ▁still |
|
- ▁anything |
|
- ▁family |
|
- ▁fin |
|
- ▁dan |
|
- ▁bro |
|
- 'no' |
|
- ther |
|
- ▁per |
|
- ▁amazing |
|
- ▁stuff |
|
- per |
|
- ▁jo |
|
- ▁certain |
|
- os |
|
- ▁talk |
|
- ater |
|
- ▁help |
|
- ▁too |
|
- ▁year |
|
- ight |
|
- ▁fa |
|
- self |
|
- ces |
|
- ▁br |
|
- ▁bet |
|
- ▁someone |
|
- ▁di |
|
- ▁sing |
|
- nt |
|
- ick |
|
- ▁ph |
|
- row |
|
- ▁script |
|
- ▁remember |
|
- ▁try |
|
- qu |
|
- ite |
|
- ▁young |
|
- ▁wh |
|
- ▁ser |
|
- ▁ask |
|
- ▁book |
|
- ▁each |
|
- ▁wr |
|
- ▁best |
|
- ▁ag |
|
- ▁women |
|
- ose |
|
- ions |
|
- ved |
|
- j |
|
- ue |
|
- ▁does |
|
- ▁five |
|
- ▁both |
|
- ▁friends |
|
- ▁act |
|
- iz |
|
- cess |
|
- pt |
|
- ▁somebody |
|
- ft |
|
- ▁nice |
|
- ▁myself |
|
- een |
|
- fe |
|
- sp |
|
- ict |
|
- ty |
|
- ▁child |
|
- ud |
|
- pe |
|
- ▁hope |
|
- ▁fact |
|
- ▁saying |
|
- ave |
|
- icul |
|
- au |
|
- ale |
|
- ris |
|
- ▁twenty |
|
- ▁school |
|
- ▁doesn |
|
- ▁able |
|
- pect |
|
- ▁last |
|
- ber |
|
- ▁song |
|
- od |
|
- ▁str |
|
- ▁interesting |
|
- lf |
|
- ▁em |
|
- ▁wor |
|
- ap |
|
- og |
|
- ▁ra |
|
- ▁dis |
|
- ▁coming |
|
- ▁ab |
|
- ▁house |
|
- ▁next |
|
- ▁tra |
|
- ▁okay |
|
- ere |
|
- ary |
|
- ▁incredi |
|
- ▁car |
|
- ▁job |
|
- ▁used |
|
- ▁give |
|
- ▁god |
|
- ▁americ |
|
- ▁characters |
|
- ▁app |
|
- ▁walk |
|
- ▁yes |
|
- rew |
|
- ▁getting |
|
- ▁six |
|
- ▁chan |
|
- ▁ne |
|
- ▁pretty |
|
- ang |
|
- ▁creat |
|
- ▁another |
|
- ▁ter |
|
- ▁kids |
|
- ▁felt |
|
- ▁sometimes |
|
- ▁place |
|
- out |
|
- ▁funny |
|
- ase |
|
- ich |
|
- act |
|
- ▁days |
|
- ▁hum |
|
- ▁bring |
|
- ts |
|
- ▁making |
|
- ▁comp |
|
- ▁become |
|
- ute |
|
- ▁wonderful |
|
- ron |
|
- les |
|
- ▁saw |
|
- ▁point |
|
- ia |
|
- ▁realiz |
|
- ▁int |
|
- ▁away |
|
- ays |
|
- ▁home |
|
- ace |
|
- ▁relationship |
|
- ▁woman |
|
- ▁everyone |
|
- ▁comes |
|
- ▁high |
|
- dd |
|
- ▁night |
|
- ath |
|
- ▁else |
|
- vent |
|
- ▁shoot |
|
- vers |
|
- day |
|
- ▁sure |
|
- ried |
|
- ned |
|
- ▁obviously |
|
- ▁dra |
|
- ▁inter |
|
- co |
|
- ▁playing |
|
- ▁important |
|
- ort |
|
- uck |
|
- ision |
|
- pport |
|
- ▁seen |
|
- pl |
|
- ▁fl |
|
- ound |
|
- ▁bas |
|
- ull |
|
- est |
|
- ▁actor |
|
- ▁lear |
|
- ▁worked |
|
- ▁believe |
|
- ▁gen |
|
- ▁keep |
|
- ▁friend |
|
- ▁sw |
|
- ▁des |
|
- ▁times |
|
- ▁im |
|
- ▁sur |
|
- ▁sit |
|
- ▁probably |
|
- ok |
|
- ▁took |
|
- ep |
|
- ough |
|
- ip |
|
- ood |
|
- ▁sa |
|
- ▁season |
|
- vel |
|
- wn |
|
- ▁dec |
|
- ▁excited |
|
- ian |
|
- ire |
|
- ph |
|
- ▁month |
|
- ner |
|
- ▁min |
|
- ▁rel |
|
- ating |
|
- body |
|
- ition |
|
- ▁loved |
|
- ▁aw |
|
- ▁hear |
|
- ple |
|
- ▁cool |
|
- ▁y |
|
- ord |
|
- our |
|
- ▁game |
|
- ms |
|
- ub |
|
- ▁might |
|
- ▁kid |
|
- ▁movies |
|
- ical |
|
- ▁bad |
|
- ▁scene |
|
- iv |
|
- ▁enough |
|
- ▁sm |
|
- bly |
|
- ▁fift |
|
- ▁eight |
|
- ▁experience |
|
- ▁actors |
|
- ▁cou |
|
- ▁understand |
|
- ▁week |
|
- ▁few |
|
- gin |
|
- ting |
|
- ▁director |
|
- ▁almost |
|
- ▁open |
|
- ren |
|
- ▁star |
|
- ▁room |
|
- ▁call |
|
- oy |
|
- ▁goes |
|
- ▁told |
|
- ▁once |
|
- ▁found |
|
- arly |
|
- ations |
|
- ward |
|
- ▁audience |
|
- ird |
|
- if |
|
- ▁qu |
|
- ▁ar |
|
- ▁definitely |
|
- ious |
|
- iting |
|
- ▁pol |
|
- ▁huge |
|
- ▁makes |
|
- aking |
|
- ream |
|
- ance |
|
- be |
|
- ▁la |
|
- ▁ac |
|
- iter |
|
- ▁run |
|
- ▁gotta |
|
- ▁gr |
|
- ▁cam |
|
- sh |
|
- ▁gets |
|
- ully |
|
- ▁says |
|
- ame |
|
- side |
|
- ▁bus |
|
- ▁shows |
|
- ▁dr |
|
- ▁inv |
|
- ▁idea |
|
- ▁talking |
|
- ▁wa |
|
- way |
|
- ▁art |
|
- ▁whatever |
|
- ▁write |
|
- ash |
|
- itt |
|
- ▁met |
|
- ▁wants |
|
- ▁role |
|
- ▁mu |
|
- ▁boy |
|
- ▁wrote |
|
- ger |
|
- ately |
|
- ▁exc |
|
- ▁mother |
|
- ▁produ |
|
- ▁cra |
|
- ates |
|
- ▁though |
|
- av |
|
- ▁episode |
|
- ▁sl |
|
- ▁change |
|
- ▁voice |
|
- ▁played |
|
- ily |
|
- ▁guess |
|
- ves |
|
- ▁hand |
|
- ady |
|
- ▁happy |
|
- ith |
|
- ▁name |
|
- ny |
|
- ▁gi |
|
- ▁looking |
|
- lev |
|
- ▁acting |
|
- aught |
|
- iss |
|
- ount |
|
- rom |
|
- ▁tw |
|
- ▁cont |
|
- ▁john |
|
- ▁far |
|
- ▁res |
|
- ▁sense |
|
- ake |
|
- ▁basically |
|
- ▁meet |
|
- ▁gu |
|
- ▁bre |
|
- ens |
|
- cept |
|
- ety |
|
- ▁girl |
|
- ▁york |
|
- ▁count |
|
- ▁shot |
|
- ise |
|
- ject |
|
- ▁tot |
|
- ▁stud |
|
- ▁feels |
|
- ▁thinking |
|
- ▁head |
|
- ▁cast |
|
- ▁writing |
|
- ▁rehe |
|
- ▁written |
|
- ▁perform |
|
- ▁fan |
|
- der |
|
- ect |
|
- ▁sk |
|
- ▁hour |
|
- ▁father |
|
- ered |
|
- ▁hundred |
|
- ▁ind |
|
- ▁norm |
|
- ▁acc |
|
- up |
|
- ▁while |
|
- fort |
|
- ▁nin |
|
- ▁true |
|
- itch |
|
- ▁inst |
|
- ▁second |
|
- ▁pick |
|
- ▁record |
|
- ross |
|
- ▁quest |
|
- ged |
|
- ▁career |
|
- ween |
|
- ▁bec |
|
- ▁reason |
|
- ▁since |
|
- ▁bra |
|
- ▁char |
|
- ▁imp |
|
- ree |
|
- ▁girls |
|
- ▁comple |
|
- ▁turn |
|
- ▁dad |
|
- ▁fant |
|
- ▁extra |
|
- ▁laugh |
|
- ▁stand |
|
- ▁honest |
|
- ▁comm |
|
- na |
|
- ▁listen |
|
- als |
|
- cial |
|
- spe |
|
- ▁ke |
|
- ory |
|
- view |
|
- ink |
|
- ▁direct |
|
- reat |
|
- round |
|
- ien |
|
- ▁under |
|
- ile |
|
- ▁diff |
|
- ually |
|
- ▁tur |
|
- thing |
|
- sic |
|
- ▁gon |
|
- ather |
|
- ▁aud |
|
- ▁scen |
|
- atch |
|
- ▁sho |
|
- ever |
|
- tra |
|
- ▁pe |
|
- mo |
|
- ild |
|
- ▁care |
|
- int |
|
- ▁fam |
|
- ▁ob |
|
- ▁ide |
|
- ade |
|
- right |
|
- ▁may |
|
- he |
|
- ody |
|
- ense |
|
- ▁interest |
|
- ah |
|
- form |
|
- ork |
|
- ▁episod |
|
- ▁rec |
|
- iew |
|
- ▁hop |
|
- ited |
|
- ▁exper |
|
- gh |
|
- ically |
|
- ▁bel |
|
- ▁el |
|
- enty |
|
- ▁gott |
|
- ▁stu |
|
- ▁id |
|
- rie |
|
- ▁nor |
|
- ▁inc |
|
- ertain |
|
- tain |
|
- ▁wo |
|
- ▁mon |
|
- az |
|
- xt |
|
- riend |
|
- now |
|
- ▁list |
|
- ime |
|
- ome |
|
- so |
|
- ause |
|
- iously |
|
- ▁sch |
|
- ▁vo |
|
- ▁op |
|
- ason |
|
- ▁mov |
|
- ▁hi |
|
- ▁pers |
|
- ▁ye |
|
- ▁def |
|
- orm |
|
- ▁belie |
|
- fore |
|
- ix |
|
- mber |
|
- very |
|
- ▁differe |
|
- ▁wonder |
|
- ek |
|
- nder |
|
- ▁obv |
|
- ▁ep |
|
- ship |
|
- ▁lau |
|
- ience |
|
- ool |
|
- ▁sin |
|
- rect |
|
- ▁happ |
|
- ▁gir |
|
- du |
|
- ng |
|
- ▁underst |
|
- most |
|
- eric |
|
- ouse |
|
- time |
|
- lm |
|
- ▁hel |
|
- redi |
|
- ▁cour |
|
- ▁relation |
|
- rough |
|
- q |
|
- ▁defin |
|
- ▁prob |
|
- ▁reme |
|
- ▁hu |
|
- ▁fir |
|
- anna |
|
- ways |
|
- itten |
|
- elt |
|
- ▁sometime |
|
- ':' |
|
- ▁kne |
|
- alk |
|
- ▁ok |
|
- ably |
|
- rote |
|
- gether |
|
- ▁definite |
|
- ▁import |
|
- '&' |
|
- fter |
|
- onest |
|
- erest |
|
- ▁amaz |
|
- ▁ano |
|
- <sos/eos> |
|
transcript_token_list: null |
|
two_pass: false |
|
pre_postencoder_norm: false |
|
init: null |
|
input_size: 1 |
|
ctc_conf: |
|
dropout_rate: 0.0 |
|
ctc_type: builtin |
|
reduce: true |
|
ignore_nan_grad: null |
|
zero_infinity: true |
|
brctc_risk_strategy: exp |
|
brctc_group_strategy: end |
|
brctc_risk_factor: 0.0 |
|
joint_net_conf: null |
|
use_preprocessor: true |
|
token_type: word |
|
bpemodel: null |
|
non_linguistic_symbols: null |
|
cleaner: null |
|
g2p: null |
|
speech_volume_normalize: null |
|
rir_scp: null |
|
rir_apply_prob: 1.0 |
|
noise_scp: null |
|
noise_apply_prob: 1.0 |
|
noise_db_range: '13_15' |
|
short_noise_thres: 0.5 |
|
frontend: null |
|
frontend_conf: {} |
|
specaug: null |
|
specaug_conf: {} |
|
normalize: null |
|
normalize_conf: {} |
|
model: espnet |
|
model_conf: |
|
ctc_weight: 0.3 |
|
lsm_weight: 0.1 |
|
length_normalized_loss: false |
|
weighted_sum: true |
|
extract_feats_in_collect_stats: false |
|
preencoder: null |
|
preencoder_conf: {} |
|
encoder: whisper |
|
encoder_conf: |
|
whisper_model: medium |
|
dropout_rate: 0.0 |
|
use_specaug: true |
|
specaug_conf: |
|
apply_time_warp: true |
|
time_warp_window: 5 |
|
time_warp_mode: bicubic |
|
apply_freq_mask: true |
|
freq_mask_width_range: |
|
- 0 |
|
- 40 |
|
num_freq_mask: 2 |
|
apply_time_mask: true |
|
time_mask_width_ratio_range: |
|
- 0.0 |
|
- 0.12 |
|
num_time_mask: 5 |
|
prepostencoder: linear |
|
prepostencoder_conf: |
|
input_size: 1024 |
|
output_size: 80 |
|
postencoder: conformer_full |
|
postencoder_conf: |
|
output_size: 256 |
|
attention_heads: 4 |
|
linear_units: 1024 |
|
num_blocks: 12 |
|
dropout_rate: 0.1 |
|
positional_dropout_rate: 0.1 |
|
attention_dropout_rate: 0.1 |
|
input_layer: conv2d2 |
|
normalize_before: true |
|
macaron_style: true |
|
rel_pos_type: latest |
|
pos_enc_layer_type: rel_pos |
|
selfattention_layer_type: rel_selfattn |
|
activation_type: swish |
|
use_cnn_module: true |
|
cnn_module_kernel: 31 |
|
deliberationencoder: null |
|
deliberationencoder_conf: {} |
|
decoder: transformer |
|
decoder_conf: |
|
attention_heads: 4 |
|
linear_units: 2048 |
|
num_blocks: 6 |
|
dropout_rate: 0.1 |
|
positional_dropout_rate: 0.1 |
|
self_attention_dropout_rate: 0.1 |
|
src_attention_dropout_rate: 0.1 |
|
postdecoder: null |
|
postdecoder_conf: {} |
|
required: |
|
- output_dir |
|
- token_list |
|
version: '202310' |
|
distributed: true |
|
``` |
|
|
|
</details> |
|
|
|
|
|
|
|
### Citing ESPnet |
|
|
|
```bibtex |
|
@inproceedings{watanabe2018espnet, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
title={{ESPnet}: End-to-End Speech Processing Toolkit}, |
|
year={2018}, |
|
booktitle={Proceedings of Interspeech}, |
|
pages={2207--2211}, |
|
doi={10.21437/Interspeech.2018-1456}, |
|
url={http://dx.doi.org/10.21437/Interspeech.2018-1456} |
|
} |
|
``` |
|
|
|
or arXiv: |
|
|
|
```bibtex |
|
@misc{watanabe2018espnet, |
|
title={ESPnet: End-to-End Speech Processing Toolkit}, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
year={2018}, |
|
eprint={1804.00015}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.CL} |
|
} |
|
``` |
|
|