seastar105 committed on
Commit
53c24e9
·
verified ·
1 Parent(s): 62a96f6

Training in progress, step 5000

Browse files
.hydra/config.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ paths:
2
+ root_dir: .
3
+ data_dir: ${paths.root_dir}/data/
4
+ log_dir: ${paths.root_dir}/logs/
5
+ output_dir: ${hydra:runtime.output_dir}
6
+ work_dir: ${hydra:runtime.cwd}
7
+ data_config:
8
+ name: mitermix/audiosnippets
9
+ streaming: true
10
+ audio_key: mp3
11
+ caption_key: caption
12
+ model_config:
13
+ tokenizer_name: openai/whisper-tiny
14
+ model_name: openai/whisper-base
15
+ attn_implementation: flash_attention_2
16
+ name: openai/whisper-tiny
17
+ trainer_config:
18
+ tf32: true
19
+ bf16: true
20
+ fp16: false
21
+ dataloader_num_workers: 16
22
+ eval_strategy: 'no'
23
+ eval_steps: null
24
+ save_strategy: steps
25
+ save_steps: 5000
26
+ save_total_limit: 5
27
+ gradient_checkpointing: false
28
+ gradient_checkpointing_kwargs:
29
+ use_reentrant: false
30
+ ddp_find_unused_parameters: false
31
+ logging_steps: 50
32
+ seed: 998244353
33
+ optim: adamw_torch
34
+ adam_beta1: 0.9
35
+ adam_beta2: 0.98
36
+ max_grad_norm: 1.0
37
+ per_device_train_batch_size: 128
38
+ output_dir: ${paths.output_dir}
39
+ report_to: tensorboard
40
+ logging_dir: ${trainer_config.output_dir}/tb
41
+ lr_scheduler_type: cosine
42
+ learning_rate: 0.0001
43
+ warmup_steps: 5000
44
+ max_steps: 50000
45
+ label_smoothing_factor: 0.0
46
+ push_to_hub: true
47
+ hub_model_id: seastar105/whisper-base-emo-speech-caption
48
+ dataloader_prefetch_factor: 4
49
+ task_name: tiny-test
.hydra/hydra.yaml ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
4
+ sweep:
5
+ dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${task_name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - model_config.model_name=openai/whisper-base
116
+ - model_config.attn_implementation=flash_attention_2
117
+ - data_config.name=mitermix/audiosnippets
118
+ - data_config.audio_key=mp3
119
+ - data_config.caption_key=caption
120
+ - trainer_config.bf16=true
121
+ - trainer_config.dataloader_num_workers=16
122
+ - trainer_config.per_device_train_batch_size=128
123
+ - trainer_config.max_steps=50000
124
+ - ++trainer_config.push_to_hub=true
125
+ - ++trainer_config.hub_model_id=seastar105/whisper-base-emo-speech-caption
126
+ - ++trainer_config.save_total_limit=5
127
+ - ++trainer_config.save_steps=5000
128
+ - ++trainer_config.dataloader_prefetch_factor=4
129
+ - ++trainer_config.warmup_steps=5000
130
+ job:
131
+ name: train
132
+ chdir: null
133
+ override_dirname: ++trainer_config.dataloader_prefetch_factor=4,++trainer_config.hub_model_id=seastar105/whisper-base-emo-speech-caption,++trainer_config.push_to_hub=true,++trainer_config.save_steps=5000,++trainer_config.save_total_limit=5,++trainer_config.warmup_steps=5000,data_config.audio_key=mp3,data_config.caption_key=caption,data_config.name=mitermix/audiosnippets,model_config.attn_implementation=flash_attention_2,model_config.model_name=openai/whisper-base,trainer_config.bf16=true,trainer_config.dataloader_num_workers=16,trainer_config.max_steps=50000,trainer_config.per_device_train_batch_size=128
134
+ id: ???
135
+ num: ???
136
+ config_name: main
137
+ env_set: {}
138
+ env_copy: []
139
+ config:
140
+ override_dirname:
141
+ kv_sep: '='
142
+ item_sep: ','
143
+ exclude_keys: []
144
+ runtime:
145
+ version: 1.3.2
146
+ version_base: '1.3'
147
+ cwd: /root/audio-caption
148
+ config_sources:
149
+ - path: hydra.conf
150
+ schema: pkg
151
+ provider: hydra
152
+ - path: /root/audio-caption/configs
153
+ schema: file
154
+ provider: main
155
+ - path: ''
156
+ schema: structured
157
+ provider: schema
158
+ output_dir: /root/audio-caption/logs/tiny-test/runs/2025-01-05_02-24-57
159
+ choices:
160
+ hydra/env: default
161
+ hydra/callbacks: null
162
+ hydra/job_logging: default
163
+ hydra/hydra_logging: default
164
+ hydra/hydra_help: default
165
+ hydra/help: default
166
+ hydra/sweeper: basic
167
+ hydra/launcher: basic
168
+ hydra/output: default
169
+ verbose: false
.hydra/overrides.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - model_config.model_name=openai/whisper-base
2
+ - model_config.attn_implementation=flash_attention_2
3
+ - data_config.name=mitermix/audiosnippets
4
+ - data_config.audio_key=mp3
5
+ - data_config.caption_key=caption
6
+ - trainer_config.bf16=true
7
+ - trainer_config.dataloader_num_workers=16
8
+ - trainer_config.per_device_train_batch_size=128
9
+ - trainer_config.max_steps=50000
10
+ - ++trainer_config.push_to_hub=true
11
+ - ++trainer_config.hub_model_id=seastar105/whisper-base-emo-speech-caption
12
+ - ++trainer_config.save_total_limit=5
13
+ - ++trainer_config.save_steps=5000
14
+ - ++trainer_config.dataloader_prefetch_factor=4
15
+ - ++trainer_config.warmup_steps=5000
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-base",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 512,
14
+ "decoder_attention_heads": 8,
15
+ "decoder_ffn_dim": 2048,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 6,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 8,
21
+ "encoder_ffn_dim": 2048,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 6,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 6,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.47.1",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be343d994b8f72a0348fa21a10f77df07ab9ceca576dd60a89b564bf7d27b342
3
+ size 290403936
tb/events.out.tfevents.1736043901.5f4e103e0ee7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c819a6f5bf99b7e21632f0ade49c41334c2002842660403ba6bf1129db993e4f
3
+ size 27675
tiny-test.log ADDED
File without changes
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c408606756c80c14750aef57d23effec7e94468930a74be618d0505e2b908948
3
+ size 5432