Sin2pi committed on
Commit
99b9c53
·
verified ·
1 Parent(s): 54f4121

Upload 3 files

Browse files
Files changed (2) hide show
  1. config.json +61 -62
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,62 +1,61 @@
1
- ## hugging face whisper config parameters can be ignored and are there for compatibility model should be loaded with initialization of model code for now
2
- {
3
- "activation_dropout": 0.0,
4
- "activation_function": "gelu",
5
- "apply_spec_augment": false,
6
- "architectures": [
7
- "Echo"
8
- ],
9
- "attention_dropout": 0.0,
10
- "base": 10000,
11
- "begin_suppress_tokens": [
12
- 220,
13
- 50256
14
- ],
15
- "bos_token_id": 50257,
16
- "checkpointing": true,
17
- "classifier_proj_size": 256,
18
- "cross_attention": true,
19
- "d_model": 384,
20
- "decoder_attention_heads": 6,
21
- "decoder_ffn_dim": 1536,
22
- "decoder_layerdrop": 0.0,
23
- "decoder_layers": 4,
24
- "decoder_start_token_id": 50258,
25
- "dropout": 0.0,
26
- "encoder_attention_heads": 6,
27
- "encoder_ffn_dim": 1536,
28
- "encoder_layerdrop": 0.0,
29
- "encoder_layers": 4,
30
- "eos_token_id": 50257,
31
- "init_std": 0.02,
32
- "is_encoder_decoder": true,
33
- "mask_feature_length": 10,
34
- "mask_feature_min_masks": 0,
35
- "mask_feature_prob": 0.0,
36
- "mask_time_length": 10,
37
- "mask_time_min_masks": 2,
38
- "mask_time_prob": 0.05,
39
- "max_rel_dist": 15,
40
- "max_source_positions": 1500,
41
- "max_target_positions": 448,
42
- "median_filter_width": 7,
43
- "model_type": "whisper",
44
- "n_audio_ctx": 1500,
45
- "n_audio_head": 16,
46
- "n_audio_layer": 24,
47
- "n_audio_state": 1024,
48
- "n_mels": 80,
49
- "n_text_ctx": 448,
50
- "n_text_head": 16,
51
- "n_text_layer": 20,
52
- "n_text_state": 1024,
53
- "num_hidden_layers": 4,
54
- "num_mel_bins": 80,
55
- "pad_token_id": 50257,
56
- "scale_embedding": false,
57
- "torch_dtype": "float32",
58
- "transformers_version": "4.47.0",
59
- "use_cache": true,
60
- "use_weighted_layer_sum": false,
61
- "vocab_size": 51865
62
- }
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "Echo"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "base": 10000,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50256
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "checkpointing": true,
16
+ "classifier_proj_size": 256,
17
+ "cross_attention": true,
18
+ "d_model": 384,
19
+ "decoder_attention_heads": 6,
20
+ "decoder_ffn_dim": 1536,
21
+ "decoder_layerdrop": 0.0,
22
+ "decoder_layers": 4,
23
+ "decoder_start_token_id": 50258,
24
+ "dropout": 0.0,
25
+ "encoder_attention_heads": 6,
26
+ "encoder_ffn_dim": 1536,
27
+ "encoder_layerdrop": 0.0,
28
+ "encoder_layers": 4,
29
+ "eos_token_id": 50257,
30
+ "init_std": 0.02,
31
+ "is_encoder_decoder": true,
32
+ "mask_feature_length": 10,
33
+ "mask_feature_min_masks": 0,
34
+ "mask_feature_prob": 0.0,
35
+ "mask_time_length": 10,
36
+ "mask_time_min_masks": 2,
37
+ "mask_time_prob": 0.05,
38
+ "max_rel_dist": 200,
39
+ "max_source_positions": 1500,
40
+ "max_target_positions": 448,
41
+ "median_filter_width": 7,
42
+ "model_type": "whisper",
43
+ "n_audio_ctx": 1500,
44
+ "n_audio_head": 16,
45
+ "n_audio_layer": 24,
46
+ "n_audio_state": 1024,
47
+ "n_mels": 128,
48
+ "n_text_ctx": 448,
49
+ "n_text_head": 16,
50
+ "n_text_layer": 20,
51
+ "n_text_state": 1024,
52
+ "num_hidden_layers": 4,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.47.0",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead546823f5ff3feaccdc9e6d236f78a407f7954cbf4f383e164d84499b528d1
3
- size 3204676144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c839a0eda6edd94c84d85a90d7746e9587b7261b038842741fa0ba3e86f111f
3
+ size 3203825480