Mo-oN committed on
Commit
15be1aa
·
1 Parent(s): a126504

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/README.md +52 -0
  2. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/config.json +192 -0
  3. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/generation_config.json +8 -0
  4. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/model.safetensors +3 -0
  5. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/optimizer.pt +3 -0
  6. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/rng_state.pth +3 -0
  7. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/scheduler.pt +3 -0
  8. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/trainer_state.json +303 -0
  9. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/training_args.bin +3 -0
  10. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/config.json +192 -0
  11. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/generation_config.json +8 -0
  12. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/model.safetensors +3 -0
  13. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/optimizer.pt +3 -0
  14. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/rng_state.pth +3 -0
  15. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/scheduler.pt +3 -0
  16. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/trainer_state.json +309 -0
  17. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/training_args.bin +3 -0
  18. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/config.json +192 -0
  19. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/generation_config.json +8 -0
  20. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/model.safetensors +3 -0
  21. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/runs/Dec14_11-03-01_peach/events.out.tfevents.1702519383.peach +3 -0
  22. https://huggingface.co/Mo-oN/donut-base-DO/tree/main/training_args.bin +3 -0
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: naver-clova-ix/donut-base
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: donut-base-DO
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # donut-base-DO
15
+
16
+ This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on an unknown dataset.
17
+
18
+ ## Model description
19
+
20
+ More information needed
21
+
22
+ ## Intended uses & limitations
23
+
24
+ More information needed
25
+
26
+ ## Training and evaluation data
27
+
28
+ More information needed
29
+
30
+ ## Training procedure
31
+
32
+ ### Training hyperparameters
33
+
34
+ The following hyperparameters were used during training:
35
+ - learning_rate: 2e-05
36
+ - train_batch_size: 1
37
+ - eval_batch_size: 8
38
+ - seed: 42
39
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
40
+ - lr_scheduler_type: linear
41
+ - num_epochs: 50
42
+ - mixed_precision_training: Native AMP
43
+
44
+ ### Training results
45
+
46
+
47
+
48
+ ### Framework versions
49
+
50
+ - Transformers 4.36.0
51
+ - Pytorch 2.1.0
52
+ - Tokenizers 0.15.0
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "naver-clova-ix/donut-base",
3
+ "architectures": [
4
+ "VisionEncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "",
8
+ "activation_dropout": 0.0,
9
+ "activation_function": "gelu",
10
+ "add_cross_attention": true,
11
+ "add_final_layer_norm": true,
12
+ "architectures": null,
13
+ "attention_dropout": 0.0,
14
+ "bad_words_ids": null,
15
+ "begin_suppress_tokens": null,
16
+ "bos_token_id": 0,
17
+ "chunk_size_feed_forward": 0,
18
+ "classifier_dropout": 0.0,
19
+ "cross_attention_hidden_size": null,
20
+ "d_model": 1024,
21
+ "decoder_attention_heads": 16,
22
+ "decoder_ffn_dim": 4096,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 4,
25
+ "decoder_start_token_id": null,
26
+ "diversity_penalty": 0.0,
27
+ "do_sample": false,
28
+ "dropout": 0.1,
29
+ "early_stopping": false,
30
+ "encoder_attention_heads": 16,
31
+ "encoder_ffn_dim": 4096,
32
+ "encoder_layerdrop": 0.0,
33
+ "encoder_layers": 12,
34
+ "encoder_no_repeat_ngram_size": 0,
35
+ "eos_token_id": 2,
36
+ "exponential_decay_length_penalty": null,
37
+ "finetuning_task": null,
38
+ "forced_bos_token_id": null,
39
+ "forced_eos_token_id": 2,
40
+ "id2label": {
41
+ "0": "LABEL_0",
42
+ "1": "LABEL_1"
43
+ },
44
+ "init_std": 0.02,
45
+ "is_decoder": true,
46
+ "is_encoder_decoder": false,
47
+ "label2id": {
48
+ "LABEL_0": 0,
49
+ "LABEL_1": 1
50
+ },
51
+ "length_penalty": 1.0,
52
+ "max_length": 512,
53
+ "max_position_embeddings": 1536,
54
+ "min_length": 0,
55
+ "model_type": "mbart",
56
+ "no_repeat_ngram_size": 0,
57
+ "num_beam_groups": 1,
58
+ "num_beams": 1,
59
+ "num_hidden_layers": 12,
60
+ "num_return_sequences": 1,
61
+ "output_attentions": false,
62
+ "output_hidden_states": false,
63
+ "output_scores": false,
64
+ "pad_token_id": 1,
65
+ "prefix": null,
66
+ "problem_type": null,
67
+ "pruned_heads": {},
68
+ "remove_invalid_values": false,
69
+ "repetition_penalty": 1.0,
70
+ "return_dict": true,
71
+ "return_dict_in_generate": false,
72
+ "scale_embedding": true,
73
+ "sep_token_id": null,
74
+ "suppress_tokens": null,
75
+ "task_specific_params": null,
76
+ "temperature": 1.0,
77
+ "tf_legacy_loss": false,
78
+ "tie_encoder_decoder": false,
79
+ "tie_word_embeddings": true,
80
+ "tokenizer_class": null,
81
+ "top_k": 50,
82
+ "top_p": 1.0,
83
+ "torch_dtype": null,
84
+ "torchscript": false,
85
+ "typical_p": 1.0,
86
+ "use_bfloat16": false,
87
+ "use_cache": true,
88
+ "vocab_size": 57543
89
+ },
90
+ "decoder_start_token_id": 0,
91
+ "encoder": {
92
+ "_name_or_path": "",
93
+ "add_cross_attention": false,
94
+ "architectures": null,
95
+ "attention_probs_dropout_prob": 0.0,
96
+ "bad_words_ids": null,
97
+ "begin_suppress_tokens": null,
98
+ "bos_token_id": null,
99
+ "chunk_size_feed_forward": 0,
100
+ "cross_attention_hidden_size": null,
101
+ "decoder_start_token_id": null,
102
+ "depths": [
103
+ 2,
104
+ 2,
105
+ 14,
106
+ 2
107
+ ],
108
+ "diversity_penalty": 0.0,
109
+ "do_sample": false,
110
+ "drop_path_rate": 0.1,
111
+ "early_stopping": false,
112
+ "embed_dim": 128,
113
+ "encoder_no_repeat_ngram_size": 0,
114
+ "eos_token_id": null,
115
+ "exponential_decay_length_penalty": null,
116
+ "finetuning_task": null,
117
+ "forced_bos_token_id": null,
118
+ "forced_eos_token_id": null,
119
+ "hidden_act": "gelu",
120
+ "hidden_dropout_prob": 0.0,
121
+ "hidden_size": 1024,
122
+ "id2label": {
123
+ "0": "LABEL_0",
124
+ "1": "LABEL_1"
125
+ },
126
+ "image_size": [
127
+ 2560,
128
+ 1920
129
+ ],
130
+ "initializer_range": 0.02,
131
+ "is_decoder": false,
132
+ "is_encoder_decoder": false,
133
+ "label2id": {
134
+ "LABEL_0": 0,
135
+ "LABEL_1": 1
136
+ },
137
+ "layer_norm_eps": 1e-05,
138
+ "length_penalty": 1.0,
139
+ "max_length": 20,
140
+ "min_length": 0,
141
+ "mlp_ratio": 4.0,
142
+ "model_type": "donut-swin",
143
+ "no_repeat_ngram_size": 0,
144
+ "num_beam_groups": 1,
145
+ "num_beams": 1,
146
+ "num_channels": 3,
147
+ "num_heads": [
148
+ 4,
149
+ 8,
150
+ 16,
151
+ 32
152
+ ],
153
+ "num_layers": 4,
154
+ "num_return_sequences": 1,
155
+ "output_attentions": false,
156
+ "output_hidden_states": false,
157
+ "output_scores": false,
158
+ "pad_token_id": null,
159
+ "patch_size": 4,
160
+ "path_norm": true,
161
+ "prefix": null,
162
+ "problem_type": null,
163
+ "pruned_heads": {},
164
+ "qkv_bias": true,
165
+ "remove_invalid_values": false,
166
+ "repetition_penalty": 1.0,
167
+ "return_dict": true,
168
+ "return_dict_in_generate": false,
169
+ "sep_token_id": null,
170
+ "suppress_tokens": null,
171
+ "task_specific_params": null,
172
+ "temperature": 1.0,
173
+ "tf_legacy_loss": false,
174
+ "tie_encoder_decoder": false,
175
+ "tie_word_embeddings": true,
176
+ "tokenizer_class": null,
177
+ "top_k": 50,
178
+ "top_p": 1.0,
179
+ "torch_dtype": null,
180
+ "torchscript": false,
181
+ "typical_p": 1.0,
182
+ "use_absolute_embeddings": false,
183
+ "use_bfloat16": false,
184
+ "window_size": 10
185
+ },
186
+ "is_encoder_decoder": true,
187
+ "model_type": "vision-encoder-decoder",
188
+ "pad_token_id": 1,
189
+ "tie_word_embeddings": false,
190
+ "torch_dtype": "float32",
191
+ "transformers_version": "4.36.0"
192
+ }
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "pad_token_id": 1,
7
+ "transformers_version": "4.36.0"
8
+ }
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e209adaa00a05ab951898aa12b55f0d2dad4f4f8265f2d9d969fd3ce388d818a
3
+ size 809144472
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9591f399dd926439012e604d6e7883abecba8824b11186bd5d8f794dcd08fb0e
3
+ size 1615351167
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3a727b34111fddeb0256fd0ce6cc79adc0299b446281ac8e29e21ab1ff84786
3
+ size 14308
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6063f15dc29a9a0de0597c60bae48b3f3cdc55d75901b9abd1b4816abb5b88
3
+ size 1064
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/trainer_state.json ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 49.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4704,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.04,
13
+ "learning_rate": 1.9604166666666668e-05,
14
+ "loss": 3.4223,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 2.08,
19
+ "learning_rate": 1.9187500000000002e-05,
20
+ "loss": 0.4926,
21
+ "step": 200
22
+ },
23
+ {
24
+ "epoch": 3.12,
25
+ "learning_rate": 1.8770833333333337e-05,
26
+ "loss": 0.2715,
27
+ "step": 300
28
+ },
29
+ {
30
+ "epoch": 4.17,
31
+ "learning_rate": 1.8354166666666668e-05,
32
+ "loss": 0.1697,
33
+ "step": 400
34
+ },
35
+ {
36
+ "epoch": 5.21,
37
+ "learning_rate": 1.7937500000000002e-05,
38
+ "loss": 0.1113,
39
+ "step": 500
40
+ },
41
+ {
42
+ "epoch": 6.25,
43
+ "learning_rate": 1.7520833333333337e-05,
44
+ "loss": 0.0982,
45
+ "step": 600
46
+ },
47
+ {
48
+ "epoch": 7.29,
49
+ "learning_rate": 1.7104166666666668e-05,
50
+ "loss": 0.0827,
51
+ "step": 700
52
+ },
53
+ {
54
+ "epoch": 8.33,
55
+ "learning_rate": 1.6687500000000002e-05,
56
+ "loss": 0.0355,
57
+ "step": 800
58
+ },
59
+ {
60
+ "epoch": 9.38,
61
+ "learning_rate": 1.6270833333333337e-05,
62
+ "loss": 0.0381,
63
+ "step": 900
64
+ },
65
+ {
66
+ "epoch": 10.42,
67
+ "learning_rate": 1.5854166666666668e-05,
68
+ "loss": 0.0373,
69
+ "step": 1000
70
+ },
71
+ {
72
+ "epoch": 11.46,
73
+ "learning_rate": 1.54375e-05,
74
+ "loss": 0.0275,
75
+ "step": 1100
76
+ },
77
+ {
78
+ "epoch": 12.5,
79
+ "learning_rate": 1.5020833333333335e-05,
80
+ "loss": 0.0376,
81
+ "step": 1200
82
+ },
83
+ {
84
+ "epoch": 13.54,
85
+ "learning_rate": 1.4604166666666668e-05,
86
+ "loss": 0.0251,
87
+ "step": 1300
88
+ },
89
+ {
90
+ "epoch": 14.58,
91
+ "learning_rate": 1.4187500000000001e-05,
92
+ "loss": 0.0219,
93
+ "step": 1400
94
+ },
95
+ {
96
+ "epoch": 15.62,
97
+ "learning_rate": 1.3770833333333335e-05,
98
+ "loss": 0.0176,
99
+ "step": 1500
100
+ },
101
+ {
102
+ "epoch": 16.67,
103
+ "learning_rate": 1.3354166666666668e-05,
104
+ "loss": 0.0212,
105
+ "step": 1600
106
+ },
107
+ {
108
+ "epoch": 17.71,
109
+ "learning_rate": 1.2937500000000001e-05,
110
+ "loss": 0.0201,
111
+ "step": 1700
112
+ },
113
+ {
114
+ "epoch": 18.75,
115
+ "learning_rate": 1.2520833333333336e-05,
116
+ "loss": 0.0149,
117
+ "step": 1800
118
+ },
119
+ {
120
+ "epoch": 19.79,
121
+ "learning_rate": 1.2104166666666668e-05,
122
+ "loss": 0.0099,
123
+ "step": 1900
124
+ },
125
+ {
126
+ "epoch": 20.83,
127
+ "learning_rate": 1.1687500000000001e-05,
128
+ "loss": 0.0055,
129
+ "step": 2000
130
+ },
131
+ {
132
+ "epoch": 21.88,
133
+ "learning_rate": 1.1270833333333336e-05,
134
+ "loss": 0.0067,
135
+ "step": 2100
136
+ },
137
+ {
138
+ "epoch": 22.92,
139
+ "learning_rate": 1.0854166666666668e-05,
140
+ "loss": 0.0221,
141
+ "step": 2200
142
+ },
143
+ {
144
+ "epoch": 23.96,
145
+ "learning_rate": 1.04375e-05,
146
+ "loss": 0.0057,
147
+ "step": 2300
148
+ },
149
+ {
150
+ "epoch": 25.0,
151
+ "learning_rate": 1.0020833333333336e-05,
152
+ "loss": 0.0066,
153
+ "step": 2400
154
+ },
155
+ {
156
+ "epoch": 26.04,
157
+ "learning_rate": 9.604166666666669e-06,
158
+ "loss": 0.0081,
159
+ "step": 2500
160
+ },
161
+ {
162
+ "epoch": 27.08,
163
+ "learning_rate": 9.1875e-06,
164
+ "loss": 0.009,
165
+ "step": 2600
166
+ },
167
+ {
168
+ "epoch": 28.12,
169
+ "learning_rate": 8.770833333333334e-06,
170
+ "loss": 0.0043,
171
+ "step": 2700
172
+ },
173
+ {
174
+ "epoch": 29.17,
175
+ "learning_rate": 8.354166666666667e-06,
176
+ "loss": 0.0052,
177
+ "step": 2800
178
+ },
179
+ {
180
+ "epoch": 30.21,
181
+ "learning_rate": 7.9375e-06,
182
+ "loss": 0.0223,
183
+ "step": 2900
184
+ },
185
+ {
186
+ "epoch": 31.25,
187
+ "learning_rate": 7.5208333333333335e-06,
188
+ "loss": 0.0041,
189
+ "step": 3000
190
+ },
191
+ {
192
+ "epoch": 32.29,
193
+ "learning_rate": 7.104166666666668e-06,
194
+ "loss": 0.009,
195
+ "step": 3100
196
+ },
197
+ {
198
+ "epoch": 33.33,
199
+ "learning_rate": 6.6875e-06,
200
+ "loss": 0.0121,
201
+ "step": 3200
202
+ },
203
+ {
204
+ "epoch": 34.38,
205
+ "learning_rate": 6.2708333333333336e-06,
206
+ "loss": 0.0244,
207
+ "step": 3300
208
+ },
209
+ {
210
+ "epoch": 35.42,
211
+ "learning_rate": 5.854166666666667e-06,
212
+ "loss": 0.0011,
213
+ "step": 3400
214
+ },
215
+ {
216
+ "epoch": 36.46,
217
+ "learning_rate": 5.4375e-06,
218
+ "loss": 0.0047,
219
+ "step": 3500
220
+ },
221
+ {
222
+ "epoch": 37.5,
223
+ "learning_rate": 5.020833333333334e-06,
224
+ "loss": 0.0032,
225
+ "step": 3600
226
+ },
227
+ {
228
+ "epoch": 38.54,
229
+ "learning_rate": 4.6041666666666665e-06,
230
+ "loss": 0.0046,
231
+ "step": 3700
232
+ },
233
+ {
234
+ "epoch": 39.58,
235
+ "learning_rate": 4.1875e-06,
236
+ "loss": 0.0036,
237
+ "step": 3800
238
+ },
239
+ {
240
+ "epoch": 40.62,
241
+ "learning_rate": 3.7708333333333334e-06,
242
+ "loss": 0.0013,
243
+ "step": 3900
244
+ },
245
+ {
246
+ "epoch": 41.67,
247
+ "learning_rate": 3.3541666666666666e-06,
248
+ "loss": 0.004,
249
+ "step": 4000
250
+ },
251
+ {
252
+ "epoch": 42.71,
253
+ "learning_rate": 2.9375000000000003e-06,
254
+ "loss": 0.0027,
255
+ "step": 4100
256
+ },
257
+ {
258
+ "epoch": 43.75,
259
+ "learning_rate": 2.5208333333333335e-06,
260
+ "loss": 0.0076,
261
+ "step": 4200
262
+ },
263
+ {
264
+ "epoch": 44.79,
265
+ "learning_rate": 2.1041666666666667e-06,
266
+ "loss": 0.0014,
267
+ "step": 4300
268
+ },
269
+ {
270
+ "epoch": 45.83,
271
+ "learning_rate": 1.6875000000000001e-06,
272
+ "loss": 0.0014,
273
+ "step": 4400
274
+ },
275
+ {
276
+ "epoch": 46.88,
277
+ "learning_rate": 1.2708333333333334e-06,
278
+ "loss": 0.0016,
279
+ "step": 4500
280
+ },
281
+ {
282
+ "epoch": 47.92,
283
+ "learning_rate": 8.541666666666667e-07,
284
+ "loss": 0.0017,
285
+ "step": 4600
286
+ },
287
+ {
288
+ "epoch": 48.96,
289
+ "learning_rate": 4.375e-07,
290
+ "loss": 0.0026,
291
+ "step": 4700
292
+ }
293
+ ],
294
+ "logging_steps": 100,
295
+ "max_steps": 4800,
296
+ "num_input_tokens_seen": 0,
297
+ "num_train_epochs": 50,
298
+ "save_steps": 500,
299
+ "total_flos": 5.883596962560737e+19,
300
+ "train_batch_size": 1,
301
+ "trial_name": null,
302
+ "trial_params": null
303
+ }
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4704/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b65fbe853bc374dd7c07bc87d841e85e681f1d8406a1b4e73fb26a271695056
3
+ size 4920
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "naver-clova-ix/donut-base",
3
+ "architectures": [
4
+ "VisionEncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "",
8
+ "activation_dropout": 0.0,
9
+ "activation_function": "gelu",
10
+ "add_cross_attention": true,
11
+ "add_final_layer_norm": true,
12
+ "architectures": null,
13
+ "attention_dropout": 0.0,
14
+ "bad_words_ids": null,
15
+ "begin_suppress_tokens": null,
16
+ "bos_token_id": 0,
17
+ "chunk_size_feed_forward": 0,
18
+ "classifier_dropout": 0.0,
19
+ "cross_attention_hidden_size": null,
20
+ "d_model": 1024,
21
+ "decoder_attention_heads": 16,
22
+ "decoder_ffn_dim": 4096,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 4,
25
+ "decoder_start_token_id": null,
26
+ "diversity_penalty": 0.0,
27
+ "do_sample": false,
28
+ "dropout": 0.1,
29
+ "early_stopping": false,
30
+ "encoder_attention_heads": 16,
31
+ "encoder_ffn_dim": 4096,
32
+ "encoder_layerdrop": 0.0,
33
+ "encoder_layers": 12,
34
+ "encoder_no_repeat_ngram_size": 0,
35
+ "eos_token_id": 2,
36
+ "exponential_decay_length_penalty": null,
37
+ "finetuning_task": null,
38
+ "forced_bos_token_id": null,
39
+ "forced_eos_token_id": 2,
40
+ "id2label": {
41
+ "0": "LABEL_0",
42
+ "1": "LABEL_1"
43
+ },
44
+ "init_std": 0.02,
45
+ "is_decoder": true,
46
+ "is_encoder_decoder": false,
47
+ "label2id": {
48
+ "LABEL_0": 0,
49
+ "LABEL_1": 1
50
+ },
51
+ "length_penalty": 1.0,
52
+ "max_length": 512,
53
+ "max_position_embeddings": 1536,
54
+ "min_length": 0,
55
+ "model_type": "mbart",
56
+ "no_repeat_ngram_size": 0,
57
+ "num_beam_groups": 1,
58
+ "num_beams": 1,
59
+ "num_hidden_layers": 12,
60
+ "num_return_sequences": 1,
61
+ "output_attentions": false,
62
+ "output_hidden_states": false,
63
+ "output_scores": false,
64
+ "pad_token_id": 1,
65
+ "prefix": null,
66
+ "problem_type": null,
67
+ "pruned_heads": {},
68
+ "remove_invalid_values": false,
69
+ "repetition_penalty": 1.0,
70
+ "return_dict": true,
71
+ "return_dict_in_generate": false,
72
+ "scale_embedding": true,
73
+ "sep_token_id": null,
74
+ "suppress_tokens": null,
75
+ "task_specific_params": null,
76
+ "temperature": 1.0,
77
+ "tf_legacy_loss": false,
78
+ "tie_encoder_decoder": false,
79
+ "tie_word_embeddings": true,
80
+ "tokenizer_class": null,
81
+ "top_k": 50,
82
+ "top_p": 1.0,
83
+ "torch_dtype": null,
84
+ "torchscript": false,
85
+ "typical_p": 1.0,
86
+ "use_bfloat16": false,
87
+ "use_cache": true,
88
+ "vocab_size": 57543
89
+ },
90
+ "decoder_start_token_id": 0,
91
+ "encoder": {
92
+ "_name_or_path": "",
93
+ "add_cross_attention": false,
94
+ "architectures": null,
95
+ "attention_probs_dropout_prob": 0.0,
96
+ "bad_words_ids": null,
97
+ "begin_suppress_tokens": null,
98
+ "bos_token_id": null,
99
+ "chunk_size_feed_forward": 0,
100
+ "cross_attention_hidden_size": null,
101
+ "decoder_start_token_id": null,
102
+ "depths": [
103
+ 2,
104
+ 2,
105
+ 14,
106
+ 2
107
+ ],
108
+ "diversity_penalty": 0.0,
109
+ "do_sample": false,
110
+ "drop_path_rate": 0.1,
111
+ "early_stopping": false,
112
+ "embed_dim": 128,
113
+ "encoder_no_repeat_ngram_size": 0,
114
+ "eos_token_id": null,
115
+ "exponential_decay_length_penalty": null,
116
+ "finetuning_task": null,
117
+ "forced_bos_token_id": null,
118
+ "forced_eos_token_id": null,
119
+ "hidden_act": "gelu",
120
+ "hidden_dropout_prob": 0.0,
121
+ "hidden_size": 1024,
122
+ "id2label": {
123
+ "0": "LABEL_0",
124
+ "1": "LABEL_1"
125
+ },
126
+ "image_size": [
127
+ 2560,
128
+ 1920
129
+ ],
130
+ "initializer_range": 0.02,
131
+ "is_decoder": false,
132
+ "is_encoder_decoder": false,
133
+ "label2id": {
134
+ "LABEL_0": 0,
135
+ "LABEL_1": 1
136
+ },
137
+ "layer_norm_eps": 1e-05,
138
+ "length_penalty": 1.0,
139
+ "max_length": 20,
140
+ "min_length": 0,
141
+ "mlp_ratio": 4.0,
142
+ "model_type": "donut-swin",
143
+ "no_repeat_ngram_size": 0,
144
+ "num_beam_groups": 1,
145
+ "num_beams": 1,
146
+ "num_channels": 3,
147
+ "num_heads": [
148
+ 4,
149
+ 8,
150
+ 16,
151
+ 32
152
+ ],
153
+ "num_layers": 4,
154
+ "num_return_sequences": 1,
155
+ "output_attentions": false,
156
+ "output_hidden_states": false,
157
+ "output_scores": false,
158
+ "pad_token_id": null,
159
+ "patch_size": 4,
160
+ "path_norm": true,
161
+ "prefix": null,
162
+ "problem_type": null,
163
+ "pruned_heads": {},
164
+ "qkv_bias": true,
165
+ "remove_invalid_values": false,
166
+ "repetition_penalty": 1.0,
167
+ "return_dict": true,
168
+ "return_dict_in_generate": false,
169
+ "sep_token_id": null,
170
+ "suppress_tokens": null,
171
+ "task_specific_params": null,
172
+ "temperature": 1.0,
173
+ "tf_legacy_loss": false,
174
+ "tie_encoder_decoder": false,
175
+ "tie_word_embeddings": true,
176
+ "tokenizer_class": null,
177
+ "top_k": 50,
178
+ "top_p": 1.0,
179
+ "torch_dtype": null,
180
+ "torchscript": false,
181
+ "typical_p": 1.0,
182
+ "use_absolute_embeddings": false,
183
+ "use_bfloat16": false,
184
+ "window_size": 10
185
+ },
186
+ "is_encoder_decoder": true,
187
+ "model_type": "vision-encoder-decoder",
188
+ "pad_token_id": 1,
189
+ "tie_word_embeddings": false,
190
+ "torch_dtype": "float32",
191
+ "transformers_version": "4.36.0"
192
+ }
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "pad_token_id": 1,
7
+ "transformers_version": "4.36.0"
8
+ }
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf1687937f83bbdfeb9094f869986b2942e23d433f83e90b1b2f8d9eeebde5e3
3
+ size 809144472
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18749af8ceb2783ced1ee3906dfcf7a662a77ce70335cc30463ea55826f2cbc4
3
+ size 1615351167
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dac278c349545aa5f50df467904959923b912db3589c21e4ac2d03de96efdcf
3
+ size 14308
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e60068187e7562e5295ddee18920326085d9737667ee29fbf135390ec1897cc7
3
+ size 1064
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/trainer_state.json ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.04,
13
+ "learning_rate": 1.9604166666666668e-05,
14
+ "loss": 3.4223,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 2.08,
19
+ "learning_rate": 1.9187500000000002e-05,
20
+ "loss": 0.4926,
21
+ "step": 200
22
+ },
23
+ {
24
+ "epoch": 3.12,
25
+ "learning_rate": 1.8770833333333337e-05,
26
+ "loss": 0.2715,
27
+ "step": 300
28
+ },
29
+ {
30
+ "epoch": 4.17,
31
+ "learning_rate": 1.8354166666666668e-05,
32
+ "loss": 0.1697,
33
+ "step": 400
34
+ },
35
+ {
36
+ "epoch": 5.21,
37
+ "learning_rate": 1.7937500000000002e-05,
38
+ "loss": 0.1113,
39
+ "step": 500
40
+ },
41
+ {
42
+ "epoch": 6.25,
43
+ "learning_rate": 1.7520833333333337e-05,
44
+ "loss": 0.0982,
45
+ "step": 600
46
+ },
47
+ {
48
+ "epoch": 7.29,
49
+ "learning_rate": 1.7104166666666668e-05,
50
+ "loss": 0.0827,
51
+ "step": 700
52
+ },
53
+ {
54
+ "epoch": 8.33,
55
+ "learning_rate": 1.6687500000000002e-05,
56
+ "loss": 0.0355,
57
+ "step": 800
58
+ },
59
+ {
60
+ "epoch": 9.38,
61
+ "learning_rate": 1.6270833333333337e-05,
62
+ "loss": 0.0381,
63
+ "step": 900
64
+ },
65
+ {
66
+ "epoch": 10.42,
67
+ "learning_rate": 1.5854166666666668e-05,
68
+ "loss": 0.0373,
69
+ "step": 1000
70
+ },
71
+ {
72
+ "epoch": 11.46,
73
+ "learning_rate": 1.54375e-05,
74
+ "loss": 0.0275,
75
+ "step": 1100
76
+ },
77
+ {
78
+ "epoch": 12.5,
79
+ "learning_rate": 1.5020833333333335e-05,
80
+ "loss": 0.0376,
81
+ "step": 1200
82
+ },
83
+ {
84
+ "epoch": 13.54,
85
+ "learning_rate": 1.4604166666666668e-05,
86
+ "loss": 0.0251,
87
+ "step": 1300
88
+ },
89
+ {
90
+ "epoch": 14.58,
91
+ "learning_rate": 1.4187500000000001e-05,
92
+ "loss": 0.0219,
93
+ "step": 1400
94
+ },
95
+ {
96
+ "epoch": 15.62,
97
+ "learning_rate": 1.3770833333333335e-05,
98
+ "loss": 0.0176,
99
+ "step": 1500
100
+ },
101
+ {
102
+ "epoch": 16.67,
103
+ "learning_rate": 1.3354166666666668e-05,
104
+ "loss": 0.0212,
105
+ "step": 1600
106
+ },
107
+ {
108
+ "epoch": 17.71,
109
+ "learning_rate": 1.2937500000000001e-05,
110
+ "loss": 0.0201,
111
+ "step": 1700
112
+ },
113
+ {
114
+ "epoch": 18.75,
115
+ "learning_rate": 1.2520833333333336e-05,
116
+ "loss": 0.0149,
117
+ "step": 1800
118
+ },
119
+ {
120
+ "epoch": 19.79,
121
+ "learning_rate": 1.2104166666666668e-05,
122
+ "loss": 0.0099,
123
+ "step": 1900
124
+ },
125
+ {
126
+ "epoch": 20.83,
127
+ "learning_rate": 1.1687500000000001e-05,
128
+ "loss": 0.0055,
129
+ "step": 2000
130
+ },
131
+ {
132
+ "epoch": 21.88,
133
+ "learning_rate": 1.1270833333333336e-05,
134
+ "loss": 0.0067,
135
+ "step": 2100
136
+ },
137
+ {
138
+ "epoch": 22.92,
139
+ "learning_rate": 1.0854166666666668e-05,
140
+ "loss": 0.0221,
141
+ "step": 2200
142
+ },
143
+ {
144
+ "epoch": 23.96,
145
+ "learning_rate": 1.04375e-05,
146
+ "loss": 0.0057,
147
+ "step": 2300
148
+ },
149
+ {
150
+ "epoch": 25.0,
151
+ "learning_rate": 1.0020833333333336e-05,
152
+ "loss": 0.0066,
153
+ "step": 2400
154
+ },
155
+ {
156
+ "epoch": 26.04,
157
+ "learning_rate": 9.604166666666669e-06,
158
+ "loss": 0.0081,
159
+ "step": 2500
160
+ },
161
+ {
162
+ "epoch": 27.08,
163
+ "learning_rate": 9.1875e-06,
164
+ "loss": 0.009,
165
+ "step": 2600
166
+ },
167
+ {
168
+ "epoch": 28.12,
169
+ "learning_rate": 8.770833333333334e-06,
170
+ "loss": 0.0043,
171
+ "step": 2700
172
+ },
173
+ {
174
+ "epoch": 29.17,
175
+ "learning_rate": 8.354166666666667e-06,
176
+ "loss": 0.0052,
177
+ "step": 2800
178
+ },
179
+ {
180
+ "epoch": 30.21,
181
+ "learning_rate": 7.9375e-06,
182
+ "loss": 0.0223,
183
+ "step": 2900
184
+ },
185
+ {
186
+ "epoch": 31.25,
187
+ "learning_rate": 7.5208333333333335e-06,
188
+ "loss": 0.0041,
189
+ "step": 3000
190
+ },
191
+ {
192
+ "epoch": 32.29,
193
+ "learning_rate": 7.104166666666668e-06,
194
+ "loss": 0.009,
195
+ "step": 3100
196
+ },
197
+ {
198
+ "epoch": 33.33,
199
+ "learning_rate": 6.6875e-06,
200
+ "loss": 0.0121,
201
+ "step": 3200
202
+ },
203
+ {
204
+ "epoch": 34.38,
205
+ "learning_rate": 6.2708333333333336e-06,
206
+ "loss": 0.0244,
207
+ "step": 3300
208
+ },
209
+ {
210
+ "epoch": 35.42,
211
+ "learning_rate": 5.854166666666667e-06,
212
+ "loss": 0.0011,
213
+ "step": 3400
214
+ },
215
+ {
216
+ "epoch": 36.46,
217
+ "learning_rate": 5.4375e-06,
218
+ "loss": 0.0047,
219
+ "step": 3500
220
+ },
221
+ {
222
+ "epoch": 37.5,
223
+ "learning_rate": 5.020833333333334e-06,
224
+ "loss": 0.0032,
225
+ "step": 3600
226
+ },
227
+ {
228
+ "epoch": 38.54,
229
+ "learning_rate": 4.6041666666666665e-06,
230
+ "loss": 0.0046,
231
+ "step": 3700
232
+ },
233
+ {
234
+ "epoch": 39.58,
235
+ "learning_rate": 4.1875e-06,
236
+ "loss": 0.0036,
237
+ "step": 3800
238
+ },
239
+ {
240
+ "epoch": 40.62,
241
+ "learning_rate": 3.7708333333333334e-06,
242
+ "loss": 0.0013,
243
+ "step": 3900
244
+ },
245
+ {
246
+ "epoch": 41.67,
247
+ "learning_rate": 3.3541666666666666e-06,
248
+ "loss": 0.004,
249
+ "step": 4000
250
+ },
251
+ {
252
+ "epoch": 42.71,
253
+ "learning_rate": 2.9375000000000003e-06,
254
+ "loss": 0.0027,
255
+ "step": 4100
256
+ },
257
+ {
258
+ "epoch": 43.75,
259
+ "learning_rate": 2.5208333333333335e-06,
260
+ "loss": 0.0076,
261
+ "step": 4200
262
+ },
263
+ {
264
+ "epoch": 44.79,
265
+ "learning_rate": 2.1041666666666667e-06,
266
+ "loss": 0.0014,
267
+ "step": 4300
268
+ },
269
+ {
270
+ "epoch": 45.83,
271
+ "learning_rate": 1.6875000000000001e-06,
272
+ "loss": 0.0014,
273
+ "step": 4400
274
+ },
275
+ {
276
+ "epoch": 46.88,
277
+ "learning_rate": 1.2708333333333334e-06,
278
+ "loss": 0.0016,
279
+ "step": 4500
280
+ },
281
+ {
282
+ "epoch": 47.92,
283
+ "learning_rate": 8.541666666666667e-07,
284
+ "loss": 0.0017,
285
+ "step": 4600
286
+ },
287
+ {
288
+ "epoch": 48.96,
289
+ "learning_rate": 4.375e-07,
290
+ "loss": 0.0026,
291
+ "step": 4700
292
+ },
293
+ {
294
+ "epoch": 50.0,
295
+ "learning_rate": 2.0833333333333335e-08,
296
+ "loss": 0.0009,
297
+ "step": 4800
298
+ }
299
+ ],
300
+ "logging_steps": 100,
301
+ "max_steps": 4800,
302
+ "num_input_tokens_seen": 0,
303
+ "num_train_epochs": 50,
304
+ "save_steps": 500,
305
+ "total_flos": 6.003670369959936e+19,
306
+ "train_batch_size": 1,
307
+ "trial_name": null,
308
+ "trial_params": null
309
+ }
https://huggingface.co/Mo-oN/donut-base-DO/tree/main/checkpoint-4800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b65fbe853bc374dd7c07bc87d841e85e681f1d8406a1b4e73fb26a271695056
3
+ size 4920
https:/huggingface.co/Mo-oN/donut-base-DO/tree/main/config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "naver-clova-ix/donut-base",
3
+ "architectures": [
4
+ "VisionEncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "",
8
+ "activation_dropout": 0.0,
9
+ "activation_function": "gelu",
10
+ "add_cross_attention": true,
11
+ "add_final_layer_norm": true,
12
+ "architectures": null,
13
+ "attention_dropout": 0.0,
14
+ "bad_words_ids": null,
15
+ "begin_suppress_tokens": null,
16
+ "bos_token_id": 0,
17
+ "chunk_size_feed_forward": 0,
18
+ "classifier_dropout": 0.0,
19
+ "cross_attention_hidden_size": null,
20
+ "d_model": 1024,
21
+ "decoder_attention_heads": 16,
22
+ "decoder_ffn_dim": 4096,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 4,
25
+ "decoder_start_token_id": null,
26
+ "diversity_penalty": 0.0,
27
+ "do_sample": false,
28
+ "dropout": 0.1,
29
+ "early_stopping": false,
30
+ "encoder_attention_heads": 16,
31
+ "encoder_ffn_dim": 4096,
32
+ "encoder_layerdrop": 0.0,
33
+ "encoder_layers": 12,
34
+ "encoder_no_repeat_ngram_size": 0,
35
+ "eos_token_id": 2,
36
+ "exponential_decay_length_penalty": null,
37
+ "finetuning_task": null,
38
+ "forced_bos_token_id": null,
39
+ "forced_eos_token_id": 2,
40
+ "id2label": {
41
+ "0": "LABEL_0",
42
+ "1": "LABEL_1"
43
+ },
44
+ "init_std": 0.02,
45
+ "is_decoder": true,
46
+ "is_encoder_decoder": false,
47
+ "label2id": {
48
+ "LABEL_0": 0,
49
+ "LABEL_1": 1
50
+ },
51
+ "length_penalty": 1.0,
52
+ "max_length": 512,
53
+ "max_position_embeddings": 1536,
54
+ "min_length": 0,
55
+ "model_type": "mbart",
56
+ "no_repeat_ngram_size": 0,
57
+ "num_beam_groups": 1,
58
+ "num_beams": 1,
59
+ "num_hidden_layers": 12,
60
+ "num_return_sequences": 1,
61
+ "output_attentions": false,
62
+ "output_hidden_states": false,
63
+ "output_scores": false,
64
+ "pad_token_id": 1,
65
+ "prefix": null,
66
+ "problem_type": null,
67
+ "pruned_heads": {},
68
+ "remove_invalid_values": false,
69
+ "repetition_penalty": 1.0,
70
+ "return_dict": true,
71
+ "return_dict_in_generate": false,
72
+ "scale_embedding": true,
73
+ "sep_token_id": null,
74
+ "suppress_tokens": null,
75
+ "task_specific_params": null,
76
+ "temperature": 1.0,
77
+ "tf_legacy_loss": false,
78
+ "tie_encoder_decoder": false,
79
+ "tie_word_embeddings": true,
80
+ "tokenizer_class": null,
81
+ "top_k": 50,
82
+ "top_p": 1.0,
83
+ "torch_dtype": null,
84
+ "torchscript": false,
85
+ "typical_p": 1.0,
86
+ "use_bfloat16": false,
87
+ "use_cache": true,
88
+ "vocab_size": 57543
89
+ },
90
+ "decoder_start_token_id": 0,
91
+ "encoder": {
92
+ "_name_or_path": "",
93
+ "add_cross_attention": false,
94
+ "architectures": null,
95
+ "attention_probs_dropout_prob": 0.0,
96
+ "bad_words_ids": null,
97
+ "begin_suppress_tokens": null,
98
+ "bos_token_id": null,
99
+ "chunk_size_feed_forward": 0,
100
+ "cross_attention_hidden_size": null,
101
+ "decoder_start_token_id": null,
102
+ "depths": [
103
+ 2,
104
+ 2,
105
+ 14,
106
+ 2
107
+ ],
108
+ "diversity_penalty": 0.0,
109
+ "do_sample": false,
110
+ "drop_path_rate": 0.1,
111
+ "early_stopping": false,
112
+ "embed_dim": 128,
113
+ "encoder_no_repeat_ngram_size": 0,
114
+ "eos_token_id": null,
115
+ "exponential_decay_length_penalty": null,
116
+ "finetuning_task": null,
117
+ "forced_bos_token_id": null,
118
+ "forced_eos_token_id": null,
119
+ "hidden_act": "gelu",
120
+ "hidden_dropout_prob": 0.0,
121
+ "hidden_size": 1024,
122
+ "id2label": {
123
+ "0": "LABEL_0",
124
+ "1": "LABEL_1"
125
+ },
126
+ "image_size": [
127
+ 2560,
128
+ 1920
129
+ ],
130
+ "initializer_range": 0.02,
131
+ "is_decoder": false,
132
+ "is_encoder_decoder": false,
133
+ "label2id": {
134
+ "LABEL_0": 0,
135
+ "LABEL_1": 1
136
+ },
137
+ "layer_norm_eps": 1e-05,
138
+ "length_penalty": 1.0,
139
+ "max_length": 20,
140
+ "min_length": 0,
141
+ "mlp_ratio": 4.0,
142
+ "model_type": "donut-swin",
143
+ "no_repeat_ngram_size": 0,
144
+ "num_beam_groups": 1,
145
+ "num_beams": 1,
146
+ "num_channels": 3,
147
+ "num_heads": [
148
+ 4,
149
+ 8,
150
+ 16,
151
+ 32
152
+ ],
153
+ "num_layers": 4,
154
+ "num_return_sequences": 1,
155
+ "output_attentions": false,
156
+ "output_hidden_states": false,
157
+ "output_scores": false,
158
+ "pad_token_id": null,
159
+ "patch_size": 4,
160
+ "path_norm": true,
161
+ "prefix": null,
162
+ "problem_type": null,
163
+ "pruned_heads": {},
164
+ "qkv_bias": true,
165
+ "remove_invalid_values": false,
166
+ "repetition_penalty": 1.0,
167
+ "return_dict": true,
168
+ "return_dict_in_generate": false,
169
+ "sep_token_id": null,
170
+ "suppress_tokens": null,
171
+ "task_specific_params": null,
172
+ "temperature": 1.0,
173
+ "tf_legacy_loss": false,
174
+ "tie_encoder_decoder": false,
175
+ "tie_word_embeddings": true,
176
+ "tokenizer_class": null,
177
+ "top_k": 50,
178
+ "top_p": 1.0,
179
+ "torch_dtype": null,
180
+ "torchscript": false,
181
+ "typical_p": 1.0,
182
+ "use_absolute_embeddings": false,
183
+ "use_bfloat16": false,
184
+ "window_size": 10
185
+ },
186
+ "is_encoder_decoder": true,
187
+ "model_type": "vision-encoder-decoder",
188
+ "pad_token_id": 1,
189
+ "tie_word_embeddings": false,
190
+ "torch_dtype": "float32",
191
+ "transformers_version": "4.36.0"
192
+ }
https:/huggingface.co/Mo-oN/donut-base-DO/tree/main/generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "pad_token_id": 1,
7
+ "transformers_version": "4.36.0"
8
+ }
https:/huggingface.co/Mo-oN/donut-base-DO/tree/main/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf1687937f83bbdfeb9094f869986b2942e23d433f83e90b1b2f8d9eeebde5e3
3
+ size 809144472
https:/huggingface.co/Mo-oN/donut-base-DO/tree/main/runs/Dec14_11-03-01_peach/events.out.tfevents.1702519383.peach ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff824073c78d1c6febf8aa684dee28141fc235c99babfc9b56606092a051990
3
+ size 16612
https:/huggingface.co/Mo-oN/donut-base-DO/tree/main/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b65fbe853bc374dd7c07bc87d841e85e681f1d8406a1b4e73fb26a271695056
3
+ size 4920