amunozo committed on
Commit
e4d0322
·
verified ·
1 Parent(s): 4f568ce

Upload model files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ GoNotoCurrent.ttf filter=lfs diff=lfs merge=lfs -text
GoNotoCurrent.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ab5c39e2b1c34a955136275ce0db068cb20d9643ead033d6b8124a73ab4f64
3
+ size 15645492
README.md CHANGED
@@ -1,8 +1,53 @@
1
  ---
2
- datasets:
3
- - stefan-it/german-dbmdz-bert-corpus
4
- language:
5
- - de
6
- pipeline_tag: fill-mask
7
- license: apache-2.0
8
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - masked-auto-encoding
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: pixel-base-german-dbmdz-pixel-0.00015-0-fp32
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ # pixel-base-german-dbmdz-pixel-0.00015-0-fp32
14
+
15
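+ This model is a fine-tuned version of an unspecified base checkpoint (the auto-generated link was left empty) on the wikipedia + bookcorpus dataset, as recorded by the Trainer. Note: the model name and the previous card metadata reference `stefan-it/german-dbmdz-bert-corpus`; confirm which dataset was actually used.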
16
+
17
+ ## Model description
18
+
19
+ More information needed
20
+
21
+ ## Intended uses & limitations
22
+
23
+ More information needed
24
+
25
+ ## Training and evaluation data
26
+
27
+ More information needed
28
+
29
+ ## Training procedure
30
+
31
+ ### Training hyperparameters
32
+
33
+ The following hyperparameters were used during training:
34
+ - learning_rate: 0.00015
35
+ - train_batch_size: 256
36
+ - eval_batch_size: 32
37
+ - seed: 42
38
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
39
+ - lr_scheduler_type: cosine
40
+ - lr_scheduler_warmup_ratio: 0.05
41
+ - training_steps: 1500000
42
+ - mixed_precision_training: Apex, opt level O1
43
+
44
+ ### Training results
45
+
46
+
47
+
48
+ ### Framework versions
49
+
50
+ - Transformers 4.17.0
51
+ - PyTorch 2.0.1+cu117
52
+ - Datasets 2.14.5
53
+ - Tokenizers 0.13.3
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.43,
3
+ "train_loss": 0.1377392346496582,
4
+ "train_runtime": 935051.3554,
5
+ "train_samples_per_second": 410.673,
6
+ "train_steps_per_second": 1.604
7
+ }
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "PIXELForPreTraining"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "cache_dir": "",
7
+ "decoder_hidden_size": 512,
8
+ "decoder_intermediate_size": 2048,
9
+ "decoder_num_attention_heads": 16,
10
+ "decoder_num_hidden_layers": 8,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "image_size": [
15
+ 16,
16
+ 8464
17
+ ],
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "layer_norm_eps": 1e-12,
21
+ "mask_ratio": 0.25,
22
+ "model_type": "pixel",
23
+ "norm_pix_loss": true,
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "qkv_bias": true,
29
+ "revision": "main",
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.17.0",
32
+ "use_auth_token": ""
33
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": false,
3
+ "do_resize": true,
4
+ "feature_extractor_type": "ViTFeatureExtractor",
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_std": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "resample": 2,
16
+ "size": [
17
+ 16,
18
+ 8464
19
+ ]
20
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d75961d2a82829be2aafdac5f00ee15924886c33d176e268a4e629d6ffbc8ce4
3
+ size 449474181
text_renderer_config.json ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "background_color": "white",
3
+ "dpi": 120,
4
+ "font_color": "black",
5
+ "font_file": "GoNotoCurrent.ttf",
6
+ "font_size": 8,
7
+ "fonts_list": [
8
+ "Apple Color Emoji",
9
+ "DejaVu Math TeX Gyre",
10
+ "DejaVu Sans",
11
+ "DejaVu Sans Mono",
12
+ "DejaVu Serif",
13
+ "Go Noto Current",
14
+ "Inconsolata",
15
+ "Monospace",
16
+ "Noto Fangsong KSS Rotated",
17
+ "Noto Fangsong KSS Vertical",
18
+ "Noto Kufi Arabic",
19
+ "Noto Music",
20
+ "Noto Naskh Arabic",
21
+ "Noto Naskh Arabic UI",
22
+ "Noto Nastaliq Urdu",
23
+ "Noto Rashi Hebrew",
24
+ "Noto Sans",
25
+ "Noto Sans Adlam",
26
+ "Noto Sans Adlam Unjoined",
27
+ "Noto Sans Anatolian Hieroglyphs",
28
+ "Noto Sans Arabic",
29
+ "Noto Sans Arabic UI",
30
+ "Noto Sans Armenian",
31
+ "Noto Sans Avestan",
32
+ "Noto Sans Balinese",
33
+ "Noto Sans Bamum",
34
+ "Noto Sans Bassa Vah",
35
+ "Noto Sans Batak",
36
+ "Noto Sans Bengali",
37
+ "Noto Sans Bengali UI",
38
+ "Noto Sans Bhaiksuki",
39
+ "Noto Sans Brahmi",
40
+ "Noto Sans Buginese",
41
+ "Noto Sans Buhid",
42
+ "Noto Sans CJK HK",
43
+ "Noto Sans CJK JP",
44
+ "Noto Sans CJK KR",
45
+ "Noto Sans CJK SC",
46
+ "Noto Sans CJK TC",
47
+ "Noto Sans Canadian Aboriginal",
48
+ "Noto Sans Carian",
49
+ "Noto Sans Caucasian Albanian",
50
+ "Noto Sans Chakma",
51
+ "Noto Sans Cham",
52
+ "Noto Sans Cherokee",
53
+ "Noto Sans Chorasmian",
54
+ "Noto Sans Coptic",
55
+ "Noto Sans Cuneiform",
56
+ "Noto Sans Cypriot",
57
+ "Noto Sans Cypro Minoan",
58
+ "Noto Sans Deseret",
59
+ "Noto Sans Devanagari",
60
+ "Noto Sans Devanagari UI",
61
+ "Noto Sans Duployan",
62
+ "Noto Sans Egyptian Hieroglyphs",
63
+ "Noto Sans Elbasan",
64
+ "Noto Sans Elymaic",
65
+ "Noto Sans Ethiopic",
66
+ "Noto Sans Georgian",
67
+ "Noto Sans Glagolitic",
68
+ "Noto Sans Gothic",
69
+ "Noto Sans Grantha",
70
+ "Noto Sans Gujarati",
71
+ "Noto Sans Gujarati UI",
72
+ "Noto Sans Gunjala Gondi",
73
+ "Noto Sans Gurmukhi",
74
+ "Noto Sans Gurmukhi UI",
75
+ "Noto Sans Hanifi Rohingya",
76
+ "Noto Sans Hanunoo",
77
+ "Noto Sans Hatran",
78
+ "Noto Sans Hebrew",
79
+ "Noto Sans Hebrew Droid",
80
+ "Noto Sans Imperial Aramaic",
81
+ "Noto Sans Indic Siyaq Numbers",
82
+ "Noto Sans Inscriptional Pahlavi",
83
+ "Noto Sans Inscriptional Parthian",
84
+ "Noto Sans Javanese",
85
+ "Noto Sans Kaithi",
86
+ "Noto Sans Kannada",
87
+ "Noto Sans Kannada UI",
88
+ "Noto Sans Kawi",
89
+ "Noto Sans Kayah Li",
90
+ "Noto Sans Kharoshthi",
91
+ "Noto Sans Khmer",
92
+ "Noto Sans Khmer UI",
93
+ "Noto Sans Khojki",
94
+ "Noto Sans Khudawadi",
95
+ "Noto Sans Lao",
96
+ "Noto Sans Lao Looped",
97
+ "Noto Sans Lao Looped UI",
98
+ "Noto Sans Lao UI",
99
+ "Noto Sans Lepcha",
100
+ "Noto Sans Limbu",
101
+ "Noto Sans Linear A",
102
+ "Noto Sans Linear B",
103
+ "Noto Sans Lisu",
104
+ "Noto Sans Lycian",
105
+ "Noto Sans Lydian",
106
+ "Noto Sans Mahajani",
107
+ "Noto Sans Malayalam",
108
+ "Noto Sans Malayalam UI",
109
+ "Noto Sans Mandaic",
110
+ "Noto Sans Manichaean",
111
+ "Noto Sans Marchen",
112
+ "Noto Sans Masaram Gondi",
113
+ "Noto Sans Mayan Numerals",
114
+ "Noto Sans Medefaidrin",
115
+ "Noto Sans Meetei Mayek",
116
+ "Noto Sans Mende Kikakui",
117
+ "Noto Sans Meroitic",
118
+ "Noto Sans Miao",
119
+ "Noto Sans Modi",
120
+ "Noto Sans Mongolian",
121
+ "Noto Sans Mono",
122
+ "Noto Sans Mro",
123
+ "Noto Sans Multani",
124
+ "Noto Sans Myanmar",
125
+ "Noto Sans Myanmar UI",
126
+ "Noto Sans NKo",
127
+ "Noto Sans NKo Unjoined",
128
+ "Noto Sans Nabataean",
129
+ "Noto Sans Nag Mundari",
130
+ "Noto Sans Nandinagari",
131
+ "Noto Sans New Tai Lue",
132
+ "Noto Sans Newa",
133
+ "Noto Sans Nushu",
134
+ "Noto Sans Ogham",
135
+ "Noto Sans Ol Chiki",
136
+ "Noto Sans Old Hungarian",
137
+ "Noto Sans Old Italic",
138
+ "Noto Sans Old North Arabian",
139
+ "Noto Sans Old Permic",
140
+ "Noto Sans Old Persian",
141
+ "Noto Sans Old Sogdian",
142
+ "Noto Sans Old South Arabian",
143
+ "Noto Sans Old Turkic",
144
+ "Noto Sans Oriya",
145
+ "Noto Sans Osage",
146
+ "Noto Sans Osmanya",
147
+ "Noto Sans Pahawh Hmong",
148
+ "Noto Sans Palmyrene",
149
+ "Noto Sans Pau Cin Hau",
150
+ "Noto Sans Phags-Pa",
151
+ "Noto Sans Phoenician",
152
+ "Noto Sans Psalter Pahlavi",
153
+ "Noto Sans Rejang",
154
+ "Noto Sans Runic",
155
+ "Noto Sans Samaritan",
156
+ "Noto Sans Saurashtra",
157
+ "Noto Sans Sharada",
158
+ "Noto Sans Shavian",
159
+ "Noto Sans Siddham",
160
+ "Noto Sans SignWriting",
161
+ "Noto Sans Sinhala",
162
+ "Noto Sans Sinhala UI",
163
+ "Noto Sans Sogdian",
164
+ "Noto Sans Sora Sompeng",
165
+ "Noto Sans Soyombo",
166
+ "Noto Sans Sundanese",
167
+ "Noto Sans Syloti Nagri",
168
+ "Noto Sans Symbols",
169
+ "Noto Sans Symbols 2",
170
+ "Noto Sans Syriac",
171
+ "Noto Sans Syriac Eastern",
172
+ "Noto Sans Syriac Western",
173
+ "Noto Sans Tagalog",
174
+ "Noto Sans Tagbanwa",
175
+ "Noto Sans Tai Le",
176
+ "Noto Sans Tai Tham",
177
+ "Noto Sans Tai Viet",
178
+ "Noto Sans Takri",
179
+ "Noto Sans Tamil",
180
+ "Noto Sans Tamil Supplement",
181
+ "Noto Sans Tamil UI",
182
+ "Noto Sans Tangsa",
183
+ "Noto Sans Telugu",
184
+ "Noto Sans Telugu UI",
185
+ "Noto Sans Test",
186
+ "Noto Sans Thaana",
187
+ "Noto Sans Thai",
188
+ "Noto Sans Thai Looped",
189
+ "Noto Sans Thai Looped UI",
190
+ "Noto Sans Thai UI",
191
+ "Noto Sans Tifinagh",
192
+ "Noto Sans Tifinagh APT",
193
+ "Noto Sans Tifinagh Adrar",
194
+ "Noto Sans Tifinagh Agraw Imazighen",
195
+ "Noto Sans Tifinagh Ahaggar",
196
+ "Noto Sans Tifinagh Air",
197
+ "Noto Sans Tifinagh Azawagh",
198
+ "Noto Sans Tifinagh Ghat",
199
+ "Noto Sans Tifinagh Hawad",
200
+ "Noto Sans Tifinagh Rhissa Ixa",
201
+ "Noto Sans Tifinagh SIL",
202
+ "Noto Sans Tifinagh Tawellemmet",
203
+ "Noto Sans Tirhuta",
204
+ "Noto Sans Ugaritic",
205
+ "Noto Sans Vai",
206
+ "Noto Sans Vithkuqi",
207
+ "Noto Sans Wancho",
208
+ "Noto Sans Warang Citi",
209
+ "Noto Sans Yi",
210
+ "Noto Sans Zanabazar Square",
211
+ "Noto Traditional Nushu",
212
+ "Sans",
213
+ "Serif",
214
+ "Source Code Pro",
215
+ "System-ui",
216
+ "Ubuntu",
217
+ "Ubuntu Condensed",
218
+ "Ubuntu Mono"
219
+ ],
220
+ "max_seq_length": 529,
221
+ "pad_size": 3,
222
+ "pixels_per_patch": 16,
223
+ "rgb": false,
224
+ "text_renderer_type": "PangoCairoTextRenderer"
225
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.43,
3
+ "train_loss": 0.1377392346496582,
4
+ "train_runtime": 935051.3554,
5
+ "train_samples_per_second": 410.673,
6
+ "train_steps_per_second": 1.604
7
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c01f435556f10ddc7aa85231f023262303da016552d19f955c8c880903dc970
3
+ size 3195