alyzbane committed on
Commit
fbc2003
·
verified ·
1 Parent(s): ef22847

End of training

Browse files
README.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: facebook/convnext-tiny-224
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - imagefolder
9
+ metrics:
10
+ - precision
11
+ - recall
12
+ - f1
13
+ - accuracy
14
+ model-index:
15
+ - name: convnext-tiny-224-finetuned-barkley
16
+ results:
17
+ - task:
18
+ name: Image Classification
19
+ type: image-classification
20
+ dataset:
21
+ name: imagefolder
22
+ type: imagefolder
23
+ config: default
24
+ split: train
25
+ args: default
26
+ metrics:
27
+ - name: Precision
28
+ type: precision
29
+ value: 0.9936145510835913
30
+ - name: Recall
31
+ type: recall
32
+ value: 0.993421052631579
33
+ - name: F1
34
+ type: f1
35
+ value: 0.993419541966282
36
+ - name: Accuracy
37
+ type: accuracy
38
+ value: 0.9939393939393939
39
+ ---
40
+
41
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
42
+ should probably proofread and complete it, then remove this comment. -->
43
+
44
+ # convnext-tiny-224-finetuned-barkley
45
+
46
+ This model is a fine-tuned version of [facebook/convnext-tiny-224](https://huggingface.co/facebook/convnext-tiny-224) on the imagefolder dataset.
47
+ It achieves the following results on the evaluation set:
48
+ - Loss: 0.0794
49
+ - Precision: 0.9936
50
+ - Recall: 0.9934
51
+ - F1: 0.9934
52
+ - Accuracy: 0.9939
53
+ - Top1 Accuracy: 0.9934
54
+ - Error Rate: 0.0061
55
+
56
+ ## Model description
57
+
58
+ More information needed
59
+
60
+ ## Intended uses & limitations
61
+
62
+ More information needed
63
+
64
+ ## Training and evaluation data
65
+
66
+ More information needed
67
+
68
+ ## Training procedure
69
+
70
+ ### Training hyperparameters
71
+
72
+ The following hyperparameters were used during training:
73
+ - learning_rate: 0.0002
74
+ - train_batch_size: 32
75
+ - eval_batch_size: 32
76
+ - seed: 42
77
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
78
+ - lr_scheduler_type: linear
79
+ - lr_scheduler_warmup_ratio: 0.1
80
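+ - num_epochs: 30 (configured maximum; training was stopped early after epoch 13 by early stopping with a patience of 2)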
81
+ - mixed_precision_training: Native AMP
82
+
83
+ ### Training results
84
+
85
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy | Top1 Accuracy | Error Rate |
86
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|:-------------:|:----------:|
87
+ | 1.576 | 1.0 | 38 | 1.5660 | 0.3007 | 0.3684 | 0.2952 | 0.3479 | 0.3684 | 0.6521 |
88
+ | 1.5469 | 2.0 | 76 | 1.5353 | 0.3141 | 0.4079 | 0.3215 | 0.3854 | 0.4079 | 0.6146 |
89
+ | 1.5081 | 3.0 | 114 | 1.4782 | 0.5684 | 0.4671 | 0.3961 | 0.4436 | 0.4671 | 0.5564 |
90
+ | 1.4278 | 4.0 | 152 | 1.3718 | 0.7088 | 0.6053 | 0.5840 | 0.5866 | 0.6053 | 0.4134 |
91
+ | 1.2938 | 5.0 | 190 | 1.1909 | 0.8582 | 0.8355 | 0.8378 | 0.8290 | 0.8355 | 0.1710 |
92
+ | 1.0696 | 6.0 | 228 | 0.9353 | 0.9243 | 0.9211 | 0.9215 | 0.9205 | 0.9211 | 0.0795 |
93
+ | 0.789 | 7.0 | 266 | 0.6347 | 0.9680 | 0.9671 | 0.9673 | 0.9691 | 0.9671 | 0.0309 |
94
+ | 0.506 | 8.0 | 304 | 0.3910 | 0.9750 | 0.9737 | 0.9739 | 0.9752 | 0.9737 | 0.0248 |
95
+ | 0.2876 | 9.0 | 342 | 0.2126 | 0.9808 | 0.9803 | 0.9802 | 0.9814 | 0.9803 | 0.0186 |
96
+ | 0.1722 | 10.0 | 380 | 0.1409 | 0.9809 | 0.9803 | 0.9799 | 0.9818 | 0.9803 | 0.0182 |
97
+ | 0.1082 | 11.0 | 418 | 0.0794 | 0.9936 | 0.9934 | 0.9934 | 0.9939 | 0.9934 | 0.0061 |
98
+ | 0.0715 | 12.0 | 456 | 0.0577 | 0.9936 | 0.9934 | 0.9934 | 0.9939 | 0.9934 | 0.0061 |
99
+ | 0.0492 | 13.0 | 494 | 0.0440 | 0.9872 | 0.9868 | 0.9867 | 0.9879 | 0.9868 | 0.0121 |
100
+
101
+
102
+ ### Framework versions
103
+
104
+ - Transformers 4.45.2
105
+ - PyTorch 2.3.1+cu121
106
+ - Datasets 3.0.1
107
+ - Tokenizers 0.20.1
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 13.0,
3
+ "eval_accuracy": 0.9939393939393939,
4
+ "eval_error_rate": 0.0060606060606061,
5
+ "eval_f1": 0.993419541966282,
6
+ "eval_loss": 0.0794038251042366,
7
+ "eval_precision": 0.9936145510835913,
8
+ "eval_recall": 0.993421052631579,
9
+ "eval_runtime": 51.0106,
10
+ "eval_samples_per_second": 2.98,
11
+ "eval_steps_per_second": 0.098,
12
+ "eval_top1_accuracy": 0.993421052631579,
13
+ "total_flos": 3.972506461105029e+17,
14
+ "train_loss": 0.8004542765347099,
15
+ "train_runtime": 7158.591,
16
+ "train_samples_per_second": 5.096,
17
+ "train_steps_per_second": 0.159
18
+ }
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/convnext-tiny-224",
3
+ "architectures": [
4
+ "ConvNextForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 3,
8
+ 3,
9
+ 9,
10
+ 3
11
+ ],
12
+ "drop_path_rate": 0.0,
13
+ "hidden_act": "gelu",
14
+ "hidden_sizes": [
15
+ 96,
16
+ 192,
17
+ 384,
18
+ 768
19
+ ],
20
+ "id2label": {
21
+ "0": "Intsia bijuga",
22
+ "1": "Mangifera indica",
23
+ "2": "Pterocarpus indicus",
24
+ "3": "Roystonea regia",
25
+ "4": "Tabebuia"
26
+ },
27
+ "image_size": 224,
28
+ "initializer_range": 0.02,
29
+ "label2id": {
30
+ "Intsia bijuga": 0,
31
+ "Mangifera indica": 1,
32
+ "Pterocarpus indicus": 2,
33
+ "Roystonea regia": 3,
34
+ "Tabebuia": 4
35
+ },
36
+ "layer_norm_eps": 1e-12,
37
+ "layer_scale_init_value": 1e-06,
38
+ "model_type": "convnext",
39
+ "num_channels": 3,
40
+ "num_stages": 4,
41
+ "out_features": [
42
+ "stage4"
43
+ ],
44
+ "out_indices": [
45
+ 4
46
+ ],
47
+ "patch_size": 4,
48
+ "problem_type": "single_label_classification",
49
+ "stage_names": [
50
+ "stem",
51
+ "stage1",
52
+ "stage2",
53
+ "stage3",
54
+ "stage4"
55
+ ],
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.45.2"
58
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 13.0,
3
+ "eval_accuracy": 0.9939393939393939,
4
+ "eval_error_rate": 0.0060606060606061,
5
+ "eval_f1": 0.993419541966282,
6
+ "eval_loss": 0.0794038251042366,
7
+ "eval_precision": 0.9936145510835913,
8
+ "eval_recall": 0.993421052631579,
9
+ "eval_runtime": 51.0106,
10
+ "eval_samples_per_second": 2.98,
11
+ "eval_steps_per_second": 0.098,
12
+ "eval_top1_accuracy": 0.993421052631579
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0799eed45249c597bd5706cd54239ba58459cb5916a27dddda3eccc89547767d
3
+ size 111317164
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 13.0,
3
+ "total_flos": 3.972506461105029e+17,
4
+ "train_loss": 0.8004542765347099,
5
+ "train_runtime": 7158.591,
6
+ "train_samples_per_second": 5.096,
7
+ "train_steps_per_second": 0.159
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9939393939393939,
3
+ "best_model_checkpoint": "convnext-tiny-224-finetuned-barkley\\checkpoint-418",
4
+ "epoch": 13.0,
5
+ "eval_steps": 500,
6
+ "global_step": 494,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "train_accuracy": 0.3092105263157895
14
+ },
15
+ {
16
+ "epoch": 1.0,
17
+ "grad_norm": 2.351605176925659,
18
+ "learning_rate": 1.1659108139509125e-06,
19
+ "loss": 1.576,
20
+ "step": 38
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_accuracy": 0.34791666666666665,
25
+ "eval_error_rate": 0.6520833333333333,
26
+ "eval_f1": 0.2952478867653256,
27
+ "eval_loss": 1.5659886598587036,
28
+ "eval_precision": 0.30074270516946977,
29
+ "eval_recall": 0.3684210526315789,
30
+ "eval_runtime": 51.2455,
31
+ "eval_samples_per_second": 2.966,
32
+ "eval_steps_per_second": 0.098,
33
+ "eval_top1_accuracy": 0.3684210526315789,
34
+ "step": 38
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "train_accuracy": 0.36622807017543857
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "grad_norm": 2.0893232822418213,
43
+ "learning_rate": 2.23574935501902e-06,
44
+ "loss": 1.5469,
45
+ "step": 76
46
+ },
47
+ {
48
+ "epoch": 2.0,
49
+ "eval_accuracy": 0.38541666666666663,
50
+ "eval_error_rate": 0.6145833333333334,
51
+ "eval_f1": 0.32146190433952665,
52
+ "eval_loss": 1.5353103876113892,
53
+ "eval_precision": 0.3140966869404633,
54
+ "eval_recall": 0.40789473684210525,
55
+ "eval_runtime": 52.9551,
56
+ "eval_samples_per_second": 2.87,
57
+ "eval_steps_per_second": 0.094,
58
+ "eval_top1_accuracy": 0.40789473684210525,
59
+ "step": 76
60
+ },
61
+ {
62
+ "epoch": 3.0,
63
+ "train_accuracy": 0.4137426900584795
64
+ },
65
+ {
66
+ "epoch": 3.0,
67
+ "grad_norm": 2.353696823120117,
68
+ "learning_rate": 3.927960312504679e-06,
69
+ "loss": 1.5081,
70
+ "step": 114
71
+ },
72
+ {
73
+ "epoch": 3.0,
74
+ "eval_accuracy": 0.44356060606060604,
75
+ "eval_error_rate": 0.5564393939393939,
76
+ "eval_f1": 0.3961300506250762,
77
+ "eval_loss": 1.4782265424728394,
78
+ "eval_precision": 0.5683927222362125,
79
+ "eval_recall": 0.46710526315789475,
80
+ "eval_runtime": 51.3887,
81
+ "eval_samples_per_second": 2.958,
82
+ "eval_steps_per_second": 0.097,
83
+ "eval_top1_accuracy": 0.46710526315789475,
84
+ "step": 114
85
+ },
86
+ {
87
+ "epoch": 4.0,
88
+ "train_accuracy": 0.4861111111111111
89
+ },
90
+ {
91
+ "epoch": 4.0,
92
+ "grad_norm": 2.854907512664795,
93
+ "learning_rate": 6.113544042901594e-06,
94
+ "loss": 1.4278,
95
+ "step": 152
96
+ },
97
+ {
98
+ "epoch": 4.0,
99
+ "eval_accuracy": 0.5865530303030304,
100
+ "eval_error_rate": 0.4134469696969696,
101
+ "eval_f1": 0.5840194800037495,
102
+ "eval_loss": 1.3718132972717285,
103
+ "eval_precision": 0.7087537646637603,
104
+ "eval_recall": 0.6052631578947368,
105
+ "eval_runtime": 53.3959,
106
+ "eval_samples_per_second": 2.847,
107
+ "eval_steps_per_second": 0.094,
108
+ "eval_top1_accuracy": 0.6052631578947368,
109
+ "step": 152
110
+ },
111
+ {
112
+ "epoch": 5.0,
113
+ "train_accuracy": 0.6827485380116959
114
+ },
115
+ {
116
+ "epoch": 5.0,
117
+ "grad_norm": 3.946742296218872,
118
+ "learning_rate": 8.62589039584572e-06,
119
+ "loss": 1.2938,
120
+ "step": 190
121
+ },
122
+ {
123
+ "epoch": 5.0,
124
+ "eval_accuracy": 0.8289772727272728,
125
+ "eval_error_rate": 0.17102272727272716,
126
+ "eval_f1": 0.8378200475239326,
127
+ "eval_loss": 1.1908537149429321,
128
+ "eval_precision": 0.8581657632453227,
129
+ "eval_recall": 0.8355263157894737,
130
+ "eval_runtime": 52.0457,
131
+ "eval_samples_per_second": 2.921,
132
+ "eval_steps_per_second": 0.096,
133
+ "eval_top1_accuracy": 0.8355263157894737,
134
+ "step": 190
135
+ },
136
+ {
137
+ "epoch": 6.0,
138
+ "train_accuracy": 0.8713450292397661
139
+ },
140
+ {
141
+ "epoch": 6.0,
142
+ "grad_norm": 2.741647720336914,
143
+ "learning_rate": 1.1273479642392808e-05,
144
+ "loss": 1.0696,
145
+ "step": 228
146
+ },
147
+ {
148
+ "epoch": 6.0,
149
+ "eval_accuracy": 0.9204545454545455,
150
+ "eval_error_rate": 0.07954545454545447,
151
+ "eval_f1": 0.9214891548724753,
152
+ "eval_loss": 0.9352867007255554,
153
+ "eval_precision": 0.9242831541218637,
154
+ "eval_recall": 0.9210526315789473,
155
+ "eval_runtime": 53.2027,
156
+ "eval_samples_per_second": 2.857,
157
+ "eval_steps_per_second": 0.094,
158
+ "eval_top1_accuracy": 0.9210526315789473,
159
+ "step": 228
160
+ },
161
+ {
162
+ "epoch": 7.0,
163
+ "train_accuracy": 0.9407894736842105
164
+ },
165
+ {
166
+ "epoch": 7.0,
167
+ "grad_norm": 3.6590983867645264,
168
+ "learning_rate": 1.3854482295832083e-05,
169
+ "loss": 0.789,
170
+ "step": 266
171
+ },
172
+ {
173
+ "epoch": 7.0,
174
+ "eval_accuracy": 0.9691287878787879,
175
+ "eval_error_rate": 0.030871212121212133,
176
+ "eval_f1": 0.9672834045899062,
177
+ "eval_loss": 0.6346580386161804,
178
+ "eval_precision": 0.9680208585981083,
179
+ "eval_recall": 0.9671052631578947,
180
+ "eval_runtime": 52.1891,
181
+ "eval_samples_per_second": 2.912,
182
+ "eval_steps_per_second": 0.096,
183
+ "eval_top1_accuracy": 0.9671052631578947,
184
+ "step": 266
185
+ },
186
+ {
187
+ "epoch": 8.0,
188
+ "train_accuracy": 0.9634502923976608
189
+ },
190
+ {
191
+ "epoch": 8.0,
192
+ "grad_norm": 3.982485055923462,
193
+ "learning_rate": 1.6172144859969913e-05,
194
+ "loss": 0.506,
195
+ "step": 304
196
+ },
197
+ {
198
+ "epoch": 8.0,
199
+ "eval_accuracy": 0.975189393939394,
200
+ "eval_error_rate": 0.024810606060606033,
201
+ "eval_f1": 0.9738558660758309,
202
+ "eval_loss": 0.3909807801246643,
203
+ "eval_precision": 0.9750055285272005,
204
+ "eval_recall": 0.9736842105263158,
205
+ "eval_runtime": 50.6533,
206
+ "eval_samples_per_second": 3.001,
207
+ "eval_steps_per_second": 0.099,
208
+ "eval_top1_accuracy": 0.9736842105263158,
209
+ "step": 304
210
+ },
211
+ {
212
+ "epoch": 9.0,
213
+ "train_accuracy": 0.9780701754385965
214
+ },
215
+ {
216
+ "epoch": 9.0,
217
+ "grad_norm": 2.4651834964752197,
218
+ "learning_rate": 1.8049788627450628e-05,
219
+ "loss": 0.2876,
220
+ "step": 342
221
+ },
222
+ {
223
+ "epoch": 9.0,
224
+ "eval_accuracy": 0.981439393939394,
225
+ "eval_error_rate": 0.018560606060606055,
226
+ "eval_f1": 0.9802473202746875,
227
+ "eval_loss": 0.21257419884204865,
228
+ "eval_precision": 0.980843653250774,
229
+ "eval_recall": 0.9802631578947368,
230
+ "eval_runtime": 50.1498,
231
+ "eval_samples_per_second": 3.031,
232
+ "eval_steps_per_second": 0.1,
233
+ "eval_top1_accuracy": 0.9802631578947368,
234
+ "step": 342
235
+ },
236
+ {
237
+ "epoch": 10.0,
238
+ "train_accuracy": 0.9780701754385965
239
+ },
240
+ {
241
+ "epoch": 10.0,
242
+ "grad_norm": 3.773145914077759,
243
+ "learning_rate": 1.9318622999689343e-05,
244
+ "loss": 0.1722,
245
+ "step": 380
246
+ },
247
+ {
248
+ "epoch": 10.0,
249
+ "eval_accuracy": 0.9818181818181818,
250
+ "eval_error_rate": 0.018181818181818188,
251
+ "eval_f1": 0.979943544279758,
252
+ "eval_loss": 0.14089564979076385,
253
+ "eval_precision": 0.9808553804296839,
254
+ "eval_recall": 0.9802631578947368,
255
+ "eval_runtime": 50.1171,
256
+ "eval_samples_per_second": 3.033,
257
+ "eval_steps_per_second": 0.1,
258
+ "eval_top1_accuracy": 0.9802631578947368,
259
+ "step": 380
260
+ },
261
+ {
262
+ "epoch": 11.0,
263
+ "train_accuracy": 0.9875730994152047
264
+ },
265
+ {
266
+ "epoch": 11.0,
267
+ "grad_norm": 2.976818561553955,
268
+ "learning_rate": 1.995005803798479e-05,
269
+ "loss": 0.1082,
270
+ "step": 418
271
+ },
272
+ {
273
+ "epoch": 11.0,
274
+ "eval_accuracy": 0.9939393939393939,
275
+ "eval_error_rate": 0.0060606060606061,
276
+ "eval_f1": 0.993419541966282,
277
+ "eval_loss": 0.0794038251042366,
278
+ "eval_precision": 0.9936145510835913,
279
+ "eval_recall": 0.993421052631579,
280
+ "eval_runtime": 52.5912,
281
+ "eval_samples_per_second": 2.89,
282
+ "eval_steps_per_second": 0.095,
283
+ "eval_top1_accuracy": 0.993421052631579,
284
+ "step": 418
285
+ },
286
+ {
287
+ "epoch": 12.0,
288
+ "train_accuracy": 0.9912280701754386
289
+ },
290
+ {
291
+ "epoch": 12.0,
292
+ "grad_norm": 5.014571189880371,
293
+ "learning_rate": 1.9972038083659915e-05,
294
+ "loss": 0.0715,
295
+ "step": 456
296
+ },
297
+ {
298
+ "epoch": 12.0,
299
+ "eval_accuracy": 0.9939393939393939,
300
+ "eval_error_rate": 0.0060606060606061,
301
+ "eval_f1": 0.993419541966282,
302
+ "eval_loss": 0.057679127901792526,
303
+ "eval_precision": 0.9936145510835913,
304
+ "eval_recall": 0.993421052631579,
305
+ "eval_runtime": 50.9469,
306
+ "eval_samples_per_second": 2.984,
307
+ "eval_steps_per_second": 0.098,
308
+ "eval_top1_accuracy": 0.993421052631579,
309
+ "step": 456
310
+ },
311
+ {
312
+ "epoch": 13.0,
313
+ "train_accuracy": 0.9912280701754386
314
+ },
315
+ {
316
+ "epoch": 13.0,
317
+ "grad_norm": 1.5518372058868408,
318
+ "learning_rate": 1.9813886204892037e-05,
319
+ "loss": 0.0492,
320
+ "step": 494
321
+ },
322
+ {
323
+ "epoch": 13.0,
324
+ "eval_accuracy": 0.9878787878787879,
325
+ "eval_error_rate": 0.012121212121212088,
326
+ "eval_f1": 0.9867362170674966,
327
+ "eval_loss": 0.04395502433180809,
328
+ "eval_precision": 0.9872349657566376,
329
+ "eval_recall": 0.9868421052631579,
330
+ "eval_runtime": 54.1085,
331
+ "eval_samples_per_second": 2.809,
332
+ "eval_steps_per_second": 0.092,
333
+ "eval_top1_accuracy": 0.9868421052631579,
334
+ "step": 494
335
+ },
336
+ {
337
+ "epoch": 13.0,
338
+ "step": 494,
339
+ "total_flos": 3.972506461105029e+17,
340
+ "train_loss": 0.8004542765347099,
341
+ "train_runtime": 7158.591,
342
+ "train_samples_per_second": 5.096,
343
+ "train_steps_per_second": 0.159
344
+ }
345
+ ],
346
+ "logging_steps": 500,
347
+ "max_steps": 1140,
348
+ "num_input_tokens_seen": 0,
349
+ "num_train_epochs": 30,
350
+ "save_steps": 500,
351
+ "stateful_callbacks": {
352
+ "EarlyStoppingCallback": {
353
+ "args": {
354
+ "early_stopping_patience": 2,
355
+ "early_stopping_threshold": 0.0
356
+ },
357
+ "attributes": {
358
+ "early_stopping_patience_counter": 2
359
+ }
360
+ },
361
+ "TrainerControl": {
362
+ "args": {
363
+ "should_epoch_stop": false,
364
+ "should_evaluate": false,
365
+ "should_log": false,
366
+ "should_save": true,
367
+ "should_training_stop": true
368
+ },
369
+ "attributes": {}
370
+ }
371
+ },
372
+ "total_flos": 3.972506461105029e+17,
373
+ "train_batch_size": 32,
374
+ "trial_name": null,
375
+ "trial_params": null
376
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b32c9a024fd0fe5d70f0247b1ad721619779776bf233e0e5351cdb8332e17696
3
+ size 5176