Augusto777 committed on
Commit
5ec8984
·
verified ·
1 Parent(s): b49daa3

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8709677419354839
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224](https://huggingface.co/microsoft/beit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.4334
36
- - Accuracy: 0.8710
37
 
38
  ## Model description
39
 
@@ -67,42 +67,46 @@ The following hyperparameters were used during training:
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
- | No log | 0.89 | 4 | 1.3799 | 0.5 |
71
- | No log | 2.0 | 9 | 1.3424 | 0.4516 |
72
- | 1.3727 | 2.89 | 13 | 1.2846 | 0.4516 |
73
- | 1.3727 | 4.0 | 18 | 1.1867 | 0.4516 |
74
- | 1.262 | 4.89 | 22 | 1.1083 | 0.4516 |
75
- | 1.262 | 6.0 | 27 | 1.0096 | 0.7419 |
76
- | 1.0651 | 6.89 | 31 | 0.9655 | 0.6290 |
77
- | 1.0651 | 8.0 | 36 | 0.7953 | 0.8065 |
78
- | 0.8729 | 8.89 | 40 | 0.8145 | 0.7742 |
79
- | 0.8729 | 10.0 | 45 | 0.6767 | 0.7903 |
80
- | 0.8729 | 10.89 | 49 | 0.6978 | 0.7419 |
81
- | 0.7206 | 12.0 | 54 | 0.7585 | 0.6935 |
82
- | 0.7206 | 12.89 | 58 | 0.5204 | 0.8387 |
83
- | 0.5783 | 14.0 | 63 | 0.6260 | 0.7903 |
84
- | 0.5783 | 14.89 | 67 | 0.4707 | 0.8387 |
85
- | 0.4827 | 16.0 | 72 | 0.6392 | 0.8065 |
86
- | 0.4827 | 16.89 | 76 | 0.4334 | 0.8710 |
87
- | 0.4465 | 18.0 | 81 | 0.5769 | 0.7903 |
88
- | 0.4465 | 18.89 | 85 | 0.5047 | 0.8226 |
89
- | 0.4166 | 20.0 | 90 | 0.5191 | 0.8710 |
90
- | 0.4166 | 20.89 | 94 | 0.5115 | 0.8226 |
91
- | 0.4166 | 22.0 | 99 | 0.6454 | 0.7419 |
92
- | 0.4209 | 22.89 | 103 | 0.5939 | 0.7742 |
93
- | 0.4209 | 24.0 | 108 | 0.4799 | 0.8710 |
94
- | 0.3429 | 24.89 | 112 | 0.5511 | 0.8065 |
95
- | 0.3429 | 26.0 | 117 | 0.4977 | 0.8065 |
96
- | 0.3344 | 26.89 | 121 | 0.4180 | 0.8710 |
97
- | 0.3344 | 28.0 | 126 | 0.5254 | 0.8226 |
98
- | 0.3301 | 28.89 | 130 | 0.5396 | 0.8226 |
99
- | 0.3301 | 30.0 | 135 | 0.5654 | 0.7903 |
100
- | 0.3301 | 30.89 | 139 | 0.5932 | 0.8065 |
101
- | 0.3262 | 32.0 | 144 | 0.5630 | 0.8065 |
102
- | 0.3262 | 32.89 | 148 | 0.5434 | 0.8065 |
103
- | 0.2997 | 34.0 | 153 | 0.5314 | 0.8065 |
104
- | 0.2997 | 34.89 | 157 | 0.5195 | 0.8065 |
105
- | 0.2891 | 35.56 | 160 | 0.5155 | 0.8065 |
 
 
 
 
106
 
107
 
108
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8317757009345794
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224](https://huggingface.co/microsoft/beit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5393
36
+ - Accuracy: 0.8318
37
 
38
  ## Model description
39
 
 
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | No log | 1.0 | 8 | 0.6887 | 0.5888 |
71
+ | 0.692 | 2.0 | 16 | 0.6782 | 0.5888 |
72
+ | 0.6801 | 3.0 | 24 | 0.6669 | 0.5888 |
73
+ | 0.6696 | 4.0 | 32 | 0.6644 | 0.5888 |
74
+ | 0.6607 | 5.0 | 40 | 0.6661 | 0.6636 |
75
+ | 0.6607 | 6.0 | 48 | 0.6241 | 0.6542 |
76
+ | 0.6341 | 7.0 | 56 | 0.6235 | 0.6542 |
77
+ | 0.6089 | 8.0 | 64 | 0.6088 | 0.6916 |
78
+ | 0.6095 | 9.0 | 72 | 0.5912 | 0.6916 |
79
+ | 0.5632 | 10.0 | 80 | 0.6607 | 0.6355 |
80
+ | 0.5632 | 11.0 | 88 | 0.5793 | 0.7009 |
81
+ | 0.5418 | 12.0 | 96 | 0.5953 | 0.6822 |
82
+ | 0.5336 | 13.0 | 104 | 0.5793 | 0.7103 |
83
+ | 0.5102 | 14.0 | 112 | 0.5292 | 0.7196 |
84
+ | 0.4762 | 15.0 | 120 | 0.6558 | 0.7009 |
85
+ | 0.4762 | 16.0 | 128 | 0.5371 | 0.7103 |
86
+ | 0.544 | 17.0 | 136 | 0.5401 | 0.7570 |
87
+ | 0.4256 | 18.0 | 144 | 0.4927 | 0.7944 |
88
+ | 0.4082 | 19.0 | 152 | 0.5801 | 0.7383 |
89
+ | 0.4014 | 20.0 | 160 | 0.5823 | 0.7383 |
90
+ | 0.4014 | 21.0 | 168 | 0.5393 | 0.7757 |
91
+ | 0.3483 | 22.0 | 176 | 0.5941 | 0.7103 |
92
+ | 0.3121 | 23.0 | 184 | 0.5569 | 0.7383 |
93
+ | 0.3484 | 24.0 | 192 | 0.5975 | 0.7664 |
94
+ | 0.263 | 25.0 | 200 | 0.6544 | 0.7570 |
95
+ | 0.263 | 26.0 | 208 | 0.5744 | 0.7757 |
96
+ | 0.2633 | 27.0 | 216 | 0.6095 | 0.7664 |
97
+ | 0.2935 | 28.0 | 224 | 0.5286 | 0.7664 |
98
+ | 0.2332 | 29.0 | 232 | 0.6028 | 0.7850 |
99
+ | 0.2314 | 30.0 | 240 | 0.5935 | 0.7944 |
100
+ | 0.2314 | 31.0 | 248 | 0.5393 | 0.8318 |
101
+ | 0.202 | 32.0 | 256 | 0.5556 | 0.8224 |
102
+ | 0.2127 | 33.0 | 264 | 0.5913 | 0.8037 |
103
+ | 0.2035 | 34.0 | 272 | 0.5337 | 0.8037 |
104
+ | 0.2618 | 35.0 | 280 | 0.6221 | 0.8037 |
105
+ | 0.2618 | 36.0 | 288 | 0.5090 | 0.8318 |
106
+ | 0.217 | 37.0 | 296 | 0.5649 | 0.8224 |
107
+ | 0.2111 | 38.0 | 304 | 0.5683 | 0.8131 |
108
+ | 0.2085 | 39.0 | 312 | 0.5398 | 0.8224 |
109
+ | 0.1912 | 40.0 | 320 | 0.5548 | 0.8224 |
110
 
111
 
112
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 35.56,
3
- "eval_accuracy": 0.8709677419354839,
4
- "eval_loss": 0.43338534235954285,
5
- "eval_runtime": 2.5246,
6
- "eval_samples_per_second": 24.559,
7
- "eval_steps_per_second": 1.584,
8
- "train_loss": 0.5975452944636345,
9
- "train_runtime": 588.8758,
10
- "train_samples_per_second": 19.563,
11
- "train_steps_per_second": 0.272
12
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.8317757009345794,
4
+ "eval_loss": 0.5392867922782898,
5
+ "eval_runtime": 2.3105,
6
+ "eval_samples_per_second": 46.31,
7
+ "eval_steps_per_second": 3.03,
8
+ "train_loss": 0.4049976162612438,
9
+ "train_runtime": 701.3243,
10
+ "train_samples_per_second": 28.403,
11
+ "train_steps_per_second": 0.456
12
  }
config.json CHANGED
@@ -14,19 +14,15 @@
14
  "hidden_dropout_prob": 0.0,
15
  "hidden_size": 768,
16
  "id2label": {
17
- "0": "active",
18
- "1": "active-inactive",
19
- "2": "healthy",
20
- "3": "inactive"
21
  },
22
  "image_size": 224,
23
  "initializer_range": 0.02,
24
  "intermediate_size": 3072,
25
  "label2id": {
26
- "active": 0,
27
- "active-inactive": 1,
28
- "healthy": 2,
29
- "inactive": 3
30
  },
31
  "layer_norm_eps": 1e-12,
32
  "layer_scale_init_value": 0.1,
 
14
  "hidden_dropout_prob": 0.0,
15
  "hidden_size": 768,
16
  "id2label": {
17
+ "0": "HR",
18
+ "1": "No HR"
 
 
19
  },
20
  "image_size": 224,
21
  "initializer_range": 0.02,
22
  "intermediate_size": 3072,
23
  "label2id": {
24
+ "HR": 0,
25
+ "No HR": 1
 
 
26
  },
27
  "layer_norm_eps": 1e-12,
28
  "layer_scale_init_value": 0.1,
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 35.56,
3
- "eval_accuracy": 0.8709677419354839,
4
- "eval_loss": 0.43338534235954285,
5
- "eval_runtime": 2.5246,
6
- "eval_samples_per_second": 24.559,
7
- "eval_steps_per_second": 1.584
8
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.8317757009345794,
4
+ "eval_loss": 0.5392867922782898,
5
+ "eval_runtime": 2.3105,
6
+ "eval_samples_per_second": 46.31,
7
+ "eval_steps_per_second": 3.03
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9461d3c4b7148ad7928d9f22de8764e8b5e4a05aca1aba6bff0b3200b36d426b
3
- size 343086480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eeea300ff3a120329583ab7e00d190047c17c87c67ed1e5a5799d0b845cb144
3
+ size 343080328
runs/Dec01_16-47-09_DESKTOP-SKBE9FB/events.out.tfevents.1733093230.DESKTOP-SKBE9FB.8952.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fb6a8e9bdb89b40f494bf6bdd8f1d6f93f6462b0ae0cbc600c2b435833c9375
3
+ size 23387
runs/Dec01_16-47-09_DESKTOP-SKBE9FB/events.out.tfevents.1733093934.DESKTOP-SKBE9FB.8952.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b161fffc86cb516b20ac597456df212892f3a4f8d5f9685c06b4258273bb69
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 35.56,
3
- "train_loss": 0.5975452944636345,
4
- "train_runtime": 588.8758,
5
- "train_samples_per_second": 19.563,
6
- "train_steps_per_second": 0.272
7
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "train_loss": 0.4049976162612438,
4
+ "train_runtime": 701.3243,
5
+ "train_samples_per_second": 28.403,
6
+ "train_steps_per_second": 0.456
7
  }
trainer_state.json CHANGED
@@ -1,449 +1,581 @@
1
  {
2
- "best_metric": 0.8709677419354839,
3
- "best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-76",
4
- "epoch": 35.55555555555556,
5
  "eval_steps": 500,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.89,
13
- "eval_accuracy": 0.5,
14
- "eval_loss": 1.379904866218567,
15
- "eval_runtime": 2.315,
16
- "eval_samples_per_second": 26.782,
17
- "eval_steps_per_second": 1.728,
18
- "step": 4
 
 
 
 
 
 
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_accuracy": 0.45161290322580644,
23
- "eval_loss": 1.3424019813537598,
24
- "eval_runtime": 2.1675,
25
- "eval_samples_per_second": 28.604,
26
- "eval_steps_per_second": 1.845,
27
- "step": 9
28
  },
29
  {
30
- "epoch": 2.22,
31
  "learning_rate": 3.125e-05,
32
- "loss": 1.3727,
33
- "step": 10
 
 
 
 
 
 
 
 
 
34
  },
35
  {
36
- "epoch": 2.89,
37
- "eval_accuracy": 0.45161290322580644,
38
- "eval_loss": 1.2845711708068848,
39
- "eval_runtime": 2.2255,
40
- "eval_samples_per_second": 27.858,
41
- "eval_steps_per_second": 1.797,
42
- "step": 13
43
  },
44
  {
45
  "epoch": 4.0,
46
- "eval_accuracy": 0.45161290322580644,
47
- "eval_loss": 1.186702847480774,
48
- "eval_runtime": 2.174,
49
- "eval_samples_per_second": 28.519,
50
- "eval_steps_per_second": 1.84,
51
- "step": 18
52
  },
53
  {
54
- "epoch": 4.44,
55
  "learning_rate": 4.8611111111111115e-05,
56
- "loss": 1.262,
57
- "step": 20
58
  },
59
  {
60
- "epoch": 4.89,
61
- "eval_accuracy": 0.45161290322580644,
62
- "eval_loss": 1.1083455085754395,
63
- "eval_runtime": 2.2285,
64
- "eval_samples_per_second": 27.821,
65
- "eval_steps_per_second": 1.795,
66
- "step": 22
67
  },
68
  {
69
  "epoch": 6.0,
70
- "eval_accuracy": 0.7419354838709677,
71
- "eval_loss": 1.0096312761306763,
72
- "eval_runtime": 2.3886,
73
- "eval_samples_per_second": 25.957,
74
- "eval_steps_per_second": 1.675,
75
- "step": 27
76
  },
77
  {
78
- "epoch": 6.67,
79
- "learning_rate": 4.5138888888888894e-05,
80
- "loss": 1.0651,
81
- "step": 30
82
  },
83
  {
84
- "epoch": 6.89,
85
- "eval_accuracy": 0.6290322580645161,
86
- "eval_loss": 0.965541660785675,
87
- "eval_runtime": 2.1775,
88
- "eval_samples_per_second": 28.473,
89
- "eval_steps_per_second": 1.837,
90
- "step": 31
 
 
 
 
 
 
91
  },
92
  {
93
  "epoch": 8.0,
94
- "eval_accuracy": 0.8064516129032258,
95
- "eval_loss": 0.7952606678009033,
96
- "eval_runtime": 2.2276,
97
- "eval_samples_per_second": 27.833,
98
- "eval_steps_per_second": 1.796,
99
- "step": 36
100
  },
101
  {
102
- "epoch": 8.89,
103
- "learning_rate": 4.166666666666667e-05,
104
- "loss": 0.8729,
105
- "step": 40
106
  },
107
  {
108
- "epoch": 8.89,
109
- "eval_accuracy": 0.7741935483870968,
110
- "eval_loss": 0.8145327568054199,
111
- "eval_runtime": 2.2085,
112
- "eval_samples_per_second": 28.073,
113
- "eval_steps_per_second": 1.811,
114
- "step": 40
115
  },
116
  {
117
  "epoch": 10.0,
118
- "eval_accuracy": 0.7903225806451613,
119
- "eval_loss": 0.6766544580459595,
120
- "eval_runtime": 2.6727,
121
- "eval_samples_per_second": 23.198,
122
- "eval_steps_per_second": 1.497,
123
- "step": 45
124
  },
125
  {
126
- "epoch": 10.89,
127
- "eval_accuracy": 0.7419354838709677,
128
- "eval_loss": 0.6978453993797302,
129
- "eval_runtime": 2.1745,
130
- "eval_samples_per_second": 28.512,
131
- "eval_steps_per_second": 1.839,
132
- "step": 49
133
  },
134
  {
135
- "epoch": 11.11,
136
- "learning_rate": 3.8194444444444444e-05,
137
- "loss": 0.7206,
138
- "step": 50
 
 
 
 
 
 
 
 
 
139
  },
140
  {
141
  "epoch": 12.0,
142
- "eval_accuracy": 0.6935483870967742,
143
- "eval_loss": 0.7585192918777466,
144
- "eval_runtime": 2.1875,
145
- "eval_samples_per_second": 28.342,
146
- "eval_steps_per_second": 1.829,
147
- "step": 54
148
  },
149
  {
150
- "epoch": 12.89,
151
- "eval_accuracy": 0.8387096774193549,
152
- "eval_loss": 0.5204325318336487,
153
- "eval_runtime": 2.2877,
154
- "eval_samples_per_second": 27.101,
155
- "eval_steps_per_second": 1.748,
156
- "step": 58
157
  },
158
  {
159
- "epoch": 13.33,
160
- "learning_rate": 3.472222222222222e-05,
161
- "loss": 0.5783,
162
- "step": 60
 
 
 
 
 
 
 
 
 
163
  },
164
  {
165
  "epoch": 14.0,
166
- "eval_accuracy": 0.7903225806451613,
167
- "eval_loss": 0.6260057687759399,
168
- "eval_runtime": 2.148,
169
- "eval_samples_per_second": 28.864,
170
- "eval_steps_per_second": 1.862,
171
- "step": 63
172
  },
173
  {
174
- "epoch": 14.89,
175
- "eval_accuracy": 0.8387096774193549,
176
- "eval_loss": 0.47068876028060913,
177
- "eval_runtime": 2.2305,
178
- "eval_samples_per_second": 27.796,
179
- "eval_steps_per_second": 1.793,
180
- "step": 67
181
  },
182
  {
183
- "epoch": 15.56,
184
- "learning_rate": 3.125e-05,
185
- "loss": 0.4827,
186
- "step": 70
 
 
 
187
  },
188
  {
189
  "epoch": 16.0,
190
- "eval_accuracy": 0.8064516129032258,
191
- "eval_loss": 0.6392358541488647,
192
- "eval_runtime": 2.274,
193
- "eval_samples_per_second": 27.264,
194
- "eval_steps_per_second": 1.759,
195
- "step": 72
196
  },
197
  {
198
- "epoch": 16.89,
199
- "eval_accuracy": 0.8709677419354839,
200
- "eval_loss": 0.43338534235954285,
201
- "eval_runtime": 2.2445,
202
- "eval_samples_per_second": 27.623,
203
- "eval_steps_per_second": 1.782,
204
- "step": 76
205
  },
206
  {
207
- "epoch": 17.78,
208
- "learning_rate": 2.777777777777778e-05,
209
- "loss": 0.4465,
210
- "step": 80
 
 
 
 
 
 
 
 
 
211
  },
212
  {
213
  "epoch": 18.0,
214
- "eval_accuracy": 0.7903225806451613,
215
- "eval_loss": 0.5768514275550842,
216
- "eval_runtime": 2.222,
217
- "eval_samples_per_second": 27.902,
218
- "eval_steps_per_second": 1.8,
219
- "step": 81
 
 
 
 
 
 
220
  },
221
  {
222
- "epoch": 18.89,
223
- "eval_accuracy": 0.8225806451612904,
224
- "eval_loss": 0.5046879053115845,
225
- "eval_runtime": 2.2336,
226
- "eval_samples_per_second": 27.758,
227
- "eval_steps_per_second": 1.791,
228
- "step": 85
229
  },
230
  {
231
  "epoch": 20.0,
232
- "learning_rate": 2.4305555555555558e-05,
233
- "loss": 0.4166,
234
- "step": 90
235
  },
236
  {
237
  "epoch": 20.0,
238
- "eval_accuracy": 0.8709677419354839,
239
- "eval_loss": 0.5190597772598267,
240
- "eval_runtime": 2.2074,
241
- "eval_samples_per_second": 28.087,
242
- "eval_steps_per_second": 1.812,
243
- "step": 90
244
  },
245
  {
246
- "epoch": 20.89,
247
- "eval_accuracy": 0.8225806451612904,
248
- "eval_loss": 0.5115006566047668,
249
- "eval_runtime": 2.1835,
250
- "eval_samples_per_second": 28.395,
251
- "eval_steps_per_second": 1.832,
252
- "step": 94
 
 
 
 
 
 
253
  },
254
  {
255
  "epoch": 22.0,
256
- "eval_accuracy": 0.7419354838709677,
257
- "eval_loss": 0.6454288959503174,
258
- "eval_runtime": 2.19,
259
- "eval_samples_per_second": 28.31,
260
- "eval_steps_per_second": 1.826,
261
- "step": 99
262
  },
263
  {
264
- "epoch": 22.22,
265
- "learning_rate": 2.0833333333333336e-05,
266
- "loss": 0.4209,
267
- "step": 100
 
 
 
 
 
 
 
 
 
268
  },
269
  {
270
- "epoch": 22.89,
271
- "eval_accuracy": 0.7741935483870968,
272
- "eval_loss": 0.5938693881034851,
273
- "eval_runtime": 2.135,
274
- "eval_samples_per_second": 29.04,
275
- "eval_steps_per_second": 1.874,
276
- "step": 103
277
  },
278
  {
279
  "epoch": 24.0,
280
- "eval_accuracy": 0.8709677419354839,
281
- "eval_loss": 0.4799472391605377,
282
- "eval_runtime": 2.2005,
283
- "eval_samples_per_second": 28.175,
284
- "eval_steps_per_second": 1.818,
285
- "step": 108
286
  },
287
  {
288
- "epoch": 24.44,
289
- "learning_rate": 1.736111111111111e-05,
290
- "loss": 0.3429,
291
- "step": 110
292
  },
293
  {
294
- "epoch": 24.89,
295
- "eval_accuracy": 0.8064516129032258,
296
- "eval_loss": 0.5510820150375366,
297
- "eval_runtime": 2.216,
298
- "eval_samples_per_second": 27.978,
299
- "eval_steps_per_second": 1.805,
300
- "step": 112
301
  },
302
  {
303
  "epoch": 26.0,
304
- "eval_accuracy": 0.8064516129032258,
305
- "eval_loss": 0.4977356493473053,
306
- "eval_runtime": 2.1565,
307
- "eval_samples_per_second": 28.75,
308
- "eval_steps_per_second": 1.855,
309
- "step": 117
310
  },
311
  {
312
- "epoch": 26.67,
313
- "learning_rate": 1.388888888888889e-05,
314
- "loss": 0.3344,
315
- "step": 120
 
 
 
 
 
 
 
 
 
316
  },
317
  {
318
- "epoch": 26.89,
319
- "eval_accuracy": 0.8709677419354839,
320
- "eval_loss": 0.41801995038986206,
321
- "eval_runtime": 2.357,
322
- "eval_samples_per_second": 26.304,
323
- "eval_steps_per_second": 1.697,
324
- "step": 121
325
  },
326
  {
327
  "epoch": 28.0,
328
- "eval_accuracy": 0.8225806451612904,
329
- "eval_loss": 0.5254219770431519,
330
- "eval_runtime": 2.3326,
331
- "eval_samples_per_second": 26.58,
332
- "eval_steps_per_second": 1.715,
333
- "step": 126
334
  },
335
  {
336
- "epoch": 28.89,
337
- "learning_rate": 1.0416666666666668e-05,
338
- "loss": 0.3301,
339
- "step": 130
340
  },
341
  {
342
- "epoch": 28.89,
343
- "eval_accuracy": 0.8225806451612904,
344
- "eval_loss": 0.5396197438240051,
345
- "eval_runtime": 2.0979,
346
- "eval_samples_per_second": 29.553,
347
- "eval_steps_per_second": 1.907,
348
- "step": 130
 
 
 
 
 
 
349
  },
350
  {
351
  "epoch": 30.0,
352
- "eval_accuracy": 0.7903225806451613,
353
- "eval_loss": 0.5654030442237854,
354
- "eval_runtime": 2.3721,
355
- "eval_samples_per_second": 26.138,
356
- "eval_steps_per_second": 1.686,
357
- "step": 135
358
  },
359
  {
360
- "epoch": 30.89,
361
- "eval_accuracy": 0.8064516129032258,
362
- "eval_loss": 0.5931597948074341,
363
- "eval_runtime": 2.252,
364
- "eval_samples_per_second": 27.531,
365
- "eval_steps_per_second": 1.776,
366
- "step": 139
367
  },
368
  {
369
- "epoch": 31.11,
370
- "learning_rate": 6.944444444444445e-06,
371
- "loss": 0.3262,
372
- "step": 140
373
  },
374
  {
375
  "epoch": 32.0,
376
- "eval_accuracy": 0.8064516129032258,
377
- "eval_loss": 0.5630321502685547,
378
- "eval_runtime": 2.111,
379
- "eval_samples_per_second": 29.37,
380
- "eval_steps_per_second": 1.895,
381
- "step": 144
382
  },
383
  {
384
- "epoch": 32.89,
385
- "eval_accuracy": 0.8064516129032258,
386
- "eval_loss": 0.5433912873268127,
387
- "eval_runtime": 2.2255,
388
- "eval_samples_per_second": 27.858,
389
- "eval_steps_per_second": 1.797,
390
- "step": 148
391
  },
392
  {
393
- "epoch": 33.33,
394
- "learning_rate": 3.4722222222222224e-06,
395
- "loss": 0.2997,
396
- "step": 150
 
 
 
 
 
 
 
 
 
397
  },
398
  {
399
  "epoch": 34.0,
400
- "eval_accuracy": 0.8064516129032258,
401
- "eval_loss": 0.5314125418663025,
402
- "eval_runtime": 2.275,
403
- "eval_samples_per_second": 27.252,
404
- "eval_steps_per_second": 1.758,
405
- "step": 153
406
  },
407
  {
408
- "epoch": 34.89,
409
- "eval_accuracy": 0.8064516129032258,
410
- "eval_loss": 0.5194743275642395,
411
- "eval_runtime": 2.1655,
412
- "eval_samples_per_second": 28.631,
413
- "eval_steps_per_second": 1.847,
414
- "step": 157
415
  },
416
  {
417
- "epoch": 35.56,
418
- "learning_rate": 0.0,
419
- "loss": 0.2891,
420
- "step": 160
 
 
 
421
  },
422
  {
423
- "epoch": 35.56,
424
- "eval_accuracy": 0.8064516129032258,
425
- "eval_loss": 0.5155302286148071,
426
- "eval_runtime": 2.3866,
427
- "eval_samples_per_second": 25.979,
428
- "eval_steps_per_second": 1.676,
429
- "step": 160
430
  },
431
  {
432
- "epoch": 35.56,
433
- "step": 160,
434
- "total_flos": 7.931930389512192e+17,
435
- "train_loss": 0.5975452944636345,
436
- "train_runtime": 588.8758,
437
- "train_samples_per_second": 19.563,
438
- "train_steps_per_second": 0.272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  }
440
  ],
441
  "logging_steps": 10,
442
- "max_steps": 160,
443
  "num_input_tokens_seen": 0,
444
  "num_train_epochs": 40,
445
  "save_steps": 500,
446
- "total_flos": 7.931930389512192e+17,
447
  "train_batch_size": 16,
448
  "trial_name": null,
449
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8317757009345794,
3
+ "best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-248",
4
+ "epoch": 40.0,
5
  "eval_steps": 500,
6
+ "global_step": 320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.5887850467289719,
14
+ "eval_loss": 0.6887365579605103,
15
+ "eval_runtime": 2.8498,
16
+ "eval_samples_per_second": 37.546,
17
+ "eval_steps_per_second": 2.456,
18
+ "step": 8
19
+ },
20
+ {
21
+ "epoch": 1.25,
22
+ "learning_rate": 1.5625e-05,
23
+ "loss": 0.692,
24
+ "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.5887850467289719,
29
+ "eval_loss": 0.678210437297821,
30
+ "eval_runtime": 1.9785,
31
+ "eval_samples_per_second": 54.082,
32
+ "eval_steps_per_second": 3.538,
33
+ "step": 16
34
  },
35
  {
36
+ "epoch": 2.5,
37
  "learning_rate": 3.125e-05,
38
+ "loss": 0.6801,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 3.0,
43
+ "eval_accuracy": 0.5887850467289719,
44
+ "eval_loss": 0.6668981313705444,
45
+ "eval_runtime": 2.0656,
46
+ "eval_samples_per_second": 51.802,
47
+ "eval_steps_per_second": 3.389,
48
+ "step": 24
49
  },
50
  {
51
+ "epoch": 3.75,
52
+ "learning_rate": 4.6875e-05,
53
+ "loss": 0.6696,
54
+ "step": 30
 
 
 
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "eval_accuracy": 0.5887850467289719,
59
+ "eval_loss": 0.6644209623336792,
60
+ "eval_runtime": 2.1267,
61
+ "eval_samples_per_second": 50.313,
62
+ "eval_steps_per_second": 3.291,
63
+ "step": 32
64
  },
65
  {
66
+ "epoch": 5.0,
67
  "learning_rate": 4.8611111111111115e-05,
68
+ "loss": 0.6607,
69
+ "step": 40
70
  },
71
  {
72
+ "epoch": 5.0,
73
+ "eval_accuracy": 0.6635514018691588,
74
+ "eval_loss": 0.6661449074745178,
75
+ "eval_runtime": 1.995,
76
+ "eval_samples_per_second": 53.635,
77
+ "eval_steps_per_second": 3.509,
78
+ "step": 40
79
  },
80
  {
81
  "epoch": 6.0,
82
+ "eval_accuracy": 0.6542056074766355,
83
+ "eval_loss": 0.6241438388824463,
84
+ "eval_runtime": 2.0189,
85
+ "eval_samples_per_second": 52.999,
86
+ "eval_steps_per_second": 3.467,
87
+ "step": 48
88
  },
89
  {
90
+ "epoch": 6.25,
91
+ "learning_rate": 4.6875e-05,
92
+ "loss": 0.6341,
93
+ "step": 50
94
  },
95
  {
96
+ "epoch": 7.0,
97
+ "eval_accuracy": 0.6542056074766355,
98
+ "eval_loss": 0.6234968900680542,
99
+ "eval_runtime": 1.9955,
100
+ "eval_samples_per_second": 53.622,
101
+ "eval_steps_per_second": 3.508,
102
+ "step": 56
103
+ },
104
+ {
105
+ "epoch": 7.5,
106
+ "learning_rate": 4.5138888888888894e-05,
107
+ "loss": 0.6089,
108
+ "step": 60
109
  },
110
  {
111
  "epoch": 8.0,
112
+ "eval_accuracy": 0.6915887850467289,
113
+ "eval_loss": 0.6088296175003052,
114
+ "eval_runtime": 2.06,
115
+ "eval_samples_per_second": 51.942,
116
+ "eval_steps_per_second": 3.398,
117
+ "step": 64
118
  },
119
  {
120
+ "epoch": 8.75,
121
+ "learning_rate": 4.340277777777778e-05,
122
+ "loss": 0.6095,
123
+ "step": 70
124
  },
125
  {
126
+ "epoch": 9.0,
127
+ "eval_accuracy": 0.6915887850467289,
128
+ "eval_loss": 0.5911644101142883,
129
+ "eval_runtime": 2.01,
130
+ "eval_samples_per_second": 53.235,
131
+ "eval_steps_per_second": 3.483,
132
+ "step": 72
133
  },
134
  {
135
  "epoch": 10.0,
136
+ "learning_rate": 4.166666666666667e-05,
137
+ "loss": 0.5632,
138
+ "step": 80
 
 
 
139
  },
140
  {
141
+ "epoch": 10.0,
142
+ "eval_accuracy": 0.6355140186915887,
143
+ "eval_loss": 0.660692036151886,
144
+ "eval_runtime": 2.126,
145
+ "eval_samples_per_second": 50.329,
146
+ "eval_steps_per_second": 3.293,
147
+ "step": 80
148
  },
149
  {
150
+ "epoch": 11.0,
151
+ "eval_accuracy": 0.7009345794392523,
152
+ "eval_loss": 0.5792553424835205,
153
+ "eval_runtime": 1.972,
154
+ "eval_samples_per_second": 54.261,
155
+ "eval_steps_per_second": 3.55,
156
+ "step": 88
157
+ },
158
+ {
159
+ "epoch": 11.25,
160
+ "learning_rate": 3.993055555555556e-05,
161
+ "loss": 0.5418,
162
+ "step": 90
163
  },
164
  {
165
  "epoch": 12.0,
166
+ "eval_accuracy": 0.6822429906542056,
167
+ "eval_loss": 0.5953279733657837,
168
+ "eval_runtime": 2.2228,
169
+ "eval_samples_per_second": 48.137,
170
+ "eval_steps_per_second": 3.149,
171
+ "step": 96
172
  },
173
  {
174
+ "epoch": 12.5,
175
+ "learning_rate": 3.8194444444444444e-05,
176
+ "loss": 0.5336,
177
+ "step": 100
 
 
 
178
  },
179
  {
180
+ "epoch": 13.0,
181
+ "eval_accuracy": 0.7102803738317757,
182
+ "eval_loss": 0.5792534947395325,
183
+ "eval_runtime": 1.991,
184
+ "eval_samples_per_second": 53.742,
185
+ "eval_steps_per_second": 3.516,
186
+ "step": 104
187
+ },
188
+ {
189
+ "epoch": 13.75,
190
+ "learning_rate": 3.6458333333333336e-05,
191
+ "loss": 0.5102,
192
+ "step": 110
193
  },
194
  {
195
  "epoch": 14.0,
196
+ "eval_accuracy": 0.719626168224299,
197
+ "eval_loss": 0.5291872024536133,
198
+ "eval_runtime": 2.0585,
199
+ "eval_samples_per_second": 51.98,
200
+ "eval_steps_per_second": 3.401,
201
+ "step": 112
202
  },
203
  {
204
+ "epoch": 15.0,
205
+ "learning_rate": 3.472222222222222e-05,
206
+ "loss": 0.4762,
207
+ "step": 120
 
 
 
208
  },
209
  {
210
+ "epoch": 15.0,
211
+ "eval_accuracy": 0.7009345794392523,
212
+ "eval_loss": 0.6557727456092834,
213
+ "eval_runtime": 1.9985,
214
+ "eval_samples_per_second": 53.541,
215
+ "eval_steps_per_second": 3.503,
216
+ "step": 120
217
  },
218
  {
219
  "epoch": 16.0,
220
+ "eval_accuracy": 0.7102803738317757,
221
+ "eval_loss": 0.5371208786964417,
222
+ "eval_runtime": 1.971,
223
+ "eval_samples_per_second": 54.288,
224
+ "eval_steps_per_second": 3.552,
225
+ "step": 128
226
  },
227
  {
228
+ "epoch": 16.25,
229
+ "learning_rate": 3.2986111111111115e-05,
230
+ "loss": 0.544,
231
+ "step": 130
 
 
 
232
  },
233
  {
234
+ "epoch": 17.0,
235
+ "eval_accuracy": 0.7570093457943925,
236
+ "eval_loss": 0.5400705337524414,
237
+ "eval_runtime": 1.981,
238
+ "eval_samples_per_second": 54.014,
239
+ "eval_steps_per_second": 3.534,
240
+ "step": 136
241
+ },
242
+ {
243
+ "epoch": 17.5,
244
+ "learning_rate": 3.125e-05,
245
+ "loss": 0.4256,
246
+ "step": 140
247
  },
248
  {
249
  "epoch": 18.0,
250
+ "eval_accuracy": 0.794392523364486,
251
+ "eval_loss": 0.49267861247062683,
252
+ "eval_runtime": 1.9622,
253
+ "eval_samples_per_second": 54.531,
254
+ "eval_steps_per_second": 3.567,
255
+ "step": 144
256
+ },
257
+ {
258
+ "epoch": 18.75,
259
+ "learning_rate": 2.951388888888889e-05,
260
+ "loss": 0.4082,
261
+ "step": 150
262
  },
263
  {
264
+ "epoch": 19.0,
265
+ "eval_accuracy": 0.7383177570093458,
266
+ "eval_loss": 0.5800967216491699,
267
+ "eval_runtime": 1.9825,
268
+ "eval_samples_per_second": 53.973,
269
+ "eval_steps_per_second": 3.531,
270
+ "step": 152
271
  },
272
  {
273
  "epoch": 20.0,
274
+ "learning_rate": 2.777777777777778e-05,
275
+ "loss": 0.4014,
276
+ "step": 160
277
  },
278
  {
279
  "epoch": 20.0,
280
+ "eval_accuracy": 0.7383177570093458,
281
+ "eval_loss": 0.5822688937187195,
282
+ "eval_runtime": 1.985,
283
+ "eval_samples_per_second": 53.905,
284
+ "eval_steps_per_second": 3.526,
285
+ "step": 160
286
  },
287
  {
288
+ "epoch": 21.0,
289
+ "eval_accuracy": 0.7757009345794392,
290
+ "eval_loss": 0.5392723083496094,
291
+ "eval_runtime": 2.117,
292
+ "eval_samples_per_second": 50.542,
293
+ "eval_steps_per_second": 3.306,
294
+ "step": 168
295
+ },
296
+ {
297
+ "epoch": 21.25,
298
+ "learning_rate": 2.604166666666667e-05,
299
+ "loss": 0.3483,
300
+ "step": 170
301
  },
302
  {
303
  "epoch": 22.0,
304
+ "eval_accuracy": 0.7102803738317757,
305
+ "eval_loss": 0.5940819382667542,
306
+ "eval_runtime": 1.9765,
307
+ "eval_samples_per_second": 54.137,
308
+ "eval_steps_per_second": 3.542,
309
+ "step": 176
310
  },
311
  {
312
+ "epoch": 22.5,
313
+ "learning_rate": 2.4305555555555558e-05,
314
+ "loss": 0.3121,
315
+ "step": 180
316
+ },
317
+ {
318
+ "epoch": 23.0,
319
+ "eval_accuracy": 0.7383177570093458,
320
+ "eval_loss": 0.5568514466285706,
321
+ "eval_runtime": 2.1005,
322
+ "eval_samples_per_second": 50.94,
323
+ "eval_steps_per_second": 3.333,
324
+ "step": 184
325
  },
326
  {
327
+ "epoch": 23.75,
328
+ "learning_rate": 2.2569444444444447e-05,
329
+ "loss": 0.3484,
330
+ "step": 190
 
 
 
331
  },
332
  {
333
  "epoch": 24.0,
334
+ "eval_accuracy": 0.7663551401869159,
335
+ "eval_loss": 0.5975044369697571,
336
+ "eval_runtime": 1.97,
337
+ "eval_samples_per_second": 54.316,
338
+ "eval_steps_per_second": 3.553,
339
+ "step": 192
340
  },
341
  {
342
+ "epoch": 25.0,
343
+ "learning_rate": 2.0833333333333336e-05,
344
+ "loss": 0.263,
345
+ "step": 200
346
  },
347
  {
348
+ "epoch": 25.0,
349
+ "eval_accuracy": 0.7570093457943925,
350
+ "eval_loss": 0.6544022560119629,
351
+ "eval_runtime": 2.3716,
352
+ "eval_samples_per_second": 45.118,
353
+ "eval_steps_per_second": 2.952,
354
+ "step": 200
355
  },
356
  {
357
  "epoch": 26.0,
358
+ "eval_accuracy": 0.7757009345794392,
359
+ "eval_loss": 0.5743973851203918,
360
+ "eval_runtime": 2.0105,
361
+ "eval_samples_per_second": 53.221,
362
+ "eval_steps_per_second": 3.482,
363
+ "step": 208
364
  },
365
  {
366
+ "epoch": 26.25,
367
+ "learning_rate": 1.9097222222222222e-05,
368
+ "loss": 0.2633,
369
+ "step": 210
370
+ },
371
+ {
372
+ "epoch": 27.0,
373
+ "eval_accuracy": 0.7663551401869159,
374
+ "eval_loss": 0.6095036864280701,
375
+ "eval_runtime": 1.9725,
376
+ "eval_samples_per_second": 54.247,
377
+ "eval_steps_per_second": 3.549,
378
+ "step": 216
379
  },
380
  {
381
+ "epoch": 27.5,
382
+ "learning_rate": 1.736111111111111e-05,
383
+ "loss": 0.2935,
384
+ "step": 220
 
 
 
385
  },
386
  {
387
  "epoch": 28.0,
388
+ "eval_accuracy": 0.7663551401869159,
389
+ "eval_loss": 0.528620719909668,
390
+ "eval_runtime": 2.0605,
391
+ "eval_samples_per_second": 51.929,
392
+ "eval_steps_per_second": 3.397,
393
+ "step": 224
394
  },
395
  {
396
+ "epoch": 28.75,
397
+ "learning_rate": 1.5625e-05,
398
+ "loss": 0.2332,
399
+ "step": 230
400
  },
401
  {
402
+ "epoch": 29.0,
403
+ "eval_accuracy": 0.7850467289719626,
404
+ "eval_loss": 0.6027860045433044,
405
+ "eval_runtime": 1.9665,
406
+ "eval_samples_per_second": 54.412,
407
+ "eval_steps_per_second": 3.56,
408
+ "step": 232
409
+ },
410
+ {
411
+ "epoch": 30.0,
412
+ "learning_rate": 1.388888888888889e-05,
413
+ "loss": 0.2314,
414
+ "step": 240
415
  },
416
  {
417
  "epoch": 30.0,
418
+ "eval_accuracy": 0.794392523364486,
419
+ "eval_loss": 0.5935384631156921,
420
+ "eval_runtime": 1.962,
421
+ "eval_samples_per_second": 54.537,
422
+ "eval_steps_per_second": 3.568,
423
+ "step": 240
424
  },
425
  {
426
+ "epoch": 31.0,
427
+ "eval_accuracy": 0.8317757009345794,
428
+ "eval_loss": 0.5392867922782898,
429
+ "eval_runtime": 1.969,
430
+ "eval_samples_per_second": 54.343,
431
+ "eval_steps_per_second": 3.555,
432
+ "step": 248
433
  },
434
  {
435
+ "epoch": 31.25,
436
+ "learning_rate": 1.2152777777777779e-05,
437
+ "loss": 0.202,
438
+ "step": 250
439
  },
440
  {
441
  "epoch": 32.0,
442
+ "eval_accuracy": 0.822429906542056,
443
+ "eval_loss": 0.5556337833404541,
444
+ "eval_runtime": 2.012,
445
+ "eval_samples_per_second": 53.182,
446
+ "eval_steps_per_second": 3.479,
447
+ "step": 256
448
  },
449
  {
450
+ "epoch": 32.5,
451
+ "learning_rate": 1.0416666666666668e-05,
452
+ "loss": 0.2127,
453
+ "step": 260
 
 
 
454
  },
455
  {
456
+ "epoch": 33.0,
457
+ "eval_accuracy": 0.8037383177570093,
458
+ "eval_loss": 0.5912833213806152,
459
+ "eval_runtime": 1.98,
460
+ "eval_samples_per_second": 54.041,
461
+ "eval_steps_per_second": 3.535,
462
+ "step": 264
463
+ },
464
+ {
465
+ "epoch": 33.75,
466
+ "learning_rate": 8.680555555555556e-06,
467
+ "loss": 0.2035,
468
+ "step": 270
469
  },
470
  {
471
  "epoch": 34.0,
472
+ "eval_accuracy": 0.8037383177570093,
473
+ "eval_loss": 0.5337203741073608,
474
+ "eval_runtime": 1.9834,
475
+ "eval_samples_per_second": 53.947,
476
+ "eval_steps_per_second": 3.529,
477
+ "step": 272
478
  },
479
  {
480
+ "epoch": 35.0,
481
+ "learning_rate": 6.944444444444445e-06,
482
+ "loss": 0.2618,
483
+ "step": 280
 
 
 
484
  },
485
  {
486
+ "epoch": 35.0,
487
+ "eval_accuracy": 0.8037383177570093,
488
+ "eval_loss": 0.622107982635498,
489
+ "eval_runtime": 2.0025,
490
+ "eval_samples_per_second": 53.434,
491
+ "eval_steps_per_second": 3.496,
492
+ "step": 280
493
  },
494
  {
495
+ "epoch": 36.0,
496
+ "eval_accuracy": 0.8317757009345794,
497
+ "eval_loss": 0.5089600682258606,
498
+ "eval_runtime": 1.9755,
499
+ "eval_samples_per_second": 54.164,
500
+ "eval_steps_per_second": 3.543,
501
+ "step": 288
502
  },
503
  {
504
+ "epoch": 36.25,
505
+ "learning_rate": 5.208333333333334e-06,
506
+ "loss": 0.217,
507
+ "step": 290
508
+ },
509
+ {
510
+ "epoch": 37.0,
511
+ "eval_accuracy": 0.822429906542056,
512
+ "eval_loss": 0.5649047493934631,
513
+ "eval_runtime": 1.991,
514
+ "eval_samples_per_second": 53.743,
515
+ "eval_steps_per_second": 3.516,
516
+ "step": 296
517
+ },
518
+ {
519
+ "epoch": 37.5,
520
+ "learning_rate": 3.4722222222222224e-06,
521
+ "loss": 0.2111,
522
+ "step": 300
523
+ },
524
+ {
525
+ "epoch": 38.0,
526
+ "eval_accuracy": 0.8130841121495327,
527
+ "eval_loss": 0.568317174911499,
528
+ "eval_runtime": 2.0365,
529
+ "eval_samples_per_second": 52.542,
530
+ "eval_steps_per_second": 3.437,
531
+ "step": 304
532
+ },
533
+ {
534
+ "epoch": 38.75,
535
+ "learning_rate": 1.7361111111111112e-06,
536
+ "loss": 0.2085,
537
+ "step": 310
538
+ },
539
+ {
540
+ "epoch": 39.0,
541
+ "eval_accuracy": 0.822429906542056,
542
+ "eval_loss": 0.5397838950157166,
543
+ "eval_runtime": 1.9935,
544
+ "eval_samples_per_second": 53.675,
545
+ "eval_steps_per_second": 3.511,
546
+ "step": 312
547
+ },
548
+ {
549
+ "epoch": 40.0,
550
+ "learning_rate": 0.0,
551
+ "loss": 0.1912,
552
+ "step": 320
553
+ },
554
+ {
555
+ "epoch": 40.0,
556
+ "eval_accuracy": 0.822429906542056,
557
+ "eval_loss": 0.5548034310340881,
558
+ "eval_runtime": 1.978,
559
+ "eval_samples_per_second": 54.096,
560
+ "eval_steps_per_second": 3.539,
561
+ "step": 320
562
+ },
563
+ {
564
+ "epoch": 40.0,
565
+ "step": 320,
566
+ "total_flos": 1.5429806632629043e+18,
567
+ "train_loss": 0.4049976162612438,
568
+ "train_runtime": 701.3243,
569
+ "train_samples_per_second": 28.403,
570
+ "train_steps_per_second": 0.456
571
  }
572
  ],
573
  "logging_steps": 10,
574
+ "max_steps": 320,
575
  "num_input_tokens_seen": 0,
576
  "num_train_epochs": 40,
577
  "save_steps": 500,
578
+ "total_flos": 1.5429806632629043e+18,
579
  "train_batch_size": 16,
580
  "trial_name": null,
581
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:492f9fd8b047d1211e7f0f6292ed08cde5c5098394b987a9a0ea2f84aaed7e54
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:289be5ea4287de61c05dc38506ba40eb212d6de2c0bca319801e20a96f6874c0
3
  size 4728