Augusto777 committed on
Commit
ce6a2c9
·
verified ·
1 Parent(s): 5ec8984

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.8317757009345794
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224](https://huggingface.co/microsoft/beit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.5393
36
- - Accuracy: 0.8318
37
 
38
  ## Model description
39
 
@@ -67,46 +67,42 @@ The following hyperparameters were used during training:
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
- | No log | 1.0 | 8 | 0.6887 | 0.5888 |
71
- | 0.692 | 2.0 | 16 | 0.6782 | 0.5888 |
72
- | 0.6801 | 3.0 | 24 | 0.6669 | 0.5888 |
73
- | 0.6696 | 4.0 | 32 | 0.6644 | 0.5888 |
74
- | 0.6607 | 5.0 | 40 | 0.6661 | 0.6636 |
75
- | 0.6607 | 6.0 | 48 | 0.6241 | 0.6542 |
76
- | 0.6341 | 7.0 | 56 | 0.6235 | 0.6542 |
77
- | 0.6089 | 8.0 | 64 | 0.6088 | 0.6916 |
78
- | 0.6095 | 9.0 | 72 | 0.5912 | 0.6916 |
79
- | 0.5632 | 10.0 | 80 | 0.6607 | 0.6355 |
80
- | 0.5632 | 11.0 | 88 | 0.5793 | 0.7009 |
81
- | 0.5418 | 12.0 | 96 | 0.5953 | 0.6822 |
82
- | 0.5336 | 13.0 | 104 | 0.5793 | 0.7103 |
83
- | 0.5102 | 14.0 | 112 | 0.5292 | 0.7196 |
84
- | 0.4762 | 15.0 | 120 | 0.6558 | 0.7009 |
85
- | 0.4762 | 16.0 | 128 | 0.5371 | 0.7103 |
86
- | 0.544 | 17.0 | 136 | 0.5401 | 0.7570 |
87
- | 0.4256 | 18.0 | 144 | 0.4927 | 0.7944 |
88
- | 0.4082 | 19.0 | 152 | 0.5801 | 0.7383 |
89
- | 0.4014 | 20.0 | 160 | 0.5823 | 0.7383 |
90
- | 0.4014 | 21.0 | 168 | 0.5393 | 0.7757 |
91
- | 0.3483 | 22.0 | 176 | 0.5941 | 0.7103 |
92
- | 0.3121 | 23.0 | 184 | 0.5569 | 0.7383 |
93
- | 0.3484 | 24.0 | 192 | 0.5975 | 0.7664 |
94
- | 0.263 | 25.0 | 200 | 0.6544 | 0.7570 |
95
- | 0.263 | 26.0 | 208 | 0.5744 | 0.7757 |
96
- | 0.2633 | 27.0 | 216 | 0.6095 | 0.7664 |
97
- | 0.2935 | 28.0 | 224 | 0.5286 | 0.7664 |
98
- | 0.2332 | 29.0 | 232 | 0.6028 | 0.7850 |
99
- | 0.2314 | 30.0 | 240 | 0.5935 | 0.7944 |
100
- | 0.2314 | 31.0 | 248 | 0.5393 | 0.8318 |
101
- | 0.202 | 32.0 | 256 | 0.5556 | 0.8224 |
102
- | 0.2127 | 33.0 | 264 | 0.5913 | 0.8037 |
103
- | 0.2035 | 34.0 | 272 | 0.5337 | 0.8037 |
104
- | 0.2618 | 35.0 | 280 | 0.6221 | 0.8037 |
105
- | 0.2618 | 36.0 | 288 | 0.5090 | 0.8318 |
106
- | 0.217 | 37.0 | 296 | 0.5649 | 0.8224 |
107
- | 0.2111 | 38.0 | 304 | 0.5683 | 0.8131 |
108
- | 0.2085 | 39.0 | 312 | 0.5398 | 0.8224 |
109
- | 0.1912 | 40.0 | 320 | 0.5548 | 0.8224 |
110
 
111
 
112
  ### Framework versions
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8225806451612904
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224](https://huggingface.co/microsoft/beit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.4801
36
+ - Accuracy: 0.8226
37
 
38
  ## Model description
39
 
 
67
 
68
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | No log | 0.89 | 4 | 1.7603 | 0.1452 |
71
+ | No log | 2.0 | 9 | 1.6852 | 0.1452 |
72
+ | 1.7571 | 2.89 | 13 | 1.5655 | 0.1452 |
73
+ | 1.7571 | 4.0 | 18 | 1.3816 | 0.1452 |
74
+ | 1.5255 | 4.89 | 22 | 1.2599 | 0.3226 |
75
+ | 1.5255 | 6.0 | 27 | 1.1534 | 0.4839 |
76
+ | 1.2245 | 6.89 | 31 | 1.0641 | 0.4839 |
77
+ | 1.2245 | 8.0 | 36 | 1.0372 | 0.4355 |
78
+ | 1.0438 | 8.89 | 40 | 0.9988 | 0.4355 |
79
+ | 1.0438 | 10.0 | 45 | 0.9260 | 0.5161 |
80
+ | 1.0438 | 10.89 | 49 | 0.9085 | 0.7097 |
81
+ | 0.9727 | 12.0 | 54 | 0.8433 | 0.7258 |
82
+ | 0.9727 | 12.89 | 58 | 0.7529 | 0.7742 |
83
+ | 0.8469 | 14.0 | 63 | 0.7187 | 0.7581 |
84
+ | 0.8469 | 14.89 | 67 | 0.6806 | 0.7258 |
85
+ | 0.6908 | 16.0 | 72 | 0.6576 | 0.7581 |
86
+ | 0.6908 | 16.89 | 76 | 0.5742 | 0.7903 |
87
+ | 0.6064 | 18.0 | 81 | 0.6447 | 0.7581 |
88
+ | 0.6064 | 18.89 | 85 | 0.5602 | 0.7742 |
89
+ | 0.5303 | 20.0 | 90 | 0.4943 | 0.7903 |
90
+ | 0.5303 | 20.89 | 94 | 0.5304 | 0.7903 |
91
+ | 0.5303 | 22.0 | 99 | 0.4801 | 0.8226 |
92
+ | 0.4903 | 22.89 | 103 | 0.4849 | 0.8226 |
93
+ | 0.4903 | 24.0 | 108 | 0.5710 | 0.7742 |
94
+ | 0.4261 | 24.89 | 112 | 0.4803 | 0.7903 |
95
+ | 0.4261 | 26.0 | 117 | 0.5671 | 0.7258 |
96
+ | 0.4122 | 26.89 | 121 | 0.4585 | 0.8065 |
97
+ | 0.4122 | 28.0 | 126 | 0.5910 | 0.7097 |
98
+ | 0.3739 | 28.89 | 130 | 0.5821 | 0.7581 |
99
+ | 0.3739 | 30.0 | 135 | 0.5329 | 0.7742 |
100
+ | 0.3739 | 30.89 | 139 | 0.4423 | 0.8226 |
101
+ | 0.3896 | 32.0 | 144 | 0.4716 | 0.7581 |
102
+ | 0.3896 | 32.89 | 148 | 0.4786 | 0.7903 |
103
+ | 0.3472 | 34.0 | 153 | 0.4538 | 0.7903 |
104
+ | 0.3472 | 34.89 | 157 | 0.4553 | 0.7903 |
105
+ | 0.3349 | 35.56 | 160 | 0.4528 | 0.7903 |
 
 
 
 
106
 
107
 
108
  ### Framework versions
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 40.0,
3
- "eval_accuracy": 0.8317757009345794,
4
- "eval_loss": 0.5392867922782898,
5
- "eval_runtime": 2.3105,
6
- "eval_samples_per_second": 46.31,
7
- "eval_steps_per_second": 3.03,
8
- "train_loss": 0.4049976162612438,
9
- "train_runtime": 701.3243,
10
- "train_samples_per_second": 28.403,
11
- "train_steps_per_second": 0.456
12
  }
 
1
  {
2
+ "epoch": 35.56,
3
+ "eval_accuracy": 0.8225806451612904,
4
+ "eval_loss": 0.48010584712028503,
5
+ "eval_runtime": 2.5596,
6
+ "eval_samples_per_second": 24.223,
7
+ "eval_steps_per_second": 1.563,
8
+ "train_loss": 0.7482577681541442,
9
+ "train_runtime": 589.7762,
10
+ "train_samples_per_second": 19.533,
11
+ "train_steps_per_second": 0.271
12
  }
config.json CHANGED
@@ -14,15 +14,19 @@
14
  "hidden_dropout_prob": 0.0,
15
  "hidden_size": 768,
16
  "id2label": {
17
- "0": "HR",
18
- "1": "No HR"
 
 
19
  },
20
  "image_size": 224,
21
  "initializer_range": 0.02,
22
  "intermediate_size": 3072,
23
  "label2id": {
24
- "HR": 0,
25
- "No HR": 1
 
 
26
  },
27
  "layer_norm_eps": 1e-12,
28
  "layer_scale_init_value": 0.1,
 
14
  "hidden_dropout_prob": 0.0,
15
  "hidden_size": 768,
16
  "id2label": {
17
+ "0": "active",
18
+ "1": "active-inactive",
19
+ "2": "healthy",
20
+ "3": "inactive"
21
  },
22
  "image_size": 224,
23
  "initializer_range": 0.02,
24
  "intermediate_size": 3072,
25
  "label2id": {
26
+ "active": 0,
27
+ "active-inactive": 1,
28
+ "healthy": 2,
29
+ "inactive": 3
30
  },
31
  "layer_norm_eps": 1e-12,
32
  "layer_scale_init_value": 0.1,
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 40.0,
3
- "eval_accuracy": 0.8317757009345794,
4
- "eval_loss": 0.5392867922782898,
5
- "eval_runtime": 2.3105,
6
- "eval_samples_per_second": 46.31,
7
- "eval_steps_per_second": 3.03
8
  }
 
1
  {
2
+ "epoch": 35.56,
3
+ "eval_accuracy": 0.8225806451612904,
4
+ "eval_loss": 0.48010584712028503,
5
+ "eval_runtime": 2.5596,
6
+ "eval_samples_per_second": 24.223,
7
+ "eval_steps_per_second": 1.563
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3eeea300ff3a120329583ab7e00d190047c17c87c67ed1e5a5799d0b845cb144
3
- size 343080328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2e0354dd3875d2a95e10cb59e09081044efc892a545ff863b0aaa18b051efa6
3
+ size 343086480
runs/Dec01_17-09-38_DESKTOP-SKBE9FB/events.out.tfevents.1733094579.DESKTOP-SKBE9FB.18044.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c90a088556944c18e49c91b9047a211812a0ae629235b3b9577acedb47130633
3
+ size 8146
runs/Dec01_17-53-11_DESKTOP-SKBE9FB/events.out.tfevents.1733097193.DESKTOP-SKBE9FB.15124.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3129a7fb381f5a5b5536bb13a15331b4a1d56416073bcd6536b696503b67e472
3
+ size 17651
runs/Dec01_18-02-46_DESKTOP-SKBE9FB/events.out.tfevents.1733097768.DESKTOP-SKBE9FB.2668.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb3c6fd85e0cd85df68369e4dbfb4a09ba174302b615b80037cd450200187de3
3
+ size 14469
runs/Dec01_18-10-08_DESKTOP-SKBE9FB/events.out.tfevents.1733098210.DESKTOP-SKBE9FB.2796.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9e884b466dce56ecf87f1ddb8372d84339ad4da748a8b8c73aea615a584ad6
3
+ size 19611
runs/Dec01_18-10-08_DESKTOP-SKBE9FB/events.out.tfevents.1733098802.DESKTOP-SKBE9FB.2796.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1f54208f94aab9d58c8a1f6921b72cd15fb82aa5c029bf45826f5593eb5cab
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 40.0,
3
- "train_loss": 0.4049976162612438,
4
- "train_runtime": 701.3243,
5
- "train_samples_per_second": 28.403,
6
- "train_steps_per_second": 0.456
7
  }
 
1
  {
2
+ "epoch": 35.56,
3
+ "train_loss": 0.7482577681541442,
4
+ "train_runtime": 589.7762,
5
+ "train_samples_per_second": 19.533,
6
+ "train_steps_per_second": 0.271
7
  }
trainer_state.json CHANGED
@@ -1,581 +1,449 @@
1
  {
2
- "best_metric": 0.8317757009345794,
3
- "best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-248",
4
- "epoch": 40.0,
5
  "eval_steps": 500,
6
- "global_step": 320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "eval_accuracy": 0.5887850467289719,
14
- "eval_loss": 0.6887365579605103,
15
- "eval_runtime": 2.8498,
16
- "eval_samples_per_second": 37.546,
17
- "eval_steps_per_second": 2.456,
18
- "step": 8
19
- },
20
- {
21
- "epoch": 1.25,
22
- "learning_rate": 1.5625e-05,
23
- "loss": 0.692,
24
- "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.5887850467289719,
29
- "eval_loss": 0.678210437297821,
30
- "eval_runtime": 1.9785,
31
- "eval_samples_per_second": 54.082,
32
- "eval_steps_per_second": 3.538,
33
- "step": 16
34
  },
35
  {
36
- "epoch": 2.5,
37
  "learning_rate": 3.125e-05,
38
- "loss": 0.6801,
39
- "step": 20
40
- },
41
- {
42
- "epoch": 3.0,
43
- "eval_accuracy": 0.5887850467289719,
44
- "eval_loss": 0.6668981313705444,
45
- "eval_runtime": 2.0656,
46
- "eval_samples_per_second": 51.802,
47
- "eval_steps_per_second": 3.389,
48
- "step": 24
49
  },
50
  {
51
- "epoch": 3.75,
52
- "learning_rate": 4.6875e-05,
53
- "loss": 0.6696,
54
- "step": 30
 
 
 
55
  },
56
  {
57
  "epoch": 4.0,
58
- "eval_accuracy": 0.5887850467289719,
59
- "eval_loss": 0.6644209623336792,
60
- "eval_runtime": 2.1267,
61
- "eval_samples_per_second": 50.313,
62
- "eval_steps_per_second": 3.291,
63
- "step": 32
64
  },
65
  {
66
- "epoch": 5.0,
67
  "learning_rate": 4.8611111111111115e-05,
68
- "loss": 0.6607,
69
- "step": 40
70
  },
71
  {
72
- "epoch": 5.0,
73
- "eval_accuracy": 0.6635514018691588,
74
- "eval_loss": 0.6661449074745178,
75
- "eval_runtime": 1.995,
76
- "eval_samples_per_second": 53.635,
77
- "eval_steps_per_second": 3.509,
78
- "step": 40
79
  },
80
  {
81
  "epoch": 6.0,
82
- "eval_accuracy": 0.6542056074766355,
83
- "eval_loss": 0.6241438388824463,
84
- "eval_runtime": 2.0189,
85
- "eval_samples_per_second": 52.999,
86
- "eval_steps_per_second": 3.467,
87
- "step": 48
88
- },
89
- {
90
- "epoch": 6.25,
91
- "learning_rate": 4.6875e-05,
92
- "loss": 0.6341,
93
- "step": 50
94
  },
95
  {
96
- "epoch": 7.0,
97
- "eval_accuracy": 0.6542056074766355,
98
- "eval_loss": 0.6234968900680542,
99
- "eval_runtime": 1.9955,
100
- "eval_samples_per_second": 53.622,
101
- "eval_steps_per_second": 3.508,
102
- "step": 56
103
- },
104
- {
105
- "epoch": 7.5,
106
  "learning_rate": 4.5138888888888894e-05,
107
- "loss": 0.6089,
108
- "step": 60
109
  },
110
  {
111
- "epoch": 8.0,
112
- "eval_accuracy": 0.6915887850467289,
113
- "eval_loss": 0.6088296175003052,
114
- "eval_runtime": 2.06,
115
- "eval_samples_per_second": 51.942,
116
- "eval_steps_per_second": 3.398,
117
- "step": 64
118
  },
119
  {
120
- "epoch": 8.75,
121
- "learning_rate": 4.340277777777778e-05,
122
- "loss": 0.6095,
123
- "step": 70
 
 
 
124
  },
125
  {
126
- "epoch": 9.0,
127
- "eval_accuracy": 0.6915887850467289,
128
- "eval_loss": 0.5911644101142883,
129
- "eval_runtime": 2.01,
130
- "eval_samples_per_second": 53.235,
131
- "eval_steps_per_second": 3.483,
132
- "step": 72
133
  },
134
  {
135
- "epoch": 10.0,
136
- "learning_rate": 4.166666666666667e-05,
137
- "loss": 0.5632,
138
- "step": 80
 
 
 
139
  },
140
  {
141
  "epoch": 10.0,
142
- "eval_accuracy": 0.6355140186915887,
143
- "eval_loss": 0.660692036151886,
144
- "eval_runtime": 2.126,
145
- "eval_samples_per_second": 50.329,
146
- "eval_steps_per_second": 3.293,
147
- "step": 80
148
  },
149
  {
150
- "epoch": 11.0,
151
- "eval_accuracy": 0.7009345794392523,
152
- "eval_loss": 0.5792553424835205,
153
- "eval_runtime": 1.972,
154
- "eval_samples_per_second": 54.261,
155
- "eval_steps_per_second": 3.55,
156
- "step": 88
157
  },
158
  {
159
- "epoch": 11.25,
160
- "learning_rate": 3.993055555555556e-05,
161
- "loss": 0.5418,
162
- "step": 90
163
  },
164
  {
165
  "epoch": 12.0,
166
- "eval_accuracy": 0.6822429906542056,
167
- "eval_loss": 0.5953279733657837,
168
- "eval_runtime": 2.2228,
169
- "eval_samples_per_second": 48.137,
170
- "eval_steps_per_second": 3.149,
171
- "step": 96
172
- },
173
- {
174
- "epoch": 12.5,
175
- "learning_rate": 3.8194444444444444e-05,
176
- "loss": 0.5336,
177
- "step": 100
178
  },
179
  {
180
- "epoch": 13.0,
181
- "eval_accuracy": 0.7102803738317757,
182
- "eval_loss": 0.5792534947395325,
183
- "eval_runtime": 1.991,
184
- "eval_samples_per_second": 53.742,
185
- "eval_steps_per_second": 3.516,
186
- "step": 104
187
  },
188
  {
189
- "epoch": 13.75,
190
- "learning_rate": 3.6458333333333336e-05,
191
- "loss": 0.5102,
192
- "step": 110
193
  },
194
  {
195
  "epoch": 14.0,
196
- "eval_accuracy": 0.719626168224299,
197
- "eval_loss": 0.5291872024536133,
198
- "eval_runtime": 2.0585,
199
- "eval_samples_per_second": 51.98,
200
- "eval_steps_per_second": 3.401,
201
- "step": 112
202
  },
203
  {
204
- "epoch": 15.0,
205
- "learning_rate": 3.472222222222222e-05,
206
- "loss": 0.4762,
207
- "step": 120
 
 
 
208
  },
209
  {
210
- "epoch": 15.0,
211
- "eval_accuracy": 0.7009345794392523,
212
- "eval_loss": 0.6557727456092834,
213
- "eval_runtime": 1.9985,
214
- "eval_samples_per_second": 53.541,
215
- "eval_steps_per_second": 3.503,
216
- "step": 120
217
  },
218
  {
219
  "epoch": 16.0,
220
- "eval_accuracy": 0.7102803738317757,
221
- "eval_loss": 0.5371208786964417,
222
- "eval_runtime": 1.971,
223
- "eval_samples_per_second": 54.288,
224
- "eval_steps_per_second": 3.552,
225
- "step": 128
226
- },
227
- {
228
- "epoch": 16.25,
229
- "learning_rate": 3.2986111111111115e-05,
230
- "loss": 0.544,
231
- "step": 130
232
  },
233
  {
234
- "epoch": 17.0,
235
- "eval_accuracy": 0.7570093457943925,
236
- "eval_loss": 0.5400705337524414,
237
- "eval_runtime": 1.981,
238
- "eval_samples_per_second": 54.014,
239
- "eval_steps_per_second": 3.534,
240
- "step": 136
241
  },
242
  {
243
- "epoch": 17.5,
244
- "learning_rate": 3.125e-05,
245
- "loss": 0.4256,
246
- "step": 140
247
  },
248
  {
249
  "epoch": 18.0,
250
- "eval_accuracy": 0.794392523364486,
251
- "eval_loss": 0.49267861247062683,
252
- "eval_runtime": 1.9622,
253
- "eval_samples_per_second": 54.531,
254
- "eval_steps_per_second": 3.567,
255
- "step": 144
256
- },
257
- {
258
- "epoch": 18.75,
259
- "learning_rate": 2.951388888888889e-05,
260
- "loss": 0.4082,
261
- "step": 150
262
  },
263
  {
264
- "epoch": 19.0,
265
- "eval_accuracy": 0.7383177570093458,
266
- "eval_loss": 0.5800967216491699,
267
- "eval_runtime": 1.9825,
268
- "eval_samples_per_second": 53.973,
269
- "eval_steps_per_second": 3.531,
270
- "step": 152
271
  },
272
  {
273
  "epoch": 20.0,
274
- "learning_rate": 2.777777777777778e-05,
275
- "loss": 0.4014,
276
- "step": 160
277
  },
278
  {
279
  "epoch": 20.0,
280
- "eval_accuracy": 0.7383177570093458,
281
- "eval_loss": 0.5822688937187195,
282
- "eval_runtime": 1.985,
283
- "eval_samples_per_second": 53.905,
284
- "eval_steps_per_second": 3.526,
285
- "step": 160
286
- },
287
- {
288
- "epoch": 21.0,
289
- "eval_accuracy": 0.7757009345794392,
290
- "eval_loss": 0.5392723083496094,
291
- "eval_runtime": 2.117,
292
- "eval_samples_per_second": 50.542,
293
- "eval_steps_per_second": 3.306,
294
- "step": 168
295
  },
296
  {
297
- "epoch": 21.25,
298
- "learning_rate": 2.604166666666667e-05,
299
- "loss": 0.3483,
300
- "step": 170
 
 
 
301
  },
302
  {
303
  "epoch": 22.0,
304
- "eval_accuracy": 0.7102803738317757,
305
- "eval_loss": 0.5940819382667542,
306
- "eval_runtime": 1.9765,
307
- "eval_samples_per_second": 54.137,
308
- "eval_steps_per_second": 3.542,
309
- "step": 176
310
  },
311
  {
312
- "epoch": 22.5,
313
- "learning_rate": 2.4305555555555558e-05,
314
- "loss": 0.3121,
315
- "step": 180
316
- },
317
- {
318
- "epoch": 23.0,
319
- "eval_accuracy": 0.7383177570093458,
320
- "eval_loss": 0.5568514466285706,
321
- "eval_runtime": 2.1005,
322
- "eval_samples_per_second": 50.94,
323
- "eval_steps_per_second": 3.333,
324
- "step": 184
325
  },
326
  {
327
- "epoch": 23.75,
328
- "learning_rate": 2.2569444444444447e-05,
329
- "loss": 0.3484,
330
- "step": 190
 
 
 
331
  },
332
  {
333
  "epoch": 24.0,
334
- "eval_accuracy": 0.7663551401869159,
335
- "eval_loss": 0.5975044369697571,
336
- "eval_runtime": 1.97,
337
- "eval_samples_per_second": 54.316,
338
- "eval_steps_per_second": 3.553,
339
- "step": 192
340
  },
341
  {
342
- "epoch": 25.0,
343
- "learning_rate": 2.0833333333333336e-05,
344
- "loss": 0.263,
345
- "step": 200
346
  },
347
  {
348
- "epoch": 25.0,
349
- "eval_accuracy": 0.7570093457943925,
350
- "eval_loss": 0.6544022560119629,
351
- "eval_runtime": 2.3716,
352
- "eval_samples_per_second": 45.118,
353
- "eval_steps_per_second": 2.952,
354
- "step": 200
355
  },
356
  {
357
  "epoch": 26.0,
358
- "eval_accuracy": 0.7757009345794392,
359
- "eval_loss": 0.5743973851203918,
360
- "eval_runtime": 2.0105,
361
- "eval_samples_per_second": 53.221,
362
- "eval_steps_per_second": 3.482,
363
- "step": 208
364
  },
365
  {
366
- "epoch": 26.25,
367
- "learning_rate": 1.9097222222222222e-05,
368
- "loss": 0.2633,
369
- "step": 210
370
- },
371
- {
372
- "epoch": 27.0,
373
- "eval_accuracy": 0.7663551401869159,
374
- "eval_loss": 0.6095036864280701,
375
- "eval_runtime": 1.9725,
376
- "eval_samples_per_second": 54.247,
377
- "eval_steps_per_second": 3.549,
378
- "step": 216
379
  },
380
  {
381
- "epoch": 27.5,
382
- "learning_rate": 1.736111111111111e-05,
383
- "loss": 0.2935,
384
- "step": 220
 
 
 
385
  },
386
  {
387
  "epoch": 28.0,
388
- "eval_accuracy": 0.7663551401869159,
389
- "eval_loss": 0.528620719909668,
390
- "eval_runtime": 2.0605,
391
- "eval_samples_per_second": 51.929,
392
- "eval_steps_per_second": 3.397,
393
- "step": 224
394
- },
395
- {
396
- "epoch": 28.75,
397
- "learning_rate": 1.5625e-05,
398
- "loss": 0.2332,
399
- "step": 230
400
  },
401
  {
402
- "epoch": 29.0,
403
- "eval_accuracy": 0.7850467289719626,
404
- "eval_loss": 0.6027860045433044,
405
- "eval_runtime": 1.9665,
406
- "eval_samples_per_second": 54.412,
407
- "eval_steps_per_second": 3.56,
408
- "step": 232
409
  },
410
  {
411
- "epoch": 30.0,
412
- "learning_rate": 1.388888888888889e-05,
413
- "loss": 0.2314,
414
- "step": 240
 
 
 
415
  },
416
  {
417
  "epoch": 30.0,
418
- "eval_accuracy": 0.794392523364486,
419
- "eval_loss": 0.5935384631156921,
420
- "eval_runtime": 1.962,
421
- "eval_samples_per_second": 54.537,
422
- "eval_steps_per_second": 3.568,
423
- "step": 240
424
  },
425
  {
426
- "epoch": 31.0,
427
- "eval_accuracy": 0.8317757009345794,
428
- "eval_loss": 0.5392867922782898,
429
- "eval_runtime": 1.969,
430
- "eval_samples_per_second": 54.343,
431
- "eval_steps_per_second": 3.555,
432
- "step": 248
433
  },
434
  {
435
- "epoch": 31.25,
436
- "learning_rate": 1.2152777777777779e-05,
437
- "loss": 0.202,
438
- "step": 250
439
  },
440
  {
441
  "epoch": 32.0,
442
- "eval_accuracy": 0.822429906542056,
443
- "eval_loss": 0.5556337833404541,
444
- "eval_runtime": 2.012,
445
- "eval_samples_per_second": 53.182,
446
- "eval_steps_per_second": 3.479,
447
- "step": 256
448
- },
449
- {
450
- "epoch": 32.5,
451
- "learning_rate": 1.0416666666666668e-05,
452
- "loss": 0.2127,
453
- "step": 260
454
  },
455
  {
456
- "epoch": 33.0,
457
- "eval_accuracy": 0.8037383177570093,
458
- "eval_loss": 0.5912833213806152,
459
- "eval_runtime": 1.98,
460
- "eval_samples_per_second": 54.041,
461
- "eval_steps_per_second": 3.535,
462
- "step": 264
463
  },
464
  {
465
- "epoch": 33.75,
466
- "learning_rate": 8.680555555555556e-06,
467
- "loss": 0.2035,
468
- "step": 270
469
  },
470
  {
471
  "epoch": 34.0,
472
- "eval_accuracy": 0.8037383177570093,
473
- "eval_loss": 0.5337203741073608,
474
- "eval_runtime": 1.9834,
475
- "eval_samples_per_second": 53.947,
476
- "eval_steps_per_second": 3.529,
477
- "step": 272
478
- },
479
- {
480
- "epoch": 35.0,
481
- "learning_rate": 6.944444444444445e-06,
482
- "loss": 0.2618,
483
- "step": 280
484
- },
485
- {
486
- "epoch": 35.0,
487
- "eval_accuracy": 0.8037383177570093,
488
- "eval_loss": 0.622107982635498,
489
- "eval_runtime": 2.0025,
490
- "eval_samples_per_second": 53.434,
491
- "eval_steps_per_second": 3.496,
492
- "step": 280
493
- },
494
- {
495
- "epoch": 36.0,
496
- "eval_accuracy": 0.8317757009345794,
497
- "eval_loss": 0.5089600682258606,
498
- "eval_runtime": 1.9755,
499
- "eval_samples_per_second": 54.164,
500
- "eval_steps_per_second": 3.543,
501
- "step": 288
502
- },
503
- {
504
- "epoch": 36.25,
505
- "learning_rate": 5.208333333333334e-06,
506
- "loss": 0.217,
507
- "step": 290
508
- },
509
- {
510
- "epoch": 37.0,
511
- "eval_accuracy": 0.822429906542056,
512
- "eval_loss": 0.5649047493934631,
513
- "eval_runtime": 1.991,
514
- "eval_samples_per_second": 53.743,
515
- "eval_steps_per_second": 3.516,
516
- "step": 296
517
  },
518
  {
519
- "epoch": 37.5,
520
- "learning_rate": 3.4722222222222224e-06,
521
- "loss": 0.2111,
522
- "step": 300
523
- },
524
- {
525
- "epoch": 38.0,
526
- "eval_accuracy": 0.8130841121495327,
527
- "eval_loss": 0.568317174911499,
528
- "eval_runtime": 2.0365,
529
- "eval_samples_per_second": 52.542,
530
- "eval_steps_per_second": 3.437,
531
- "step": 304
532
  },
533
  {
534
- "epoch": 38.75,
535
- "learning_rate": 1.7361111111111112e-06,
536
- "loss": 0.2085,
537
- "step": 310
538
  },
539
  {
540
- "epoch": 39.0,
541
- "eval_accuracy": 0.822429906542056,
542
- "eval_loss": 0.5397838950157166,
543
- "eval_runtime": 1.9935,
544
- "eval_samples_per_second": 53.675,
545
- "eval_steps_per_second": 3.511,
546
- "step": 312
547
  },
548
  {
549
- "epoch": 40.0,
550
- "learning_rate": 0.0,
551
- "loss": 0.1912,
552
- "step": 320
553
- },
554
- {
555
- "epoch": 40.0,
556
- "eval_accuracy": 0.822429906542056,
557
- "eval_loss": 0.5548034310340881,
558
- "eval_runtime": 1.978,
559
- "eval_samples_per_second": 54.096,
560
- "eval_steps_per_second": 3.539,
561
- "step": 320
562
- },
563
- {
564
- "epoch": 40.0,
565
- "step": 320,
566
- "total_flos": 1.5429806632629043e+18,
567
- "train_loss": 0.4049976162612438,
568
- "train_runtime": 701.3243,
569
- "train_samples_per_second": 28.403,
570
- "train_steps_per_second": 0.456
571
  }
572
  ],
573
  "logging_steps": 10,
574
- "max_steps": 320,
575
  "num_input_tokens_seen": 0,
576
  "num_train_epochs": 40,
577
  "save_steps": 500,
578
- "total_flos": 1.5429806632629043e+18,
579
  "train_batch_size": 16,
580
  "trial_name": null,
581
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8225806451612904,
3
+ "best_model_checkpoint": "beit-base-patch16-224-OT\\checkpoint-99",
4
+ "epoch": 35.55555555555556,
5
  "eval_steps": 500,
6
+ "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.89,
13
+ "eval_accuracy": 0.14516129032258066,
14
+ "eval_loss": 1.7602994441986084,
15
+ "eval_runtime": 2.148,
16
+ "eval_samples_per_second": 28.864,
17
+ "eval_steps_per_second": 1.862,
18
+ "step": 4
 
 
 
 
 
 
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.14516129032258066,
23
+ "eval_loss": 1.685233473777771,
24
+ "eval_runtime": 2.219,
25
+ "eval_samples_per_second": 27.94,
26
+ "eval_steps_per_second": 1.803,
27
+ "step": 9
28
  },
29
  {
30
+ "epoch": 2.22,
31
  "learning_rate": 3.125e-05,
32
+ "loss": 1.7571,
33
+ "step": 10
 
 
 
 
 
 
 
 
 
34
  },
35
  {
36
+ "epoch": 2.89,
37
+ "eval_accuracy": 0.14516129032258066,
38
+ "eval_loss": 1.5655227899551392,
39
+ "eval_runtime": 2.222,
40
+ "eval_samples_per_second": 27.903,
41
+ "eval_steps_per_second": 1.8,
42
+ "step": 13
43
  },
44
  {
45
  "epoch": 4.0,
46
+ "eval_accuracy": 0.14516129032258066,
47
+ "eval_loss": 1.3815577030181885,
48
+ "eval_runtime": 2.3696,
49
+ "eval_samples_per_second": 26.165,
50
+ "eval_steps_per_second": 1.688,
51
+ "step": 18
52
  },
53
  {
54
+ "epoch": 4.44,
55
  "learning_rate": 4.8611111111111115e-05,
56
+ "loss": 1.5255,
57
+ "step": 20
58
  },
59
  {
60
+ "epoch": 4.89,
61
+ "eval_accuracy": 0.3225806451612903,
62
+ "eval_loss": 1.2598901987075806,
63
+ "eval_runtime": 2.178,
64
+ "eval_samples_per_second": 28.466,
65
+ "eval_steps_per_second": 1.837,
66
+ "step": 22
67
  },
68
  {
69
  "epoch": 6.0,
70
+ "eval_accuracy": 0.4838709677419355,
71
+ "eval_loss": 1.153410792350769,
72
+ "eval_runtime": 2.227,
73
+ "eval_samples_per_second": 27.84,
74
+ "eval_steps_per_second": 1.796,
75
+ "step": 27
 
 
 
 
 
 
76
  },
77
  {
78
+ "epoch": 6.67,
 
 
 
 
 
 
 
 
 
79
  "learning_rate": 4.5138888888888894e-05,
80
+ "loss": 1.2245,
81
+ "step": 30
82
  },
83
  {
84
+ "epoch": 6.89,
85
+ "eval_accuracy": 0.4838709677419355,
86
+ "eval_loss": 1.0641188621520996,
87
+ "eval_runtime": 2.203,
88
+ "eval_samples_per_second": 28.143,
89
+ "eval_steps_per_second": 1.816,
90
+ "step": 31
91
  },
92
  {
93
+ "epoch": 8.0,
94
+ "eval_accuracy": 0.43548387096774194,
95
+ "eval_loss": 1.0371758937835693,
96
+ "eval_runtime": 2.2355,
97
+ "eval_samples_per_second": 27.734,
98
+ "eval_steps_per_second": 1.789,
99
+ "step": 36
100
  },
101
  {
102
+ "epoch": 8.89,
103
+ "learning_rate": 4.166666666666667e-05,
104
+ "loss": 1.0438,
105
+ "step": 40
 
 
 
106
  },
107
  {
108
+ "epoch": 8.89,
109
+ "eval_accuracy": 0.43548387096774194,
110
+ "eval_loss": 0.9987961649894714,
111
+ "eval_runtime": 2.3086,
112
+ "eval_samples_per_second": 26.857,
113
+ "eval_steps_per_second": 1.733,
114
+ "step": 40
115
  },
116
  {
117
  "epoch": 10.0,
118
+ "eval_accuracy": 0.5161290322580645,
119
+ "eval_loss": 0.9259945154190063,
120
+ "eval_runtime": 2.2905,
121
+ "eval_samples_per_second": 27.068,
122
+ "eval_steps_per_second": 1.746,
123
+ "step": 45
124
  },
125
  {
126
+ "epoch": 10.89,
127
+ "eval_accuracy": 0.7096774193548387,
128
+ "eval_loss": 0.9084866046905518,
129
+ "eval_runtime": 2.2545,
130
+ "eval_samples_per_second": 27.5,
131
+ "eval_steps_per_second": 1.774,
132
+ "step": 49
133
  },
134
  {
135
+ "epoch": 11.11,
136
+ "learning_rate": 3.8194444444444444e-05,
137
+ "loss": 0.9727,
138
+ "step": 50
139
  },
140
  {
141
  "epoch": 12.0,
142
+ "eval_accuracy": 0.7258064516129032,
143
+ "eval_loss": 0.843325674533844,
144
+ "eval_runtime": 2.1865,
145
+ "eval_samples_per_second": 28.355,
146
+ "eval_steps_per_second": 1.829,
147
+ "step": 54
 
 
 
 
 
 
148
  },
149
  {
150
+ "epoch": 12.89,
151
+ "eval_accuracy": 0.7741935483870968,
152
+ "eval_loss": 0.7529016137123108,
153
+ "eval_runtime": 2.1995,
154
+ "eval_samples_per_second": 28.188,
155
+ "eval_steps_per_second": 1.819,
156
+ "step": 58
157
  },
158
  {
159
+ "epoch": 13.33,
160
+ "learning_rate": 3.472222222222222e-05,
161
+ "loss": 0.8469,
162
+ "step": 60
163
  },
164
  {
165
  "epoch": 14.0,
166
+ "eval_accuracy": 0.7580645161290323,
167
+ "eval_loss": 0.7186572551727295,
168
+ "eval_runtime": 2.2225,
169
+ "eval_samples_per_second": 27.896,
170
+ "eval_steps_per_second": 1.8,
171
+ "step": 63
172
  },
173
  {
174
+ "epoch": 14.89,
175
+ "eval_accuracy": 0.7258064516129032,
176
+ "eval_loss": 0.6805762648582458,
177
+ "eval_runtime": 2.1855,
178
+ "eval_samples_per_second": 28.369,
179
+ "eval_steps_per_second": 1.83,
180
+ "step": 67
181
  },
182
  {
183
+ "epoch": 15.56,
184
+ "learning_rate": 3.125e-05,
185
+ "loss": 0.6908,
186
+ "step": 70
 
 
 
187
  },
188
  {
189
  "epoch": 16.0,
190
+ "eval_accuracy": 0.7580645161290323,
191
+ "eval_loss": 0.6575707197189331,
192
+ "eval_runtime": 2.2315,
193
+ "eval_samples_per_second": 27.784,
194
+ "eval_steps_per_second": 1.792,
195
+ "step": 72
 
 
 
 
 
 
196
  },
197
  {
198
+ "epoch": 16.89,
199
+ "eval_accuracy": 0.7903225806451613,
200
+ "eval_loss": 0.5742202997207642,
201
+ "eval_runtime": 2.261,
202
+ "eval_samples_per_second": 27.421,
203
+ "eval_steps_per_second": 1.769,
204
+ "step": 76
205
  },
206
  {
207
+ "epoch": 17.78,
208
+ "learning_rate": 2.777777777777778e-05,
209
+ "loss": 0.6064,
210
+ "step": 80
211
  },
212
  {
213
  "epoch": 18.0,
214
+ "eval_accuracy": 0.7580645161290323,
215
+ "eval_loss": 0.6446634531021118,
216
+ "eval_runtime": 2.3311,
217
+ "eval_samples_per_second": 26.597,
218
+ "eval_steps_per_second": 1.716,
219
+ "step": 81
 
 
 
 
 
 
220
  },
221
  {
222
+ "epoch": 18.89,
223
+ "eval_accuracy": 0.7741935483870968,
224
+ "eval_loss": 0.5602142810821533,
225
+ "eval_runtime": 2.1895,
226
+ "eval_samples_per_second": 28.317,
227
+ "eval_steps_per_second": 1.827,
228
+ "step": 85
229
  },
230
  {
231
  "epoch": 20.0,
232
+ "learning_rate": 2.4305555555555558e-05,
233
+ "loss": 0.5303,
234
+ "step": 90
235
  },
236
  {
237
  "epoch": 20.0,
238
+ "eval_accuracy": 0.7903225806451613,
239
+ "eval_loss": 0.49427932500839233,
240
+ "eval_runtime": 2.2745,
241
+ "eval_samples_per_second": 27.258,
242
+ "eval_steps_per_second": 1.759,
243
+ "step": 90
 
 
 
 
 
 
 
 
 
244
  },
245
  {
246
+ "epoch": 20.89,
247
+ "eval_accuracy": 0.7903225806451613,
248
+ "eval_loss": 0.530381441116333,
249
+ "eval_runtime": 2.142,
250
+ "eval_samples_per_second": 28.945,
251
+ "eval_steps_per_second": 1.867,
252
+ "step": 94
253
  },
254
  {
255
  "epoch": 22.0,
256
+ "eval_accuracy": 0.8225806451612904,
257
+ "eval_loss": 0.48010584712028503,
258
+ "eval_runtime": 2.3616,
259
+ "eval_samples_per_second": 26.254,
260
+ "eval_steps_per_second": 1.694,
261
+ "step": 99
262
  },
263
  {
264
+ "epoch": 22.22,
265
+ "learning_rate": 2.0833333333333336e-05,
266
+ "loss": 0.4903,
267
+ "step": 100
 
 
 
 
 
 
 
 
 
268
  },
269
  {
270
+ "epoch": 22.89,
271
+ "eval_accuracy": 0.8225806451612904,
272
+ "eval_loss": 0.4848884344100952,
273
+ "eval_runtime": 2.16,
274
+ "eval_samples_per_second": 28.703,
275
+ "eval_steps_per_second": 1.852,
276
+ "step": 103
277
  },
278
  {
279
  "epoch": 24.0,
280
+ "eval_accuracy": 0.7741935483870968,
281
+ "eval_loss": 0.5709980726242065,
282
+ "eval_runtime": 2.1685,
283
+ "eval_samples_per_second": 28.591,
284
+ "eval_steps_per_second": 1.845,
285
+ "step": 108
286
  },
287
  {
288
+ "epoch": 24.44,
289
+ "learning_rate": 1.736111111111111e-05,
290
+ "loss": 0.4261,
291
+ "step": 110
292
  },
293
  {
294
+ "epoch": 24.89,
295
+ "eval_accuracy": 0.7903225806451613,
296
+ "eval_loss": 0.4803168475627899,
297
+ "eval_runtime": 2.2145,
298
+ "eval_samples_per_second": 27.997,
299
+ "eval_steps_per_second": 1.806,
300
+ "step": 112
301
  },
302
  {
303
  "epoch": 26.0,
304
+ "eval_accuracy": 0.7258064516129032,
305
+ "eval_loss": 0.5670634508132935,
306
+ "eval_runtime": 2.1993,
307
+ "eval_samples_per_second": 28.191,
308
+ "eval_steps_per_second": 1.819,
309
+ "step": 117
310
  },
311
  {
312
+ "epoch": 26.67,
313
+ "learning_rate": 1.388888888888889e-05,
314
+ "loss": 0.4122,
315
+ "step": 120
 
 
 
 
 
 
 
 
 
316
  },
317
  {
318
+ "epoch": 26.89,
319
+ "eval_accuracy": 0.8064516129032258,
320
+ "eval_loss": 0.4585064947605133,
321
+ "eval_runtime": 2.2065,
322
+ "eval_samples_per_second": 28.098,
323
+ "eval_steps_per_second": 1.813,
324
+ "step": 121
325
  },
326
  {
327
  "epoch": 28.0,
328
+ "eval_accuracy": 0.7096774193548387,
329
+ "eval_loss": 0.5910329222679138,
330
+ "eval_runtime": 2.23,
331
+ "eval_samples_per_second": 27.802,
332
+ "eval_steps_per_second": 1.794,
333
+ "step": 126
 
 
 
 
 
 
334
  },
335
  {
336
+ "epoch": 28.89,
337
+ "learning_rate": 1.0416666666666668e-05,
338
+ "loss": 0.3739,
339
+ "step": 130
 
 
 
340
  },
341
  {
342
+ "epoch": 28.89,
343
+ "eval_accuracy": 0.7580645161290323,
344
+ "eval_loss": 0.5821260213851929,
345
+ "eval_runtime": 2.2145,
346
+ "eval_samples_per_second": 27.997,
347
+ "eval_steps_per_second": 1.806,
348
+ "step": 130
349
  },
350
  {
351
  "epoch": 30.0,
352
+ "eval_accuracy": 0.7741935483870968,
353
+ "eval_loss": 0.5329306125640869,
354
+ "eval_runtime": 2.3526,
355
+ "eval_samples_per_second": 26.354,
356
+ "eval_steps_per_second": 1.7,
357
+ "step": 135
358
  },
359
  {
360
+ "epoch": 30.89,
361
+ "eval_accuracy": 0.8225806451612904,
362
+ "eval_loss": 0.4422537386417389,
363
+ "eval_runtime": 2.1625,
364
+ "eval_samples_per_second": 28.67,
365
+ "eval_steps_per_second": 1.85,
366
+ "step": 139
367
  },
368
  {
369
+ "epoch": 31.11,
370
+ "learning_rate": 6.944444444444445e-06,
371
+ "loss": 0.3896,
372
+ "step": 140
373
  },
374
  {
375
  "epoch": 32.0,
376
+ "eval_accuracy": 0.7580645161290323,
377
+ "eval_loss": 0.47155243158340454,
378
+ "eval_runtime": 2.1865,
379
+ "eval_samples_per_second": 28.356,
380
+ "eval_steps_per_second": 1.829,
381
+ "step": 144
 
 
 
 
 
 
382
  },
383
  {
384
+ "epoch": 32.89,
385
+ "eval_accuracy": 0.7903225806451613,
386
+ "eval_loss": 0.4785827100276947,
387
+ "eval_runtime": 2.232,
388
+ "eval_samples_per_second": 27.777,
389
+ "eval_steps_per_second": 1.792,
390
+ "step": 148
391
  },
392
  {
393
+ "epoch": 33.33,
394
+ "learning_rate": 3.4722222222222224e-06,
395
+ "loss": 0.3472,
396
+ "step": 150
397
  },
398
  {
399
  "epoch": 34.0,
400
+ "eval_accuracy": 0.7903225806451613,
401
+ "eval_loss": 0.45382845401763916,
402
+ "eval_runtime": 2.19,
403
+ "eval_samples_per_second": 28.31,
404
+ "eval_steps_per_second": 1.826,
405
+ "step": 153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  },
407
  {
408
+ "epoch": 34.89,
409
+ "eval_accuracy": 0.7903225806451613,
410
+ "eval_loss": 0.455282062292099,
411
+ "eval_runtime": 2.2165,
412
+ "eval_samples_per_second": 27.972,
413
+ "eval_steps_per_second": 1.805,
414
+ "step": 157
 
 
 
 
 
 
415
  },
416
  {
417
+ "epoch": 35.56,
418
+ "learning_rate": 0.0,
419
+ "loss": 0.3349,
420
+ "step": 160
421
  },
422
  {
423
+ "epoch": 35.56,
424
+ "eval_accuracy": 0.7903225806451613,
425
+ "eval_loss": 0.45279815793037415,
426
+ "eval_runtime": 2.4931,
427
+ "eval_samples_per_second": 24.869,
428
+ "eval_steps_per_second": 1.604,
429
+ "step": 160
430
  },
431
  {
432
+ "epoch": 35.56,
433
+ "step": 160,
434
+ "total_flos": 7.931930389512192e+17,
435
+ "train_loss": 0.7482577681541442,
436
+ "train_runtime": 589.7762,
437
+ "train_samples_per_second": 19.533,
438
+ "train_steps_per_second": 0.271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  }
440
  ],
441
  "logging_steps": 10,
442
+ "max_steps": 160,
443
  "num_input_tokens_seen": 0,
444
  "num_train_epochs": 40,
445
  "save_steps": 500,
446
+ "total_flos": 7.931930389512192e+17,
447
  "train_batch_size": 16,
448
  "trial_name": null,
449
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:289be5ea4287de61c05dc38506ba40eb212d6de2c0bca319801e20a96f6874c0
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70281fb12f068732959d20071bec1711ddb5ad7a6484dbe72999c1aab81491a0
3
  size 4728