bombshelll committed on
Commit
8347ec0
·
verified ·
1 Parent(s): 7545b56

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. all_results.json +11 -11
  3. eval_results.json +6 -6
  4. train_results.json +6 -6
  5. trainer_state.json +249 -116
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.3453
22
- - Accuracy: 0.8895
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.2871
22
+ - Accuracy: 0.9116
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 11.294117647058824,
3
- "eval_accuracy": 0.9226519337016574,
4
- "eval_loss": 0.27896857261657715,
5
- "eval_runtime": 1.4568,
6
- "eval_samples_per_second": 124.248,
7
- "eval_steps_per_second": 4.119,
8
- "total_flos": 4.5598374310035456e+17,
9
- "train_loss": 0.40474244703849155,
10
- "train_runtime": 265.8333,
11
- "train_samples_per_second": 73.309,
12
- "train_steps_per_second": 0.542
13
  }
 
1
  {
2
+ "epoch": 18.823529411764707,
3
+ "eval_accuracy": 0.9116022099447514,
4
+ "eval_loss": 0.28710222244262695,
5
+ "eval_runtime": 1.5215,
6
+ "eval_samples_per_second": 118.965,
7
+ "eval_steps_per_second": 3.944,
8
+ "total_flos": 7.600391915087462e+17,
9
+ "train_loss": 0.3423821290334066,
10
+ "train_runtime": 426.6472,
11
+ "train_samples_per_second": 76.128,
12
+ "train_steps_per_second": 0.563
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 11.294117647058824,
3
- "eval_accuracy": 0.9226519337016574,
4
- "eval_loss": 0.27896857261657715,
5
- "eval_runtime": 1.4568,
6
- "eval_samples_per_second": 124.248,
7
- "eval_steps_per_second": 4.119
8
  }
 
1
  {
2
+ "epoch": 18.823529411764707,
3
+ "eval_accuracy": 0.9116022099447514,
4
+ "eval_loss": 0.28710222244262695,
5
+ "eval_runtime": 1.5215,
6
+ "eval_samples_per_second": 118.965,
7
+ "eval_steps_per_second": 3.944
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 11.294117647058824,
3
- "total_flos": 4.5598374310035456e+17,
4
- "train_loss": 0.40474244703849155,
5
- "train_runtime": 265.8333,
6
- "train_samples_per_second": 73.309,
7
- "train_steps_per_second": 0.542
8
  }
 
1
  {
2
+ "epoch": 18.823529411764707,
3
+ "total_flos": 7.600391915087462e+17,
4
+ "train_loss": 0.3423821290334066,
5
+ "train_runtime": 426.6472,
6
+ "train_samples_per_second": 76.128,
7
+ "train_steps_per_second": 0.563
8
  }
trainer_state.json CHANGED
@@ -1,233 +1,366 @@
1
  {
2
- "best_metric": 0.9226519337016574,
3
- "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-144",
4
- "epoch": 11.294117647058824,
5
  "eval_steps": 500,
6
- "global_step": 144,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.7843137254901961,
13
- "grad_norm": 7.8652119636535645,
14
- "learning_rate": 3.3333333333333335e-05,
15
- "loss": 1.2317,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.9411764705882353,
20
- "eval_accuracy": 0.7679558011049724,
21
- "eval_loss": 0.8055561780929565,
22
- "eval_runtime": 1.4266,
23
- "eval_samples_per_second": 126.873,
24
- "eval_steps_per_second": 4.206,
25
  "step": 12
26
  },
27
  {
28
  "epoch": 1.5686274509803921,
29
- "grad_norm": 10.274601936340332,
30
- "learning_rate": 4.8062015503875976e-05,
31
- "loss": 0.7321,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.9607843137254903,
36
  "eval_accuracy": 0.8784530386740331,
37
- "eval_loss": 0.34271344542503357,
38
- "eval_runtime": 1.441,
39
- "eval_samples_per_second": 125.611,
40
- "eval_steps_per_second": 4.164,
41
  "step": 25
42
  },
43
  {
44
  "epoch": 2.3529411764705883,
45
- "grad_norm": 7.0333251953125,
46
- "learning_rate": 4.418604651162791e-05,
47
- "loss": 0.4526,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 2.980392156862745,
52
- "eval_accuracy": 0.9116022099447514,
53
- "eval_loss": 0.2998741567134857,
54
- "eval_runtime": 1.4374,
55
- "eval_samples_per_second": 125.92,
56
- "eval_steps_per_second": 4.174,
57
  "step": 38
58
  },
59
  {
60
  "epoch": 3.1372549019607843,
61
- "grad_norm": 6.83636474609375,
62
- "learning_rate": 4.0310077519379843e-05,
63
- "loss": 0.3845,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 3.9215686274509802,
68
- "grad_norm": 12.36241340637207,
69
- "learning_rate": 3.6434108527131784e-05,
70
- "loss": 0.3984,
71
  "step": 50
72
  },
73
  {
74
  "epoch": 4.0,
75
- "eval_accuracy": 0.8950276243093923,
76
- "eval_loss": 0.30480191111564636,
77
- "eval_runtime": 1.4688,
78
- "eval_samples_per_second": 123.229,
79
- "eval_steps_per_second": 4.085,
80
  "step": 51
81
  },
82
  {
83
  "epoch": 4.705882352941177,
84
- "grad_norm": 4.690925121307373,
85
- "learning_rate": 3.2558139534883724e-05,
86
- "loss": 0.3362,
87
  "step": 60
88
  },
89
  {
90
  "epoch": 4.9411764705882355,
91
- "eval_accuracy": 0.9060773480662984,
92
- "eval_loss": 0.2832357585430145,
93
- "eval_runtime": 1.4103,
94
- "eval_samples_per_second": 128.343,
95
- "eval_steps_per_second": 4.254,
96
  "step": 63
97
  },
98
  {
99
  "epoch": 5.490196078431373,
100
- "grad_norm": 4.939608097076416,
101
- "learning_rate": 2.868217054263566e-05,
102
- "loss": 0.3356,
103
  "step": 70
104
  },
105
  {
106
  "epoch": 5.96078431372549,
107
- "eval_accuracy": 0.8950276243093923,
108
- "eval_loss": 0.29102668166160583,
109
- "eval_runtime": 1.4198,
110
- "eval_samples_per_second": 127.482,
111
- "eval_steps_per_second": 4.226,
112
  "step": 76
113
  },
114
  {
115
  "epoch": 6.2745098039215685,
116
- "grad_norm": 5.255800724029541,
117
- "learning_rate": 2.48062015503876e-05,
118
- "loss": 0.2958,
119
  "step": 80
120
  },
121
  {
122
  "epoch": 6.980392156862745,
123
  "eval_accuracy": 0.9116022099447514,
124
- "eval_loss": 0.2802250385284424,
125
- "eval_runtime": 1.405,
126
- "eval_samples_per_second": 128.824,
127
- "eval_steps_per_second": 4.27,
128
  "step": 89
129
  },
130
  {
131
  "epoch": 7.0588235294117645,
132
- "grad_norm": 4.129535675048828,
133
- "learning_rate": 2.0930232558139536e-05,
134
- "loss": 0.2735,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 7.8431372549019605,
139
- "grad_norm": 3.834648370742798,
140
- "learning_rate": 1.7054263565891473e-05,
141
- "loss": 0.2728,
142
  "step": 100
143
  },
144
  {
145
  "epoch": 8.0,
146
- "eval_accuracy": 0.9116022099447514,
147
- "eval_loss": 0.2806554436683655,
148
- "eval_runtime": 1.4695,
149
- "eval_samples_per_second": 123.174,
150
- "eval_steps_per_second": 4.083,
151
  "step": 102
152
  },
153
  {
154
  "epoch": 8.627450980392156,
155
- "grad_norm": 5.502172470092773,
156
- "learning_rate": 1.3178294573643413e-05,
157
- "loss": 0.2605,
158
  "step": 110
159
  },
160
  {
161
  "epoch": 8.941176470588236,
162
- "eval_accuracy": 0.9171270718232044,
163
- "eval_loss": 0.2814862132072449,
164
- "eval_runtime": 1.4181,
165
- "eval_samples_per_second": 127.639,
166
- "eval_steps_per_second": 4.231,
167
  "step": 114
168
  },
169
  {
170
  "epoch": 9.411764705882353,
171
- "grad_norm": 3.6628360748291016,
172
- "learning_rate": 9.302325581395349e-06,
173
- "loss": 0.2407,
174
  "step": 120
175
  },
176
  {
177
  "epoch": 9.96078431372549,
178
- "eval_accuracy": 0.9171270718232044,
179
- "eval_loss": 0.2806278169155121,
180
- "eval_runtime": 1.4234,
181
- "eval_samples_per_second": 127.162,
182
- "eval_steps_per_second": 4.215,
183
  "step": 127
184
  },
185
  {
186
  "epoch": 10.196078431372548,
187
- "grad_norm": 3.5698726177215576,
188
- "learning_rate": 5.426356589147287e-06,
189
- "loss": 0.2528,
190
  "step": 130
191
  },
192
  {
193
  "epoch": 10.980392156862745,
194
- "grad_norm": 4.9788408279418945,
195
- "learning_rate": 1.550387596899225e-06,
196
- "loss": 0.2549,
197
  "step": 140
198
  },
199
  {
200
  "epoch": 10.980392156862745,
201
- "eval_accuracy": 0.9171270718232044,
202
- "eval_loss": 0.2795223295688629,
203
- "eval_runtime": 1.466,
204
- "eval_samples_per_second": 123.468,
205
- "eval_steps_per_second": 4.093,
206
  "step": 140
207
  },
208
  {
209
- "epoch": 11.294117647058824,
210
- "eval_accuracy": 0.9226519337016574,
211
- "eval_loss": 0.27896857261657715,
212
- "eval_runtime": 1.4683,
213
- "eval_samples_per_second": 123.273,
214
- "eval_steps_per_second": 4.086,
215
- "step": 144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  },
217
  {
218
- "epoch": 11.294117647058824,
219
- "step": 144,
220
- "total_flos": 4.5598374310035456e+17,
221
- "train_loss": 0.40474244703849155,
222
- "train_runtime": 265.8333,
223
- "train_samples_per_second": 73.309,
224
- "train_steps_per_second": 0.542
225
  }
226
  ],
227
  "logging_steps": 10,
228
- "max_steps": 144,
229
  "num_input_tokens_seen": 0,
230
- "num_train_epochs": 12,
231
  "save_steps": 500,
232
  "stateful_callbacks": {
233
  "CustomEarlyStoppingCallback": {
@@ -250,7 +383,7 @@
250
  "attributes": {}
251
  }
252
  },
253
- "total_flos": 4.5598374310035456e+17,
254
  "train_batch_size": 32,
255
  "trial_name": null,
256
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9116022099447514,
3
+ "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-51",
4
+ "epoch": 18.823529411764707,
5
  "eval_steps": 500,
6
+ "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.7843137254901961,
13
+ "grad_norm": 5.9126081466674805,
14
+ "learning_rate": 2.0833333333333336e-05,
15
+ "loss": 1.3069,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.9411764705882353,
20
+ "eval_accuracy": 0.6077348066298343,
21
+ "eval_loss": 0.9999544024467468,
22
+ "eval_runtime": 1.4322,
23
+ "eval_samples_per_second": 126.379,
24
+ "eval_steps_per_second": 4.189,
25
  "step": 12
26
  },
27
  {
28
  "epoch": 1.5686274509803921,
29
+ "grad_norm": 7.789252758026123,
30
+ "learning_rate": 4.166666666666667e-05,
31
+ "loss": 0.8924,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.9607843137254903,
36
  "eval_accuracy": 0.8784530386740331,
37
+ "eval_loss": 0.43337252736091614,
38
+ "eval_runtime": 1.4151,
39
+ "eval_samples_per_second": 127.902,
40
+ "eval_steps_per_second": 4.24,
41
  "step": 25
42
  },
43
  {
44
  "epoch": 2.3529411764705883,
45
+ "grad_norm": 6.7294697761535645,
46
+ "learning_rate": 4.8611111111111115e-05,
47
+ "loss": 0.5365,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 2.980392156862745,
52
+ "eval_accuracy": 0.9005524861878453,
53
+ "eval_loss": 0.3143160939216614,
54
+ "eval_runtime": 1.4092,
55
+ "eval_samples_per_second": 128.446,
56
+ "eval_steps_per_second": 4.258,
57
  "step": 38
58
  },
59
  {
60
  "epoch": 3.1372549019607843,
61
+ "grad_norm": 8.53775691986084,
62
+ "learning_rate": 4.62962962962963e-05,
63
+ "loss": 0.4119,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 3.9215686274509802,
68
+ "grad_norm": 8.774258613586426,
69
+ "learning_rate": 4.3981481481481486e-05,
70
+ "loss": 0.3814,
71
  "step": 50
72
  },
73
  {
74
  "epoch": 4.0,
75
+ "eval_accuracy": 0.9116022099447514,
76
+ "eval_loss": 0.28710222244262695,
77
+ "eval_runtime": 1.4257,
78
+ "eval_samples_per_second": 126.96,
79
+ "eval_steps_per_second": 4.209,
80
  "step": 51
81
  },
82
  {
83
  "epoch": 4.705882352941177,
84
+ "grad_norm": 5.285433769226074,
85
+ "learning_rate": 4.166666666666667e-05,
86
+ "loss": 0.3336,
87
  "step": 60
88
  },
89
  {
90
  "epoch": 4.9411764705882355,
91
+ "eval_accuracy": 0.9116022099447514,
92
+ "eval_loss": 0.2963091731071472,
93
+ "eval_runtime": 1.415,
94
+ "eval_samples_per_second": 127.916,
95
+ "eval_steps_per_second": 4.24,
96
  "step": 63
97
  },
98
  {
99
  "epoch": 5.490196078431373,
100
+ "grad_norm": 7.04965353012085,
101
+ "learning_rate": 3.935185185185186e-05,
102
+ "loss": 0.353,
103
  "step": 70
104
  },
105
  {
106
  "epoch": 5.96078431372549,
107
+ "eval_accuracy": 0.8729281767955801,
108
+ "eval_loss": 0.31954672932624817,
109
+ "eval_runtime": 1.4556,
110
+ "eval_samples_per_second": 124.344,
111
+ "eval_steps_per_second": 4.122,
112
  "step": 76
113
  },
114
  {
115
  "epoch": 6.2745098039215685,
116
+ "grad_norm": 5.833162307739258,
117
+ "learning_rate": 3.7037037037037037e-05,
118
+ "loss": 0.3069,
119
  "step": 80
120
  },
121
  {
122
  "epoch": 6.980392156862745,
123
  "eval_accuracy": 0.9116022099447514,
124
+ "eval_loss": 0.29521241784095764,
125
+ "eval_runtime": 1.4158,
126
+ "eval_samples_per_second": 127.846,
127
+ "eval_steps_per_second": 4.238,
128
  "step": 89
129
  },
130
  {
131
  "epoch": 7.0588235294117645,
132
+ "grad_norm": 5.050061225891113,
133
+ "learning_rate": 3.472222222222222e-05,
134
+ "loss": 0.2789,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 7.8431372549019605,
139
+ "grad_norm": 4.222379207611084,
140
+ "learning_rate": 3.240740740740741e-05,
141
+ "loss": 0.293,
142
  "step": 100
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "eval_accuracy": 0.8895027624309392,
147
+ "eval_loss": 0.3174145817756653,
148
+ "eval_runtime": 1.4186,
149
+ "eval_samples_per_second": 127.591,
150
+ "eval_steps_per_second": 4.23,
151
  "step": 102
152
  },
153
  {
154
  "epoch": 8.627450980392156,
155
+ "grad_norm": 7.039156436920166,
156
+ "learning_rate": 3.0092592592592593e-05,
157
+ "loss": 0.2667,
158
  "step": 110
159
  },
160
  {
161
  "epoch": 8.941176470588236,
162
+ "eval_accuracy": 0.8950276243093923,
163
+ "eval_loss": 0.3225868344306946,
164
+ "eval_runtime": 1.4137,
165
+ "eval_samples_per_second": 128.03,
166
+ "eval_steps_per_second": 4.244,
167
  "step": 114
168
  },
169
  {
170
  "epoch": 9.411764705882353,
171
+ "grad_norm": 5.598822593688965,
172
+ "learning_rate": 2.777777777777778e-05,
173
+ "loss": 0.2424,
174
  "step": 120
175
  },
176
  {
177
  "epoch": 9.96078431372549,
178
+ "eval_accuracy": 0.8895027624309392,
179
+ "eval_loss": 0.3213161826133728,
180
+ "eval_runtime": 1.4313,
181
+ "eval_samples_per_second": 126.455,
182
+ "eval_steps_per_second": 4.192,
183
  "step": 127
184
  },
185
  {
186
  "epoch": 10.196078431372548,
187
+ "grad_norm": 4.785697937011719,
188
+ "learning_rate": 2.5462962962962965e-05,
189
+ "loss": 0.2544,
190
  "step": 130
191
  },
192
  {
193
  "epoch": 10.980392156862745,
194
+ "grad_norm": 5.349719047546387,
195
+ "learning_rate": 2.314814814814815e-05,
196
+ "loss": 0.2605,
197
  "step": 140
198
  },
199
  {
200
  "epoch": 10.980392156862745,
201
+ "eval_accuracy": 0.8895027624309392,
202
+ "eval_loss": 0.31716108322143555,
203
+ "eval_runtime": 1.4269,
204
+ "eval_samples_per_second": 126.846,
205
+ "eval_steps_per_second": 4.205,
206
  "step": 140
207
  },
208
  {
209
+ "epoch": 11.764705882352942,
210
+ "grad_norm": 6.121713161468506,
211
+ "learning_rate": 2.0833333333333336e-05,
212
+ "loss": 0.232,
213
+ "step": 150
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "eval_accuracy": 0.8895027624309392,
218
+ "eval_loss": 0.33846884965896606,
219
+ "eval_runtime": 1.406,
220
+ "eval_samples_per_second": 128.737,
221
+ "eval_steps_per_second": 4.268,
222
+ "step": 153
223
+ },
224
+ {
225
+ "epoch": 12.549019607843137,
226
+ "grad_norm": 7.647618770599365,
227
+ "learning_rate": 1.8518518518518518e-05,
228
+ "loss": 0.242,
229
+ "step": 160
230
+ },
231
+ {
232
+ "epoch": 12.941176470588236,
233
+ "eval_accuracy": 0.8950276243093923,
234
+ "eval_loss": 0.32744264602661133,
235
+ "eval_runtime": 1.4273,
236
+ "eval_samples_per_second": 126.813,
237
+ "eval_steps_per_second": 4.204,
238
+ "step": 165
239
+ },
240
+ {
241
+ "epoch": 13.333333333333334,
242
+ "grad_norm": 6.248785972595215,
243
+ "learning_rate": 1.6203703703703704e-05,
244
+ "loss": 0.215,
245
+ "step": 170
246
+ },
247
+ {
248
+ "epoch": 13.96078431372549,
249
+ "eval_accuracy": 0.8950276243093923,
250
+ "eval_loss": 0.33850720524787903,
251
+ "eval_runtime": 1.4433,
252
+ "eval_samples_per_second": 125.407,
253
+ "eval_steps_per_second": 4.157,
254
+ "step": 178
255
+ },
256
+ {
257
+ "epoch": 14.117647058823529,
258
+ "grad_norm": 5.345800876617432,
259
+ "learning_rate": 1.388888888888889e-05,
260
+ "loss": 0.2123,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 14.901960784313726,
265
+ "grad_norm": 5.421293258666992,
266
+ "learning_rate": 1.1574074074074075e-05,
267
+ "loss": 0.2131,
268
+ "step": 190
269
+ },
270
+ {
271
+ "epoch": 14.980392156862745,
272
+ "eval_accuracy": 0.8950276243093923,
273
+ "eval_loss": 0.34223416447639465,
274
+ "eval_runtime": 1.409,
275
+ "eval_samples_per_second": 128.462,
276
+ "eval_steps_per_second": 4.258,
277
+ "step": 191
278
+ },
279
+ {
280
+ "epoch": 15.686274509803921,
281
+ "grad_norm": 4.188720703125,
282
+ "learning_rate": 9.259259259259259e-06,
283
+ "loss": 0.201,
284
+ "step": 200
285
+ },
286
+ {
287
+ "epoch": 16.0,
288
+ "eval_accuracy": 0.8784530386740331,
289
+ "eval_loss": 0.341948539018631,
290
+ "eval_runtime": 1.4191,
291
+ "eval_samples_per_second": 127.549,
292
+ "eval_steps_per_second": 4.228,
293
+ "step": 204
294
+ },
295
+ {
296
+ "epoch": 16.470588235294116,
297
+ "grad_norm": 4.887516498565674,
298
+ "learning_rate": 6.944444444444445e-06,
299
+ "loss": 0.1976,
300
+ "step": 210
301
+ },
302
+ {
303
+ "epoch": 16.941176470588236,
304
+ "eval_accuracy": 0.9005524861878453,
305
+ "eval_loss": 0.3447644114494324,
306
+ "eval_runtime": 1.4043,
307
+ "eval_samples_per_second": 128.89,
308
+ "eval_steps_per_second": 4.273,
309
+ "step": 216
310
+ },
311
+ {
312
+ "epoch": 17.254901960784313,
313
+ "grad_norm": 5.946260452270508,
314
+ "learning_rate": 4.6296296296296296e-06,
315
+ "loss": 0.1886,
316
+ "step": 220
317
+ },
318
+ {
319
+ "epoch": 17.96078431372549,
320
+ "eval_accuracy": 0.8895027624309392,
321
+ "eval_loss": 0.3459985554218292,
322
+ "eval_runtime": 1.4593,
323
+ "eval_samples_per_second": 124.035,
324
+ "eval_steps_per_second": 4.112,
325
+ "step": 229
326
+ },
327
+ {
328
+ "epoch": 18.03921568627451,
329
+ "grad_norm": 6.021714210510254,
330
+ "learning_rate": 2.3148148148148148e-06,
331
+ "loss": 0.2,
332
+ "step": 230
333
+ },
334
+ {
335
+ "epoch": 18.823529411764707,
336
+ "grad_norm": 3.8110241889953613,
337
+ "learning_rate": 0.0,
338
+ "loss": 0.1972,
339
+ "step": 240
340
+ },
341
+ {
342
+ "epoch": 18.823529411764707,
343
+ "eval_accuracy": 0.8895027624309392,
344
+ "eval_loss": 0.34530630707740784,
345
+ "eval_runtime": 1.454,
346
+ "eval_samples_per_second": 124.486,
347
+ "eval_steps_per_second": 4.127,
348
+ "step": 240
349
  },
350
  {
351
+ "epoch": 18.823529411764707,
352
+ "step": 240,
353
+ "total_flos": 7.600391915087462e+17,
354
+ "train_loss": 0.3423821290334066,
355
+ "train_runtime": 426.6472,
356
+ "train_samples_per_second": 76.128,
357
+ "train_steps_per_second": 0.563
358
  }
359
  ],
360
  "logging_steps": 10,
361
+ "max_steps": 240,
362
  "num_input_tokens_seen": 0,
363
+ "num_train_epochs": 20,
364
  "save_steps": 500,
365
  "stateful_callbacks": {
366
  "CustomEarlyStoppingCallback": {
 
383
  "attributes": {}
384
  }
385
  },
386
+ "total_flos": 7.600391915087462e+17,
387
  "train_batch_size": 32,
388
  "trial_name": null,
389
  "trial_params": null