bombshelll commited on
Commit
9d15c81
·
verified ·
1 Parent(s): ae399dd

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +50 -334
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 18.823529411764707,
3
- "eval_accuracy": 0.9116022099447514,
4
- "eval_loss": 0.28710222244262695,
5
- "eval_runtime": 1.5215,
6
- "eval_samples_per_second": 118.965,
7
- "eval_steps_per_second": 3.944,
8
- "total_flos": 7.600391915087462e+17,
9
- "train_loss": 0.3423821290334066,
10
- "train_runtime": 426.6472,
11
- "train_samples_per_second": 76.128,
12
- "train_steps_per_second": 0.563
13
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9911111111111112,
4
+ "eval_loss": 0.02681696228682995,
5
+ "eval_runtime": 1.8717,
6
+ "eval_samples_per_second": 120.214,
7
+ "eval_steps_per_second": 4.274,
8
+ "total_flos": 1.5048656676458496e+17,
9
+ "train_loss": 0.5111757349222898,
10
+ "train_runtime": 85.6924,
11
+ "train_samples_per_second": 470.987,
12
+ "train_steps_per_second": 3.734
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 18.823529411764707,
3
- "eval_accuracy": 0.9116022099447514,
4
- "eval_loss": 0.28710222244262695,
5
- "eval_runtime": 1.5215,
6
- "eval_samples_per_second": 118.965,
7
- "eval_steps_per_second": 3.944
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9911111111111112,
4
+ "eval_loss": 0.02681696228682995,
5
+ "eval_runtime": 1.8717,
6
+ "eval_samples_per_second": 120.214,
7
+ "eval_steps_per_second": 4.274
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 18.823529411764707,
3
- "total_flos": 7.600391915087462e+17,
4
- "train_loss": 0.3423821290334066,
5
- "train_runtime": 426.6472,
6
- "train_samples_per_second": 76.128,
7
- "train_steps_per_second": 0.563
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "total_flos": 1.5048656676458496e+17,
4
+ "train_loss": 0.5111757349222898,
5
+ "train_runtime": 85.6924,
6
+ "train_samples_per_second": 470.987,
7
+ "train_steps_per_second": 3.734
8
  }
trainer_state.json CHANGED
@@ -1,364 +1,80 @@
1
  {
2
- "best_metric": 0.9116022099447514,
3
- "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-51",
4
- "epoch": 18.823529411764707,
5
  "eval_steps": 500,
6
- "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.7843137254901961,
13
- "grad_norm": 5.9126081466674805,
14
- "learning_rate": 2.0833333333333336e-05,
15
- "loss": 1.3069,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.9411764705882353,
20
- "eval_accuracy": 0.6077348066298343,
21
- "eval_loss": 0.9999544024467468,
22
- "eval_runtime": 1.4322,
23
- "eval_samples_per_second": 126.379,
24
- "eval_steps_per_second": 4.189,
25
- "step": 12
26
  },
27
  {
28
- "epoch": 1.5686274509803921,
29
- "grad_norm": 7.789252758026123,
30
- "learning_rate": 4.166666666666667e-05,
31
- "loss": 0.8924,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 1.9607843137254903,
36
- "eval_accuracy": 0.8784530386740331,
37
- "eval_loss": 0.43337252736091614,
38
- "eval_runtime": 1.4151,
39
- "eval_samples_per_second": 127.902,
40
- "eval_steps_per_second": 4.24,
41
- "step": 25
42
- },
43
- {
44
- "epoch": 2.3529411764705883,
45
- "grad_norm": 6.7294697761535645,
46
- "learning_rate": 4.8611111111111115e-05,
47
- "loss": 0.5365,
48
  "step": 30
49
  },
50
  {
51
- "epoch": 2.980392156862745,
52
- "eval_accuracy": 0.9005524861878453,
53
- "eval_loss": 0.3143160939216614,
54
- "eval_runtime": 1.4092,
55
- "eval_samples_per_second": 128.446,
56
- "eval_steps_per_second": 4.258,
57
- "step": 38
58
  },
59
  {
60
- "epoch": 3.1372549019607843,
61
- "grad_norm": 8.53775691986084,
62
- "learning_rate": 4.62962962962963e-05,
63
- "loss": 0.4119,
64
  "step": 40
65
  },
66
  {
67
- "epoch": 3.9215686274509802,
68
- "grad_norm": 8.774258613586426,
69
- "learning_rate": 4.3981481481481486e-05,
70
- "loss": 0.3814,
71
- "step": 50
72
- },
73
- {
74
- "epoch": 4.0,
75
- "eval_accuracy": 0.9116022099447514,
76
- "eval_loss": 0.28710222244262695,
77
- "eval_runtime": 1.4257,
78
- "eval_samples_per_second": 126.96,
79
- "eval_steps_per_second": 4.209,
80
- "step": 51
81
- },
82
- {
83
- "epoch": 4.705882352941177,
84
- "grad_norm": 5.285433769226074,
85
- "learning_rate": 4.166666666666667e-05,
86
- "loss": 0.3336,
87
- "step": 60
88
- },
89
- {
90
- "epoch": 4.9411764705882355,
91
- "eval_accuracy": 0.9116022099447514,
92
- "eval_loss": 0.2963091731071472,
93
- "eval_runtime": 1.415,
94
- "eval_samples_per_second": 127.916,
95
- "eval_steps_per_second": 4.24,
96
- "step": 63
97
- },
98
- {
99
- "epoch": 5.490196078431373,
100
- "grad_norm": 7.04965353012085,
101
- "learning_rate": 3.935185185185186e-05,
102
- "loss": 0.353,
103
- "step": 70
104
- },
105
- {
106
- "epoch": 5.96078431372549,
107
- "eval_accuracy": 0.8729281767955801,
108
- "eval_loss": 0.31954672932624817,
109
- "eval_runtime": 1.4556,
110
- "eval_samples_per_second": 124.344,
111
- "eval_steps_per_second": 4.122,
112
- "step": 76
113
- },
114
- {
115
- "epoch": 6.2745098039215685,
116
- "grad_norm": 5.833162307739258,
117
- "learning_rate": 3.7037037037037037e-05,
118
- "loss": 0.3069,
119
- "step": 80
120
- },
121
- {
122
- "epoch": 6.980392156862745,
123
- "eval_accuracy": 0.9116022099447514,
124
- "eval_loss": 0.29521241784095764,
125
- "eval_runtime": 1.4158,
126
- "eval_samples_per_second": 127.846,
127
- "eval_steps_per_second": 4.238,
128
- "step": 89
129
- },
130
- {
131
- "epoch": 7.0588235294117645,
132
- "grad_norm": 5.050061225891113,
133
- "learning_rate": 3.472222222222222e-05,
134
- "loss": 0.2789,
135
- "step": 90
136
- },
137
- {
138
- "epoch": 7.8431372549019605,
139
- "grad_norm": 4.222379207611084,
140
- "learning_rate": 3.240740740740741e-05,
141
- "loss": 0.293,
142
- "step": 100
143
- },
144
- {
145
- "epoch": 8.0,
146
- "eval_accuracy": 0.8895027624309392,
147
- "eval_loss": 0.3174145817756653,
148
- "eval_runtime": 1.4186,
149
- "eval_samples_per_second": 127.591,
150
- "eval_steps_per_second": 4.23,
151
- "step": 102
152
- },
153
- {
154
- "epoch": 8.627450980392156,
155
- "grad_norm": 7.039156436920166,
156
- "learning_rate": 3.0092592592592593e-05,
157
- "loss": 0.2667,
158
- "step": 110
159
- },
160
- {
161
- "epoch": 8.941176470588236,
162
- "eval_accuracy": 0.8950276243093923,
163
- "eval_loss": 0.3225868344306946,
164
- "eval_runtime": 1.4137,
165
- "eval_samples_per_second": 128.03,
166
- "eval_steps_per_second": 4.244,
167
- "step": 114
168
- },
169
- {
170
- "epoch": 9.411764705882353,
171
- "grad_norm": 5.598822593688965,
172
- "learning_rate": 2.777777777777778e-05,
173
- "loss": 0.2424,
174
- "step": 120
175
- },
176
- {
177
- "epoch": 9.96078431372549,
178
- "eval_accuracy": 0.8895027624309392,
179
- "eval_loss": 0.3213161826133728,
180
- "eval_runtime": 1.4313,
181
- "eval_samples_per_second": 126.455,
182
- "eval_steps_per_second": 4.192,
183
- "step": 127
184
- },
185
- {
186
- "epoch": 10.196078431372548,
187
- "grad_norm": 4.785697937011719,
188
- "learning_rate": 2.5462962962962965e-05,
189
- "loss": 0.2544,
190
- "step": 130
191
- },
192
- {
193
- "epoch": 10.980392156862745,
194
- "grad_norm": 5.349719047546387,
195
- "learning_rate": 2.314814814814815e-05,
196
- "loss": 0.2605,
197
- "step": 140
198
- },
199
- {
200
- "epoch": 10.980392156862745,
201
- "eval_accuracy": 0.8895027624309392,
202
- "eval_loss": 0.31716108322143555,
203
- "eval_runtime": 1.4269,
204
- "eval_samples_per_second": 126.846,
205
- "eval_steps_per_second": 4.205,
206
- "step": 140
207
- },
208
- {
209
- "epoch": 11.764705882352942,
210
- "grad_norm": 6.121713161468506,
211
- "learning_rate": 2.0833333333333336e-05,
212
- "loss": 0.232,
213
- "step": 150
214
- },
215
- {
216
- "epoch": 12.0,
217
- "eval_accuracy": 0.8895027624309392,
218
- "eval_loss": 0.33846884965896606,
219
- "eval_runtime": 1.406,
220
- "eval_samples_per_second": 128.737,
221
- "eval_steps_per_second": 4.268,
222
- "step": 153
223
- },
224
- {
225
- "epoch": 12.549019607843137,
226
- "grad_norm": 7.647618770599365,
227
- "learning_rate": 1.8518518518518518e-05,
228
- "loss": 0.242,
229
- "step": 160
230
- },
231
- {
232
- "epoch": 12.941176470588236,
233
- "eval_accuracy": 0.8950276243093923,
234
- "eval_loss": 0.32744264602661133,
235
- "eval_runtime": 1.4273,
236
- "eval_samples_per_second": 126.813,
237
- "eval_steps_per_second": 4.204,
238
- "step": 165
239
- },
240
- {
241
- "epoch": 13.333333333333334,
242
- "grad_norm": 6.248785972595215,
243
- "learning_rate": 1.6203703703703704e-05,
244
- "loss": 0.215,
245
- "step": 170
246
- },
247
- {
248
- "epoch": 13.96078431372549,
249
- "eval_accuracy": 0.8950276243093923,
250
- "eval_loss": 0.33850720524787903,
251
- "eval_runtime": 1.4433,
252
- "eval_samples_per_second": 125.407,
253
- "eval_steps_per_second": 4.157,
254
- "step": 178
255
- },
256
- {
257
- "epoch": 14.117647058823529,
258
- "grad_norm": 5.345800876617432,
259
- "learning_rate": 1.388888888888889e-05,
260
- "loss": 0.2123,
261
- "step": 180
262
- },
263
- {
264
- "epoch": 14.901960784313726,
265
- "grad_norm": 5.421293258666992,
266
- "learning_rate": 1.1574074074074075e-05,
267
- "loss": 0.2131,
268
- "step": 190
269
- },
270
- {
271
- "epoch": 14.980392156862745,
272
- "eval_accuracy": 0.8950276243093923,
273
- "eval_loss": 0.34223416447639465,
274
- "eval_runtime": 1.409,
275
- "eval_samples_per_second": 128.462,
276
- "eval_steps_per_second": 4.258,
277
- "step": 191
278
- },
279
- {
280
- "epoch": 15.686274509803921,
281
- "grad_norm": 4.188720703125,
282
- "learning_rate": 9.259259259259259e-06,
283
- "loss": 0.201,
284
- "step": 200
285
- },
286
- {
287
- "epoch": 16.0,
288
- "eval_accuracy": 0.8784530386740331,
289
- "eval_loss": 0.341948539018631,
290
- "eval_runtime": 1.4191,
291
- "eval_samples_per_second": 127.549,
292
- "eval_steps_per_second": 4.228,
293
- "step": 204
294
- },
295
- {
296
- "epoch": 16.470588235294116,
297
- "grad_norm": 4.887516498565674,
298
- "learning_rate": 6.944444444444445e-06,
299
- "loss": 0.1976,
300
- "step": 210
301
- },
302
- {
303
- "epoch": 16.941176470588236,
304
- "eval_accuracy": 0.9005524861878453,
305
- "eval_loss": 0.3447644114494324,
306
- "eval_runtime": 1.4043,
307
- "eval_samples_per_second": 128.89,
308
- "eval_steps_per_second": 4.273,
309
- "step": 216
310
- },
311
- {
312
- "epoch": 17.254901960784313,
313
- "grad_norm": 5.946260452270508,
314
- "learning_rate": 4.6296296296296296e-06,
315
- "loss": 0.1886,
316
- "step": 220
317
- },
318
- {
319
- "epoch": 17.96078431372549,
320
- "eval_accuracy": 0.8895027624309392,
321
- "eval_loss": 0.3459985554218292,
322
- "eval_runtime": 1.4593,
323
- "eval_samples_per_second": 124.035,
324
- "eval_steps_per_second": 4.112,
325
- "step": 229
326
- },
327
- {
328
- "epoch": 18.03921568627451,
329
- "grad_norm": 6.021714210510254,
330
- "learning_rate": 2.3148148148148148e-06,
331
- "loss": 0.2,
332
- "step": 230
333
- },
334
- {
335
- "epoch": 18.823529411764707,
336
- "grad_norm": 3.8110241889953613,
337
- "learning_rate": 0.0,
338
- "loss": 0.1972,
339
- "step": 240
340
- },
341
- {
342
- "epoch": 18.823529411764707,
343
- "eval_accuracy": 0.8895027624309392,
344
- "eval_loss": 0.34530630707740784,
345
- "eval_runtime": 1.454,
346
- "eval_samples_per_second": 124.486,
347
- "eval_steps_per_second": 4.127,
348
- "step": 240
349
  },
350
  {
351
- "epoch": 18.823529411764707,
352
- "step": 240,
353
- "total_flos": 7.600391915087462e+17,
354
- "train_loss": 0.3423821290334066,
355
- "train_runtime": 426.6472,
356
- "train_samples_per_second": 76.128,
357
- "train_steps_per_second": 0.563
358
  }
359
  ],
360
  "logging_steps": 10,
361
- "max_steps": 240,
362
  "num_input_tokens_seen": 0,
363
  "num_train_epochs": 20,
364
  "save_steps": 500,
@@ -383,7 +99,7 @@
383
  "attributes": {}
384
  }
385
  },
386
- "total_flos": 7.600391915087462e+17,
387
  "train_batch_size": 32,
388
  "trial_name": null,
389
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9911111111111112,
3
+ "best_model_checkpoint": "/kaggle/working/swin-brain-modality-classification/checkpoint-48",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.625,
13
+ "grad_norm": 9.655741691589355,
14
+ "learning_rate": 1.5625e-05,
15
+ "loss": 1.364,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.8266666666666667,
21
+ "eval_loss": 0.6507545113563538,
22
+ "eval_runtime": 1.8483,
23
+ "eval_samples_per_second": 121.736,
24
+ "eval_steps_per_second": 4.328,
25
+ "step": 16
26
  },
27
  {
28
+ "epoch": 1.25,
29
+ "grad_norm": 6.807246685028076,
30
+ "learning_rate": 3.125e-05,
31
+ "loss": 0.7521,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 1.875,
36
+ "grad_norm": 5.148420333862305,
37
+ "learning_rate": 4.6875e-05,
38
+ "loss": 0.2053,
 
 
 
 
 
 
 
 
 
39
  "step": 30
40
  },
41
  {
42
+ "epoch": 2.0,
43
+ "eval_accuracy": 0.9555555555555556,
44
+ "eval_loss": 0.08017182350158691,
45
+ "eval_runtime": 1.8504,
46
+ "eval_samples_per_second": 121.592,
47
+ "eval_steps_per_second": 4.323,
48
+ "step": 32
49
  },
50
  {
51
+ "epoch": 2.5,
52
+ "grad_norm": 9.427779197692871,
53
+ "learning_rate": 4.8611111111111115e-05,
54
+ "loss": 0.0841,
55
  "step": 40
56
  },
57
  {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.9911111111111112,
60
+ "eval_loss": 0.02681696228682995,
61
+ "eval_runtime": 1.886,
62
+ "eval_samples_per_second": 119.3,
63
+ "eval_steps_per_second": 4.242,
64
+ "step": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  },
66
  {
67
+ "epoch": 3.0,
68
+ "step": 48,
69
+ "total_flos": 1.5048656676458496e+17,
70
+ "train_loss": 0.5111757349222898,
71
+ "train_runtime": 85.6924,
72
+ "train_samples_per_second": 470.987,
73
+ "train_steps_per_second": 3.734
74
  }
75
  ],
76
  "logging_steps": 10,
77
+ "max_steps": 320,
78
  "num_input_tokens_seen": 0,
79
  "num_train_epochs": 20,
80
  "save_steps": 500,
 
99
  "attributes": {}
100
  }
101
  },
102
+ "total_flos": 1.5048656676458496e+17,
103
  "train_batch_size": 32,
104
  "trial_name": null,
105
  "trial_params": null