bmedeiros commited on
Commit
494c434
·
verified ·
1 Parent(s): 194aa8e

End of training

Browse files
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.7395626242544732
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,8 +33,8 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.5320
37
- - Accuracy: 0.7396
38
 
39
  ## Model description
40
 
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.8071570576540755
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.4697
37
+ - Accuracy: 0.8072
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
- "epoch": 38.4,
3
- "total_flos": 7.390182645274706e+17,
4
- "train_loss": 0.35739548206329347,
5
- "train_runtime": 377.1425,
6
- "train_samples_per_second": 82.091,
7
- "train_steps_per_second": 0.636
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 53.333333333333336,
3
+ "eval_accuracy": 0.8071570576540755,
4
+ "eval_loss": 0.46969088912010193,
5
+ "eval_runtime": 3.0372,
6
+ "eval_samples_per_second": 165.611,
7
+ "eval_steps_per_second": 5.268,
8
+ "total_flos": 1.140094502803243e+18,
9
+ "train_loss": 0.34168325927522447,
10
+ "train_runtime": 709.0525,
11
+ "train_samples_per_second": 72.773,
12
+ "train_steps_per_second": 0.508
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.9936034115138592,
3
- "eval_accuracy": 0.9696,
4
- "eval_loss": 0.08869826793670654,
5
- "eval_runtime": 29.3551,
6
- "eval_samples_per_second": 170.328,
7
- "eval_steps_per_second": 5.348
8
  }
 
1
  {
2
+ "epoch": 53.333333333333336,
3
+ "eval_accuracy": 0.8071570576540755,
4
+ "eval_loss": 0.46969088912010193,
5
+ "eval_runtime": 3.0372,
6
+ "eval_samples_per_second": 165.611,
7
+ "eval_steps_per_second": 5.268
8
  }
runs/Dec09_18-28-48_c9ba084c85bf/events.out.tfevents.1733769728.c9ba084c85bf.5668.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:566e64b29ae31d91cbd718c6da762c24c347a2cf668ad17054c84f497b0a6681
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 38.4,
3
- "total_flos": 7.390182645274706e+17,
4
- "train_loss": 0.35739548206329347,
5
- "train_runtime": 377.1425,
6
- "train_samples_per_second": 82.091,
7
- "train_steps_per_second": 0.636
8
  }
 
1
  {
2
+ "epoch": 53.333333333333336,
3
+ "total_flos": 1.140094502803243e+18,
4
+ "train_loss": 0.34168325927522447,
5
+ "train_runtime": 709.0525,
6
+ "train_samples_per_second": 72.773,
7
+ "train_steps_per_second": 0.508
8
  }
trainer_state.json CHANGED
@@ -1,546 +1,765 @@
1
  {
2
- "best_metric": 0.8023255813953488,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-100",
4
- "epoch": 38.4,
5
  "eval_steps": 500,
6
- "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.96,
13
- "eval_accuracy": 0.686046511627907,
14
- "eval_loss": 0.5580831170082092,
15
- "eval_runtime": 0.4689,
16
- "eval_samples_per_second": 183.412,
17
- "eval_steps_per_second": 6.398,
18
  "step": 6
19
  },
20
  {
21
- "epoch": 1.6,
22
- "grad_norm": 11.4143648147583,
23
- "learning_rate": 2.0833333333333336e-05,
24
- "loss": 0.5082,
25
  "step": 10
26
  },
27
  {
28
- "epoch": 1.92,
29
- "eval_accuracy": 0.6627906976744186,
30
- "eval_loss": 0.5865343809127808,
31
- "eval_runtime": 0.4656,
32
- "eval_samples_per_second": 184.706,
33
- "eval_steps_per_second": 6.443,
34
- "step": 12
35
  },
36
  {
37
- "epoch": 2.88,
38
- "eval_accuracy": 0.686046511627907,
39
- "eval_loss": 0.5983394980430603,
40
- "eval_runtime": 0.457,
41
- "eval_samples_per_second": 188.165,
42
- "eval_steps_per_second": 6.564,
43
- "step": 18
44
  },
45
  {
46
- "epoch": 3.2,
47
- "grad_norm": 43.284976959228516,
48
- "learning_rate": 4.166666666666667e-05,
49
- "loss": 0.4618,
 
 
50
  "step": 20
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.6744186046511628,
55
- "eval_loss": 0.6791186928749084,
56
- "eval_runtime": 0.4897,
57
- "eval_samples_per_second": 175.601,
58
- "eval_steps_per_second": 6.126,
59
- "step": 25
60
- },
61
- {
62
- "epoch": 4.8,
63
- "grad_norm": 52.168453216552734,
64
- "learning_rate": 4.8611111111111115e-05,
65
- "loss": 0.3901,
66
- "step": 30
67
  },
68
  {
69
- "epoch": 4.96,
70
- "eval_accuracy": 0.7325581395348837,
71
- "eval_loss": 0.5641953945159912,
72
- "eval_runtime": 0.4579,
73
- "eval_samples_per_second": 187.798,
74
- "eval_steps_per_second": 6.551,
75
- "step": 31
76
  },
77
  {
78
- "epoch": 5.92,
79
- "eval_accuracy": 0.7093023255813954,
80
- "eval_loss": 0.5044412612915039,
81
- "eval_runtime": 0.4414,
82
- "eval_samples_per_second": 194.839,
83
- "eval_steps_per_second": 6.797,
84
- "step": 37
85
  },
86
  {
87
- "epoch": 6.4,
88
- "grad_norm": 144.19395446777344,
89
- "learning_rate": 4.62962962962963e-05,
90
- "loss": 0.4175,
91
  "step": 40
92
  },
93
  {
94
- "epoch": 6.88,
95
- "eval_accuracy": 0.6976744186046512,
96
- "eval_loss": 0.5285409688949585,
97
- "eval_runtime": 0.4518,
98
- "eval_samples_per_second": 190.363,
99
- "eval_steps_per_second": 6.641,
100
- "step": 43
101
  },
102
  {
103
- "epoch": 8.0,
104
- "grad_norm": 22.712488174438477,
105
- "learning_rate": 4.3981481481481486e-05,
106
- "loss": 0.4308,
107
- "step": 50
 
 
108
  },
109
  {
110
- "epoch": 8.0,
111
- "eval_accuracy": 0.7093023255813954,
112
- "eval_loss": 0.5152430534362793,
113
- "eval_runtime": 0.4733,
114
- "eval_samples_per_second": 181.688,
115
- "eval_steps_per_second": 6.338,
116
  "step": 50
117
  },
118
  {
119
- "epoch": 8.96,
120
- "eval_accuracy": 0.7209302325581395,
121
- "eval_loss": 0.5627985000610352,
122
- "eval_runtime": 0.4653,
123
- "eval_samples_per_second": 184.819,
124
- "eval_steps_per_second": 6.447,
125
- "step": 56
126
  },
127
  {
128
- "epoch": 9.6,
129
- "grad_norm": 19.49258041381836,
130
- "learning_rate": 4.166666666666667e-05,
131
- "loss": 0.3998,
132
  "step": 60
133
  },
134
  {
135
- "epoch": 9.92,
136
- "eval_accuracy": 0.7674418604651163,
137
- "eval_loss": 0.5401325821876526,
138
- "eval_runtime": 0.4492,
139
- "eval_samples_per_second": 191.442,
140
- "eval_steps_per_second": 6.678,
141
- "step": 62
142
  },
143
  {
144
- "epoch": 10.88,
145
- "eval_accuracy": 0.7790697674418605,
146
- "eval_loss": 0.5199148654937744,
147
- "eval_runtime": 0.4492,
148
- "eval_samples_per_second": 191.438,
149
- "eval_steps_per_second": 6.678,
150
- "step": 68
151
  },
152
  {
153
- "epoch": 11.2,
154
- "grad_norm": 16.36634063720703,
155
- "learning_rate": 3.935185185185186e-05,
156
- "loss": 0.3682,
157
  "step": 70
158
  },
159
  {
160
- "epoch": 12.0,
161
- "eval_accuracy": 0.7906976744186046,
162
- "eval_loss": 0.5042721033096313,
163
- "eval_runtime": 0.4513,
164
- "eval_samples_per_second": 190.572,
165
- "eval_steps_per_second": 6.648,
166
- "step": 75
167
  },
168
  {
169
- "epoch": 12.8,
170
- "grad_norm": 20.526153564453125,
171
- "learning_rate": 3.7037037037037037e-05,
172
- "loss": 0.3528,
173
  "step": 80
174
  },
175
  {
176
- "epoch": 12.96,
177
- "eval_accuracy": 0.7790697674418605,
178
- "eval_loss": 0.47959619760513306,
179
- "eval_runtime": 0.4571,
180
- "eval_samples_per_second": 188.126,
181
- "eval_steps_per_second": 6.563,
182
  "step": 81
183
  },
184
  {
185
- "epoch": 13.92,
186
- "eval_accuracy": 0.7790697674418605,
187
- "eval_loss": 0.4937508702278137,
188
- "eval_runtime": 0.4685,
189
- "eval_samples_per_second": 183.563,
190
- "eval_steps_per_second": 6.403,
191
  "step": 87
192
  },
193
  {
194
- "epoch": 14.4,
195
- "grad_norm": 24.452070236206055,
196
- "learning_rate": 3.472222222222222e-05,
197
- "loss": 0.3324,
198
  "step": 90
199
  },
200
  {
201
- "epoch": 14.88,
202
- "eval_accuracy": 0.7558139534883721,
203
- "eval_loss": 0.4879016876220703,
204
- "eval_runtime": 0.461,
205
- "eval_samples_per_second": 186.569,
206
- "eval_steps_per_second": 6.508,
207
- "step": 93
208
  },
209
  {
210
- "epoch": 16.0,
211
- "grad_norm": 20.879064559936523,
212
- "learning_rate": 3.240740740740741e-05,
213
- "loss": 0.3579,
214
  "step": 100
215
  },
216
  {
217
- "epoch": 16.0,
218
- "eval_accuracy": 0.8023255813953488,
219
- "eval_loss": 0.4972393214702606,
220
- "eval_runtime": 0.4593,
221
- "eval_samples_per_second": 187.231,
222
- "eval_steps_per_second": 6.531,
223
- "step": 100
224
  },
225
  {
226
- "epoch": 16.96,
227
- "eval_accuracy": 0.7674418604651163,
228
- "eval_loss": 0.45788562297821045,
229
- "eval_runtime": 0.4452,
230
- "eval_samples_per_second": 193.187,
231
- "eval_steps_per_second": 6.739,
232
- "step": 106
233
- },
234
- {
235
- "epoch": 17.6,
236
- "grad_norm": 30.157981872558594,
237
- "learning_rate": 3.0092592592592593e-05,
238
- "loss": 0.3566,
239
  "step": 110
240
  },
241
  {
242
- "epoch": 17.92,
243
- "eval_accuracy": 0.7790697674418605,
244
- "eval_loss": 0.48914143443107605,
245
- "eval_runtime": 0.4757,
246
- "eval_samples_per_second": 180.776,
247
- "eval_steps_per_second": 6.306,
248
- "step": 112
249
  },
250
  {
251
- "epoch": 18.88,
252
- "eval_accuracy": 0.8023255813953488,
253
- "eval_loss": 0.4653853178024292,
254
- "eval_runtime": 0.4882,
255
- "eval_samples_per_second": 176.154,
256
- "eval_steps_per_second": 6.145,
257
- "step": 118
258
- },
259
- {
260
- "epoch": 19.2,
261
- "grad_norm": 18.67655372619629,
262
- "learning_rate": 2.777777777777778e-05,
263
- "loss": 0.3382,
264
  "step": 120
265
  },
266
  {
267
- "epoch": 20.0,
268
- "eval_accuracy": 0.7906976744186046,
269
- "eval_loss": 0.46721386909484863,
270
- "eval_runtime": 0.4644,
271
- "eval_samples_per_second": 185.204,
272
- "eval_steps_per_second": 6.461,
273
- "step": 125
274
- },
275
- {
276
- "epoch": 20.8,
277
- "grad_norm": 16.18861198425293,
278
- "learning_rate": 2.5462962962962965e-05,
279
- "loss": 0.3534,
280
- "step": 130
281
  },
282
  {
283
- "epoch": 20.96,
284
- "eval_accuracy": 0.7790697674418605,
285
- "eval_loss": 0.4687628149986267,
286
- "eval_runtime": 0.4743,
287
- "eval_samples_per_second": 181.318,
288
- "eval_steps_per_second": 6.325,
289
- "step": 131
290
  },
291
  {
292
- "epoch": 21.92,
293
- "eval_accuracy": 0.7558139534883721,
294
- "eval_loss": 0.48910030722618103,
295
- "eval_runtime": 0.4909,
296
- "eval_samples_per_second": 175.174,
297
- "eval_steps_per_second": 6.111,
298
- "step": 137
299
  },
300
  {
301
- "epoch": 22.4,
302
- "grad_norm": 16.807844161987305,
303
- "learning_rate": 2.314814814814815e-05,
304
- "loss": 0.3462,
 
 
 
 
 
 
 
 
 
305
  "step": 140
306
  },
307
  {
308
- "epoch": 22.88,
309
- "eval_accuracy": 0.7441860465116279,
310
- "eval_loss": 0.502510130405426,
311
- "eval_runtime": 0.4792,
312
- "eval_samples_per_second": 179.482,
313
- "eval_steps_per_second": 6.261,
314
- "step": 143
315
  },
316
  {
317
- "epoch": 24.0,
318
- "grad_norm": 37.028865814208984,
319
- "learning_rate": 2.0833333333333336e-05,
320
- "loss": 0.3208,
321
- "step": 150
 
 
322
  },
323
  {
324
- "epoch": 24.0,
325
- "eval_accuracy": 0.7674418604651163,
326
- "eval_loss": 0.5026240944862366,
327
- "eval_runtime": 0.4788,
328
- "eval_samples_per_second": 179.607,
329
- "eval_steps_per_second": 6.265,
330
  "step": 150
331
  },
332
  {
333
- "epoch": 24.96,
334
- "eval_accuracy": 0.7674418604651163,
335
- "eval_loss": 0.493563175201416,
336
- "eval_runtime": 0.4917,
337
- "eval_samples_per_second": 174.896,
338
- "eval_steps_per_second": 6.101,
339
- "step": 156
340
  },
341
  {
342
- "epoch": 25.6,
343
- "grad_norm": 10.407114028930664,
344
- "learning_rate": 1.8518518518518518e-05,
345
- "loss": 0.3408,
346
  "step": 160
347
  },
348
  {
349
- "epoch": 25.92,
350
- "eval_accuracy": 0.7790697674418605,
351
- "eval_loss": 0.46066081523895264,
352
- "eval_runtime": 0.4807,
353
- "eval_samples_per_second": 178.913,
354
- "eval_steps_per_second": 6.241,
355
  "step": 162
356
  },
357
  {
358
- "epoch": 26.88,
359
- "eval_accuracy": 0.7906976744186046,
360
- "eval_loss": 0.4420366883277893,
361
- "eval_runtime": 0.4813,
362
- "eval_samples_per_second": 178.692,
363
- "eval_steps_per_second": 6.233,
364
  "step": 168
365
  },
366
  {
367
- "epoch": 27.2,
368
- "grad_norm": 10.114925384521484,
369
- "learning_rate": 1.6203703703703704e-05,
370
- "loss": 0.333,
371
  "step": 170
372
  },
373
  {
374
- "epoch": 28.0,
375
- "eval_accuracy": 0.7906976744186046,
376
- "eval_loss": 0.4296189248561859,
377
- "eval_runtime": 0.4717,
378
- "eval_samples_per_second": 182.312,
379
- "eval_steps_per_second": 6.36,
380
  "step": 175
381
  },
382
  {
383
- "epoch": 28.8,
384
- "grad_norm": 16.923192977905273,
385
- "learning_rate": 1.388888888888889e-05,
386
- "loss": 0.3169,
387
  "step": 180
388
  },
389
  {
390
- "epoch": 28.96,
391
- "eval_accuracy": 0.7906976744186046,
392
- "eval_loss": 0.4503108263015747,
393
- "eval_runtime": 0.4671,
394
- "eval_samples_per_second": 184.114,
395
- "eval_steps_per_second": 6.423,
396
- "step": 181
397
  },
398
  {
399
- "epoch": 29.92,
400
- "eval_accuracy": 0.8023255813953488,
401
- "eval_loss": 0.4894145727157593,
402
- "eval_runtime": 0.4851,
403
- "eval_samples_per_second": 177.272,
404
- "eval_steps_per_second": 6.184,
405
- "step": 187
406
  },
407
  {
408
- "epoch": 30.4,
409
- "grad_norm": 16.85201072692871,
410
- "learning_rate": 1.1574074074074075e-05,
411
- "loss": 0.3267,
412
  "step": 190
413
  },
414
  {
415
- "epoch": 30.88,
416
- "eval_accuracy": 0.7906976744186046,
417
- "eval_loss": 0.4838241636753082,
418
- "eval_runtime": 0.4795,
419
- "eval_samples_per_second": 179.37,
420
- "eval_steps_per_second": 6.257,
421
- "step": 193
422
  },
423
  {
424
- "epoch": 32.0,
425
- "grad_norm": 19.141088485717773,
426
- "learning_rate": 9.259259259259259e-06,
427
- "loss": 0.3114,
428
  "step": 200
429
  },
430
  {
431
- "epoch": 32.0,
432
- "eval_accuracy": 0.7790697674418605,
433
- "eval_loss": 0.5220069289207458,
434
- "eval_runtime": 0.5078,
435
- "eval_samples_per_second": 169.353,
436
- "eval_steps_per_second": 5.908,
437
- "step": 200
438
  },
439
  {
440
- "epoch": 32.96,
441
- "eval_accuracy": 0.7790697674418605,
442
- "eval_loss": 0.49199116230010986,
443
- "eval_runtime": 0.4644,
444
- "eval_samples_per_second": 185.186,
445
- "eval_steps_per_second": 6.46,
446
- "step": 206
447
  },
448
  {
449
- "epoch": 33.6,
450
- "grad_norm": 27.817716598510742,
451
- "learning_rate": 6.944444444444445e-06,
452
- "loss": 0.3143,
453
  "step": 210
454
  },
455
  {
456
- "epoch": 33.92,
457
- "eval_accuracy": 0.7441860465116279,
458
- "eval_loss": 0.48098844289779663,
459
- "eval_runtime": 0.4803,
460
- "eval_samples_per_second": 179.049,
461
- "eval_steps_per_second": 6.246,
462
- "step": 212
 
 
 
 
 
 
 
463
  },
464
  {
465
- "epoch": 34.88,
466
- "eval_accuracy": 0.7558139534883721,
467
- "eval_loss": 0.4828723073005676,
468
- "eval_runtime": 0.492,
469
- "eval_samples_per_second": 174.781,
470
- "eval_steps_per_second": 6.097,
471
- "step": 218
472
  },
473
  {
474
- "epoch": 35.2,
475
- "grad_norm": 46.449974060058594,
476
- "learning_rate": 4.6296296296296296e-06,
477
- "loss": 0.3034,
478
- "step": 220
 
 
479
  },
480
  {
481
- "epoch": 36.0,
482
- "eval_accuracy": 0.7674418604651163,
483
- "eval_loss": 0.4835253953933716,
484
- "eval_runtime": 0.496,
485
- "eval_samples_per_second": 173.392,
486
- "eval_steps_per_second": 6.049,
487
- "step": 225
488
- },
489
- {
490
- "epoch": 36.8,
491
- "grad_norm": 18.135772705078125,
492
- "learning_rate": 2.3148148148148148e-06,
493
- "loss": 0.2944,
494
  "step": 230
495
  },
496
  {
497
- "epoch": 36.96,
498
- "eval_accuracy": 0.7790697674418605,
499
- "eval_loss": 0.4811899960041046,
500
- "eval_runtime": 0.4612,
501
- "eval_samples_per_second": 186.468,
502
- "eval_steps_per_second": 6.505,
503
- "step": 231
504
  },
505
  {
506
- "epoch": 37.92,
507
- "eval_accuracy": 0.7674418604651163,
508
- "eval_loss": 0.47796696424484253,
509
- "eval_runtime": 0.4674,
510
- "eval_samples_per_second": 183.98,
511
- "eval_steps_per_second": 6.418,
512
- "step": 237
513
  },
514
  {
515
- "epoch": 38.4,
516
- "grad_norm": 20.760650634765625,
517
- "learning_rate": 0.0,
518
- "loss": 0.3018,
519
- "step": 240
 
 
520
  },
521
  {
522
- "epoch": 38.4,
523
- "eval_accuracy": 0.7674418604651163,
524
- "eval_loss": 0.4778375029563904,
525
- "eval_runtime": 0.4679,
526
- "eval_samples_per_second": 183.798,
527
- "eval_steps_per_second": 6.412,
528
- "step": 240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  },
530
  {
531
- "epoch": 38.4,
532
- "step": 240,
533
- "total_flos": 7.390182645274706e+17,
534
- "train_loss": 0.35739548206329347,
535
- "train_runtime": 377.1425,
536
- "train_samples_per_second": 82.091,
537
- "train_steps_per_second": 0.636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  }
539
  ],
540
  "logging_steps": 10,
541
- "max_steps": 240,
542
  "num_input_tokens_seen": 0,
543
- "num_train_epochs": 40,
544
  "save_steps": 500,
545
  "stateful_callbacks": {
546
  "TrainerControl": {
@@ -554,7 +773,7 @@
554
  "attributes": {}
555
  }
556
  },
557
- "total_flos": 7.390182645274706e+17,
558
  "train_batch_size": 32,
559
  "trial_name": null,
560
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8071570576540755,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-101",
4
+ "epoch": 53.333333333333336,
5
  "eval_steps": 500,
6
+ "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.6898608349900597,
14
+ "eval_loss": 0.6376240849494934,
15
+ "eval_runtime": 2.8661,
16
+ "eval_samples_per_second": 175.501,
17
+ "eval_steps_per_second": 5.583,
18
  "step": 6
19
  },
20
  {
21
+ "epoch": 1.4814814814814814,
22
+ "grad_norm": 9.892236709594727,
23
+ "learning_rate": 1.388888888888889e-05,
24
+ "loss": 0.6757,
25
  "step": 10
26
  },
27
  {
28
+ "epoch": 1.925925925925926,
29
+ "eval_accuracy": 0.6938369781312127,
30
+ "eval_loss": 0.6052560806274414,
31
+ "eval_runtime": 2.8417,
32
+ "eval_samples_per_second": 177.006,
33
+ "eval_steps_per_second": 5.63,
34
+ "step": 13
35
  },
36
  {
37
+ "epoch": 2.962962962962963,
38
+ "grad_norm": 8.984474182128906,
39
+ "learning_rate": 2.777777777777778e-05,
40
+ "loss": 0.5472,
41
+ "step": 20
 
 
42
  },
43
  {
44
+ "epoch": 2.962962962962963,
45
+ "eval_accuracy": 0.7256461232604374,
46
+ "eval_loss": 0.5903410315513611,
47
+ "eval_runtime": 2.7461,
48
+ "eval_samples_per_second": 183.169,
49
+ "eval_steps_per_second": 5.826,
50
  "step": 20
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.731610337972167,
55
+ "eval_loss": 0.5782421827316284,
56
+ "eval_runtime": 2.7061,
57
+ "eval_samples_per_second": 185.878,
58
+ "eval_steps_per_second": 5.913,
59
+ "step": 27
 
 
 
 
 
 
 
60
  },
61
  {
62
+ "epoch": 4.444444444444445,
63
+ "grad_norm": 13.92780590057373,
64
+ "learning_rate": 4.166666666666667e-05,
65
+ "loss": 0.4628,
66
+ "step": 30
 
 
67
  },
68
  {
69
+ "epoch": 4.888888888888889,
70
+ "eval_accuracy": 0.7455268389662028,
71
+ "eval_loss": 0.5979239344596863,
72
+ "eval_runtime": 2.6102,
73
+ "eval_samples_per_second": 192.706,
74
+ "eval_steps_per_second": 6.13,
75
+ "step": 33
76
  },
77
  {
78
+ "epoch": 5.925925925925926,
79
+ "grad_norm": 25.355466842651367,
80
+ "learning_rate": 4.938271604938271e-05,
81
+ "loss": 0.4181,
82
  "step": 40
83
  },
84
  {
85
+ "epoch": 5.925925925925926,
86
+ "eval_accuracy": 0.7614314115308151,
87
+ "eval_loss": 0.5735302567481995,
88
+ "eval_runtime": 2.7235,
89
+ "eval_samples_per_second": 184.688,
90
+ "eval_steps_per_second": 5.875,
91
+ "step": 40
92
  },
93
  {
94
+ "epoch": 6.962962962962963,
95
+ "eval_accuracy": 0.7495029821073559,
96
+ "eval_loss": 0.5251761078834534,
97
+ "eval_runtime": 2.662,
98
+ "eval_samples_per_second": 188.956,
99
+ "eval_steps_per_second": 6.011,
100
+ "step": 47
101
  },
102
  {
103
+ "epoch": 7.407407407407407,
104
+ "grad_norm": 46.14825439453125,
105
+ "learning_rate": 4.783950617283951e-05,
106
+ "loss": 0.4079,
 
 
107
  "step": 50
108
  },
109
  {
110
+ "epoch": 8.0,
111
+ "eval_accuracy": 0.7475149105367793,
112
+ "eval_loss": 0.5362666249275208,
113
+ "eval_runtime": 2.6877,
114
+ "eval_samples_per_second": 187.151,
115
+ "eval_steps_per_second": 5.953,
116
+ "step": 54
117
  },
118
  {
119
+ "epoch": 8.88888888888889,
120
+ "grad_norm": 22.70941925048828,
121
+ "learning_rate": 4.62962962962963e-05,
122
+ "loss": 0.4102,
123
  "step": 60
124
  },
125
  {
126
+ "epoch": 8.88888888888889,
127
+ "eval_accuracy": 0.7495029821073559,
128
+ "eval_loss": 0.5288791060447693,
129
+ "eval_runtime": 2.7002,
130
+ "eval_samples_per_second": 186.285,
131
+ "eval_steps_per_second": 5.926,
132
+ "step": 60
133
  },
134
  {
135
+ "epoch": 9.925925925925926,
136
+ "eval_accuracy": 0.7534791252485089,
137
+ "eval_loss": 0.522729754447937,
138
+ "eval_runtime": 2.7539,
139
+ "eval_samples_per_second": 182.652,
140
+ "eval_steps_per_second": 5.81,
141
+ "step": 67
142
  },
143
  {
144
+ "epoch": 10.37037037037037,
145
+ "grad_norm": 92.61900329589844,
146
+ "learning_rate": 4.4753086419753084e-05,
147
+ "loss": 0.373,
148
  "step": 70
149
  },
150
  {
151
+ "epoch": 10.962962962962964,
152
+ "eval_accuracy": 0.7773359840954275,
153
+ "eval_loss": 0.46767404675483704,
154
+ "eval_runtime": 2.7441,
155
+ "eval_samples_per_second": 183.301,
156
+ "eval_steps_per_second": 5.831,
157
+ "step": 74
158
  },
159
  {
160
+ "epoch": 11.851851851851851,
161
+ "grad_norm": 13.839974403381348,
162
+ "learning_rate": 4.3209876543209875e-05,
163
+ "loss": 0.3639,
164
  "step": 80
165
  },
166
  {
167
+ "epoch": 12.0,
168
+ "eval_accuracy": 0.7813121272365805,
169
+ "eval_loss": 0.4978141188621521,
170
+ "eval_runtime": 2.7627,
171
+ "eval_samples_per_second": 182.069,
172
+ "eval_steps_per_second": 5.791,
173
  "step": 81
174
  },
175
  {
176
+ "epoch": 12.88888888888889,
177
+ "eval_accuracy": 0.7992047713717694,
178
+ "eval_loss": 0.465084969997406,
179
+ "eval_runtime": 2.7393,
180
+ "eval_samples_per_second": 183.625,
181
+ "eval_steps_per_second": 5.841,
182
  "step": 87
183
  },
184
  {
185
+ "epoch": 13.333333333333334,
186
+ "grad_norm": 12.678291320800781,
187
+ "learning_rate": 4.166666666666667e-05,
188
+ "loss": 0.3779,
189
  "step": 90
190
  },
191
  {
192
+ "epoch": 13.925925925925926,
193
+ "eval_accuracy": 0.7912524850894632,
194
+ "eval_loss": 0.4737919569015503,
195
+ "eval_runtime": 2.7321,
196
+ "eval_samples_per_second": 184.107,
197
+ "eval_steps_per_second": 5.856,
198
+ "step": 94
199
  },
200
  {
201
+ "epoch": 14.814814814814815,
202
+ "grad_norm": 13.37376594543457,
203
+ "learning_rate": 4.012345679012346e-05,
204
+ "loss": 0.3476,
205
  "step": 100
206
  },
207
  {
208
+ "epoch": 14.962962962962964,
209
+ "eval_accuracy": 0.8071570576540755,
210
+ "eval_loss": 0.46969088912010193,
211
+ "eval_runtime": 2.704,
212
+ "eval_samples_per_second": 186.017,
213
+ "eval_steps_per_second": 5.917,
214
+ "step": 101
215
  },
216
  {
217
+ "epoch": 16.0,
218
+ "eval_accuracy": 0.7952286282306164,
219
+ "eval_loss": 0.47190144658088684,
220
+ "eval_runtime": 2.651,
221
+ "eval_samples_per_second": 189.741,
222
+ "eval_steps_per_second": 6.036,
223
+ "step": 108
224
+ },
225
+ {
226
+ "epoch": 16.296296296296298,
227
+ "grad_norm": 36.415714263916016,
228
+ "learning_rate": 3.8580246913580246e-05,
229
+ "loss": 0.3467,
230
  "step": 110
231
  },
232
  {
233
+ "epoch": 16.88888888888889,
234
+ "eval_accuracy": 0.7892644135188867,
235
+ "eval_loss": 0.4551805257797241,
236
+ "eval_runtime": 2.7016,
237
+ "eval_samples_per_second": 186.184,
238
+ "eval_steps_per_second": 5.922,
239
+ "step": 114
240
  },
241
  {
242
+ "epoch": 17.77777777777778,
243
+ "grad_norm": 21.425716400146484,
244
+ "learning_rate": 3.7037037037037037e-05,
245
+ "loss": 0.3496,
 
 
 
 
 
 
 
 
 
246
  "step": 120
247
  },
248
  {
249
+ "epoch": 17.925925925925927,
250
+ "eval_accuracy": 0.7713717693836978,
251
+ "eval_loss": 0.5186495780944824,
252
+ "eval_runtime": 2.9189,
253
+ "eval_samples_per_second": 172.324,
254
+ "eval_steps_per_second": 5.481,
255
+ "step": 121
 
 
 
 
 
 
 
256
  },
257
  {
258
+ "epoch": 18.962962962962962,
259
+ "eval_accuracy": 0.7952286282306164,
260
+ "eval_loss": 0.4575484097003937,
261
+ "eval_runtime": 2.8214,
262
+ "eval_samples_per_second": 178.277,
263
+ "eval_steps_per_second": 5.671,
264
+ "step": 128
265
  },
266
  {
267
+ "epoch": 19.25925925925926,
268
+ "grad_norm": 25.632354736328125,
269
+ "learning_rate": 3.5493827160493834e-05,
270
+ "loss": 0.3657,
271
+ "step": 130
 
 
272
  },
273
  {
274
+ "epoch": 20.0,
275
+ "eval_accuracy": 0.7793240556660039,
276
+ "eval_loss": 0.476377010345459,
277
+ "eval_runtime": 2.6973,
278
+ "eval_samples_per_second": 186.481,
279
+ "eval_steps_per_second": 5.932,
280
+ "step": 135
281
+ },
282
+ {
283
+ "epoch": 20.74074074074074,
284
+ "grad_norm": 37.57546615600586,
285
+ "learning_rate": 3.395061728395062e-05,
286
+ "loss": 0.3888,
287
  "step": 140
288
  },
289
  {
290
+ "epoch": 20.88888888888889,
291
+ "eval_accuracy": 0.7713717693836978,
292
+ "eval_loss": 0.5008699297904968,
293
+ "eval_runtime": 2.7366,
294
+ "eval_samples_per_second": 183.807,
295
+ "eval_steps_per_second": 5.847,
296
+ "step": 141
297
  },
298
  {
299
+ "epoch": 21.925925925925927,
300
+ "eval_accuracy": 0.7813121272365805,
301
+ "eval_loss": 0.46731534600257874,
302
+ "eval_runtime": 2.8548,
303
+ "eval_samples_per_second": 176.192,
304
+ "eval_steps_per_second": 5.605,
305
+ "step": 148
306
  },
307
  {
308
+ "epoch": 22.22222222222222,
309
+ "grad_norm": 23.367645263671875,
310
+ "learning_rate": 3.240740740740741e-05,
311
+ "loss": 0.3236,
 
 
312
  "step": 150
313
  },
314
  {
315
+ "epoch": 22.962962962962962,
316
+ "eval_accuracy": 0.7753479125248509,
317
+ "eval_loss": 0.493118554353714,
318
+ "eval_runtime": 2.8057,
319
+ "eval_samples_per_second": 179.277,
320
+ "eval_steps_per_second": 5.703,
321
+ "step": 155
322
  },
323
  {
324
+ "epoch": 23.703703703703702,
325
+ "grad_norm": 41.31706237792969,
326
+ "learning_rate": 3.08641975308642e-05,
327
+ "loss": 0.3179,
328
  "step": 160
329
  },
330
  {
331
+ "epoch": 24.0,
332
+ "eval_accuracy": 0.7654075546719682,
333
+ "eval_loss": 0.4837174415588379,
334
+ "eval_runtime": 2.8061,
335
+ "eval_samples_per_second": 179.252,
336
+ "eval_steps_per_second": 5.702,
337
  "step": 162
338
  },
339
  {
340
+ "epoch": 24.88888888888889,
341
+ "eval_accuracy": 0.7693836978131213,
342
+ "eval_loss": 0.4651556611061096,
343
+ "eval_runtime": 2.7801,
344
+ "eval_samples_per_second": 180.927,
345
+ "eval_steps_per_second": 5.755,
346
  "step": 168
347
  },
348
  {
349
+ "epoch": 25.185185185185187,
350
+ "grad_norm": 48.94294357299805,
351
+ "learning_rate": 2.9320987654320992e-05,
352
+ "loss": 0.327,
353
  "step": 170
354
  },
355
  {
356
+ "epoch": 25.925925925925927,
357
+ "eval_accuracy": 0.7495029821073559,
358
+ "eval_loss": 0.5107513070106506,
359
+ "eval_runtime": 2.746,
360
+ "eval_samples_per_second": 183.173,
361
+ "eval_steps_per_second": 5.827,
362
  "step": 175
363
  },
364
  {
365
+ "epoch": 26.666666666666668,
366
+ "grad_norm": 17.72180938720703,
367
+ "learning_rate": 2.777777777777778e-05,
368
+ "loss": 0.3253,
369
  "step": 180
370
  },
371
  {
372
+ "epoch": 26.962962962962962,
373
+ "eval_accuracy": 0.7833001988071571,
374
+ "eval_loss": 0.4423621594905853,
375
+ "eval_runtime": 2.851,
376
+ "eval_samples_per_second": 176.432,
377
+ "eval_steps_per_second": 5.612,
378
+ "step": 182
379
  },
380
  {
381
+ "epoch": 28.0,
382
+ "eval_accuracy": 0.7335984095427436,
383
+ "eval_loss": 0.5621975064277649,
384
+ "eval_runtime": 2.8441,
385
+ "eval_samples_per_second": 176.859,
386
+ "eval_steps_per_second": 5.626,
387
+ "step": 189
388
  },
389
  {
390
+ "epoch": 28.14814814814815,
391
+ "grad_norm": 26.241844177246094,
392
+ "learning_rate": 2.623456790123457e-05,
393
+ "loss": 0.3382,
394
  "step": 190
395
  },
396
  {
397
+ "epoch": 28.88888888888889,
398
+ "eval_accuracy": 0.7693836978131213,
399
+ "eval_loss": 0.5067819952964783,
400
+ "eval_runtime": 2.8675,
401
+ "eval_samples_per_second": 175.415,
402
+ "eval_steps_per_second": 5.58,
403
+ "step": 195
404
  },
405
  {
406
+ "epoch": 29.62962962962963,
407
+ "grad_norm": 12.02319049835205,
408
+ "learning_rate": 2.4691358024691357e-05,
409
+ "loss": 0.331,
410
  "step": 200
411
  },
412
  {
413
+ "epoch": 29.925925925925927,
414
+ "eval_accuracy": 0.7693836978131213,
415
+ "eval_loss": 0.45300325751304626,
416
+ "eval_runtime": 2.7755,
417
+ "eval_samples_per_second": 181.232,
418
+ "eval_steps_per_second": 5.765,
419
+ "step": 202
420
  },
421
  {
422
+ "epoch": 30.962962962962962,
423
+ "eval_accuracy": 0.731610337972167,
424
+ "eval_loss": 0.5205386877059937,
425
+ "eval_runtime": 2.8057,
426
+ "eval_samples_per_second": 179.276,
427
+ "eval_steps_per_second": 5.703,
428
+ "step": 209
429
  },
430
  {
431
+ "epoch": 31.11111111111111,
432
+ "grad_norm": 14.40517807006836,
433
+ "learning_rate": 2.314814814814815e-05,
434
+ "loss": 0.3302,
435
  "step": 210
436
  },
437
  {
438
+ "epoch": 32.0,
439
+ "eval_accuracy": 0.7852882703777336,
440
+ "eval_loss": 0.4385511875152588,
441
+ "eval_runtime": 2.8045,
442
+ "eval_samples_per_second": 179.357,
443
+ "eval_steps_per_second": 5.705,
444
+ "step": 216
445
+ },
446
+ {
447
+ "epoch": 32.592592592592595,
448
+ "grad_norm": 18.574872970581055,
449
+ "learning_rate": 2.1604938271604937e-05,
450
+ "loss": 0.2972,
451
+ "step": 220
452
  },
453
  {
454
+ "epoch": 32.888888888888886,
455
+ "eval_accuracy": 0.7773359840954275,
456
+ "eval_loss": 0.5030562281608582,
457
+ "eval_runtime": 2.6994,
458
+ "eval_samples_per_second": 186.336,
459
+ "eval_steps_per_second": 5.927,
460
+ "step": 222
461
  },
462
  {
463
+ "epoch": 33.925925925925924,
464
+ "eval_accuracy": 0.757455268389662,
465
+ "eval_loss": 0.49088525772094727,
466
+ "eval_runtime": 2.709,
467
+ "eval_samples_per_second": 185.676,
468
+ "eval_steps_per_second": 5.906,
469
+ "step": 229
470
  },
471
  {
472
+ "epoch": 34.074074074074076,
473
+ "grad_norm": 21.681509017944336,
474
+ "learning_rate": 2.006172839506173e-05,
475
+ "loss": 0.3121,
 
 
 
 
 
 
 
 
 
476
  "step": 230
477
  },
478
  {
479
+ "epoch": 34.96296296296296,
480
+ "eval_accuracy": 0.7793240556660039,
481
+ "eval_loss": 0.47658684849739075,
482
+ "eval_runtime": 2.7374,
483
+ "eval_samples_per_second": 183.751,
484
+ "eval_steps_per_second": 5.845,
485
+ "step": 236
486
  },
487
  {
488
+ "epoch": 35.55555555555556,
489
+ "grad_norm": 29.247716903686523,
490
+ "learning_rate": 1.8518518518518518e-05,
491
+ "loss": 0.2956,
492
+ "step": 240
 
 
493
  },
494
  {
495
+ "epoch": 36.0,
496
+ "eval_accuracy": 0.7415506958250497,
497
+ "eval_loss": 0.5262213945388794,
498
+ "eval_runtime": 2.8701,
499
+ "eval_samples_per_second": 175.256,
500
+ "eval_steps_per_second": 5.575,
501
+ "step": 243
502
  },
503
  {
504
+ "epoch": 36.888888888888886,
505
+ "eval_accuracy": 0.731610337972167,
506
+ "eval_loss": 0.5373868942260742,
507
+ "eval_runtime": 2.878,
508
+ "eval_samples_per_second": 174.771,
509
+ "eval_steps_per_second": 5.559,
510
+ "step": 249
511
+ },
512
+ {
513
+ "epoch": 37.03703703703704,
514
+ "grad_norm": 13.55726146697998,
515
+ "learning_rate": 1.697530864197531e-05,
516
+ "loss": 0.2947,
517
+ "step": 250
518
+ },
519
+ {
520
+ "epoch": 37.925925925925924,
521
+ "eval_accuracy": 0.7673956262425448,
522
+ "eval_loss": 0.48880261182785034,
523
+ "eval_runtime": 2.7134,
524
+ "eval_samples_per_second": 185.373,
525
+ "eval_steps_per_second": 5.897,
526
+ "step": 256
527
+ },
528
+ {
529
+ "epoch": 38.51851851851852,
530
+ "grad_norm": 8.393943786621094,
531
+ "learning_rate": 1.54320987654321e-05,
532
+ "loss": 0.2662,
533
+ "step": 260
534
+ },
535
+ {
536
+ "epoch": 38.96296296296296,
537
+ "eval_accuracy": 0.7693836978131213,
538
+ "eval_loss": 0.4880698323249817,
539
+ "eval_runtime": 2.7354,
540
+ "eval_samples_per_second": 183.888,
541
+ "eval_steps_per_second": 5.849,
542
+ "step": 263
543
+ },
544
+ {
545
+ "epoch": 40.0,
546
+ "grad_norm": 24.057205200195312,
547
+ "learning_rate": 1.388888888888889e-05,
548
+ "loss": 0.2826,
549
+ "step": 270
550
+ },
551
+ {
552
+ "epoch": 40.0,
553
+ "eval_accuracy": 0.7892644135188867,
554
+ "eval_loss": 0.46687519550323486,
555
+ "eval_runtime": 2.7865,
556
+ "eval_samples_per_second": 180.514,
557
+ "eval_steps_per_second": 5.742,
558
+ "step": 270
559
+ },
560
+ {
561
+ "epoch": 40.888888888888886,
562
+ "eval_accuracy": 0.7972166998011928,
563
+ "eval_loss": 0.45914533734321594,
564
+ "eval_runtime": 2.8041,
565
+ "eval_samples_per_second": 179.381,
566
+ "eval_steps_per_second": 5.706,
567
+ "step": 276
568
+ },
569
+ {
570
+ "epoch": 41.48148148148148,
571
+ "grad_norm": 20.68549346923828,
572
+ "learning_rate": 1.2345679012345678e-05,
573
+ "loss": 0.2768,
574
+ "step": 280
575
+ },
576
+ {
577
+ "epoch": 41.925925925925924,
578
+ "eval_accuracy": 0.757455268389662,
579
+ "eval_loss": 0.5089908838272095,
580
+ "eval_runtime": 2.9736,
581
+ "eval_samples_per_second": 169.157,
582
+ "eval_steps_per_second": 5.381,
583
+ "step": 283
584
+ },
585
+ {
586
+ "epoch": 42.96296296296296,
587
+ "grad_norm": 9.9234619140625,
588
+ "learning_rate": 1.0802469135802469e-05,
589
+ "loss": 0.2836,
590
+ "step": 290
591
+ },
592
+ {
593
+ "epoch": 42.96296296296296,
594
+ "eval_accuracy": 0.7495029821073559,
595
+ "eval_loss": 0.5249876379966736,
596
+ "eval_runtime": 2.8985,
597
+ "eval_samples_per_second": 173.54,
598
+ "eval_steps_per_second": 5.52,
599
+ "step": 290
600
+ },
601
+ {
602
+ "epoch": 44.0,
603
+ "eval_accuracy": 0.7654075546719682,
604
+ "eval_loss": 0.4747855067253113,
605
+ "eval_runtime": 2.9057,
606
+ "eval_samples_per_second": 173.108,
607
+ "eval_steps_per_second": 5.506,
608
+ "step": 297
609
+ },
610
+ {
611
+ "epoch": 44.44444444444444,
612
+ "grad_norm": 19.76637840270996,
613
+ "learning_rate": 9.259259259259259e-06,
614
+ "loss": 0.2724,
615
+ "step": 300
616
  },
617
  {
618
+ "epoch": 44.888888888888886,
619
+ "eval_accuracy": 0.7833001988071571,
620
+ "eval_loss": 0.44288724660873413,
621
+ "eval_runtime": 2.7865,
622
+ "eval_samples_per_second": 180.51,
623
+ "eval_steps_per_second": 5.742,
624
+ "step": 303
625
+ },
626
+ {
627
+ "epoch": 45.925925925925924,
628
+ "grad_norm": 12.390064239501953,
629
+ "learning_rate": 7.71604938271605e-06,
630
+ "loss": 0.2498,
631
+ "step": 310
632
+ },
633
+ {
634
+ "epoch": 45.925925925925924,
635
+ "eval_accuracy": 0.7892644135188867,
636
+ "eval_loss": 0.4459961950778961,
637
+ "eval_runtime": 2.8017,
638
+ "eval_samples_per_second": 179.537,
639
+ "eval_steps_per_second": 5.711,
640
+ "step": 310
641
+ },
642
+ {
643
+ "epoch": 46.96296296296296,
644
+ "eval_accuracy": 0.7793240556660039,
645
+ "eval_loss": 0.4721997082233429,
646
+ "eval_runtime": 2.8302,
647
+ "eval_samples_per_second": 177.729,
648
+ "eval_steps_per_second": 5.653,
649
+ "step": 317
650
+ },
651
+ {
652
+ "epoch": 47.407407407407405,
653
+ "grad_norm": 11.559773445129395,
654
+ "learning_rate": 6.172839506172839e-06,
655
+ "loss": 0.2893,
656
+ "step": 320
657
+ },
658
+ {
659
+ "epoch": 48.0,
660
+ "eval_accuracy": 0.7713717693836978,
661
+ "eval_loss": 0.47993555665016174,
662
+ "eval_runtime": 2.9229,
663
+ "eval_samples_per_second": 172.087,
664
+ "eval_steps_per_second": 5.474,
665
+ "step": 324
666
+ },
667
+ {
668
+ "epoch": 48.888888888888886,
669
+ "grad_norm": 11.171250343322754,
670
+ "learning_rate": 4.6296296296296296e-06,
671
+ "loss": 0.2618,
672
+ "step": 330
673
+ },
674
+ {
675
+ "epoch": 48.888888888888886,
676
+ "eval_accuracy": 0.7713717693836978,
677
+ "eval_loss": 0.4849596321582794,
678
+ "eval_runtime": 2.8546,
679
+ "eval_samples_per_second": 176.209,
680
+ "eval_steps_per_second": 5.605,
681
+ "step": 330
682
+ },
683
+ {
684
+ "epoch": 49.925925925925924,
685
+ "eval_accuracy": 0.7495029821073559,
686
+ "eval_loss": 0.5151545405387878,
687
+ "eval_runtime": 2.8154,
688
+ "eval_samples_per_second": 178.658,
689
+ "eval_steps_per_second": 5.683,
690
+ "step": 337
691
+ },
692
+ {
693
+ "epoch": 50.370370370370374,
694
+ "grad_norm": 16.287992477416992,
695
+ "learning_rate": 3.0864197530864196e-06,
696
+ "loss": 0.2664,
697
+ "step": 340
698
+ },
699
+ {
700
+ "epoch": 50.96296296296296,
701
+ "eval_accuracy": 0.7395626242544732,
702
+ "eval_loss": 0.5347036123275757,
703
+ "eval_runtime": 2.8326,
704
+ "eval_samples_per_second": 177.574,
705
+ "eval_steps_per_second": 5.648,
706
+ "step": 344
707
+ },
708
+ {
709
+ "epoch": 51.851851851851855,
710
+ "grad_norm": 15.289400100708008,
711
+ "learning_rate": 1.5432098765432098e-06,
712
+ "loss": 0.27,
713
+ "step": 350
714
+ },
715
+ {
716
+ "epoch": 52.0,
717
+ "eval_accuracy": 0.7415506958250497,
718
+ "eval_loss": 0.5342876315116882,
719
+ "eval_runtime": 2.8365,
720
+ "eval_samples_per_second": 177.329,
721
+ "eval_steps_per_second": 5.641,
722
+ "step": 351
723
+ },
724
+ {
725
+ "epoch": 52.888888888888886,
726
+ "eval_accuracy": 0.7415506958250497,
727
+ "eval_loss": 0.5330411195755005,
728
+ "eval_runtime": 2.823,
729
+ "eval_samples_per_second": 178.181,
730
+ "eval_steps_per_second": 5.668,
731
+ "step": 357
732
+ },
733
+ {
734
+ "epoch": 53.333333333333336,
735
+ "grad_norm": 15.15584945678711,
736
+ "learning_rate": 0.0,
737
+ "loss": 0.2539,
738
+ "step": 360
739
+ },
740
+ {
741
+ "epoch": 53.333333333333336,
742
+ "eval_accuracy": 0.7395626242544732,
743
+ "eval_loss": 0.5319550037384033,
744
+ "eval_runtime": 2.8191,
745
+ "eval_samples_per_second": 178.427,
746
+ "eval_steps_per_second": 5.676,
747
+ "step": 360
748
+ },
749
+ {
750
+ "epoch": 53.333333333333336,
751
+ "step": 360,
752
+ "total_flos": 1.140094502803243e+18,
753
+ "train_loss": 0.34168325927522447,
754
+ "train_runtime": 709.0525,
755
+ "train_samples_per_second": 72.773,
756
+ "train_steps_per_second": 0.508
757
  }
758
  ],
759
  "logging_steps": 10,
760
+ "max_steps": 360,
761
  "num_input_tokens_seen": 0,
762
+ "num_train_epochs": 60,
763
  "save_steps": 500,
764
  "stateful_callbacks": {
765
  "TrainerControl": {
 
773
  "attributes": {}
774
  }
775
  },
776
+ "total_flos": 1.140094502803243e+18,
777
  "train_batch_size": 32,
778
  "trial_name": null,
779
  "trial_params": null