Gokulapriyan commited on
Commit
31ba05f
·
1 Parent(s): 379aa41

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9557870124349027,
4
- "eval_loss": 0.12056268006563187,
5
- "eval_runtime": 116.9691,
6
- "eval_samples_per_second": 80.44,
7
- "eval_steps_per_second": 2.522,
8
- "total_flos": 6.312630587402281e+18,
9
- "train_loss": 0.39632425417036826,
10
- "train_runtime": 5574.9078,
11
- "train_samples_per_second": 45.565,
12
- "train_steps_per_second": 0.356
13
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9741915422885572,
4
+ "eval_loss": 0.06838314980268478,
5
+ "eval_runtime": 131.8368,
6
+ "eval_samples_per_second": 73.181,
7
+ "eval_steps_per_second": 2.291,
8
+ "total_flos": 8.391154825876193e+18,
9
+ "train_loss": 0.33317647269826234,
10
+ "train_runtime": 8035.3313,
11
+ "train_samples_per_second": 42.02,
12
+ "train_steps_per_second": 0.328
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9557870124349027,
4
- "eval_loss": 0.12056268006563187,
5
- "eval_runtime": 116.9691,
6
- "eval_samples_per_second": 80.44,
7
- "eval_steps_per_second": 2.522
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9741915422885572,
4
+ "eval_loss": 0.06838314980268478,
5
+ "eval_runtime": 131.8368,
6
+ "eval_samples_per_second": 73.181,
7
+ "eval_steps_per_second": 2.291
8
  }
runs/Feb07_08-02-43_dcad63cb3363/events.out.tfevents.1675765217.dcad63cb3363.606.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd69521417bb4f8013807a4d9ed2212a4137fc9296960c4de72c518445b26ef1
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 6.312630587402281e+18,
4
- "train_loss": 0.39632425417036826,
5
- "train_runtime": 5574.9078,
6
- "train_samples_per_second": 45.565,
7
- "train_steps_per_second": 0.356
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 8.391154825876193e+18,
4
+ "train_loss": 0.33317647269826234,
5
+ "train_runtime": 8035.3313,
6
+ "train_samples_per_second": 42.02,
7
+ "train_steps_per_second": 0.328
8
  }
trainer_state.json CHANGED
@@ -1,1240 +1,1648 @@
1
  {
2
- "best_metric": 0.9557870124349027,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1983",
4
- "epoch": 2.9988666414809217,
5
- "global_step": 1983,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.02,
12
- "learning_rate": 2.512562814070352e-06,
13
- "loss": 1.3842,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.03,
18
- "learning_rate": 5.025125628140704e-06,
19
- "loss": 1.3045,
20
  "step": 20
21
  },
22
  {
23
- "epoch": 0.05,
24
- "learning_rate": 7.537688442211055e-06,
25
- "loss": 1.1787,
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.06,
30
- "learning_rate": 1.0050251256281408e-05,
31
- "loss": 1.02,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.08,
36
- "learning_rate": 1.2562814070351759e-05,
37
- "loss": 0.9295,
38
  "step": 50
39
  },
40
  {
41
- "epoch": 0.09,
42
- "learning_rate": 1.507537688442211e-05,
43
- "loss": 0.837,
44
  "step": 60
45
  },
46
  {
47
- "epoch": 0.11,
48
- "learning_rate": 1.7587939698492464e-05,
49
- "loss": 0.8061,
50
  "step": 70
51
  },
52
  {
53
- "epoch": 0.12,
54
- "learning_rate": 2.0100502512562815e-05,
55
- "loss": 0.8085,
56
  "step": 80
57
  },
58
  {
59
- "epoch": 0.14,
60
- "learning_rate": 2.2613065326633167e-05,
61
- "loss": 0.7833,
62
  "step": 90
63
  },
64
  {
65
- "epoch": 0.15,
66
- "learning_rate": 2.5125628140703518e-05,
67
- "loss": 0.7737,
68
  "step": 100
69
  },
70
  {
71
- "epoch": 0.17,
72
- "learning_rate": 2.763819095477387e-05,
73
- "loss": 0.7031,
74
  "step": 110
75
  },
76
  {
77
- "epoch": 0.18,
78
- "learning_rate": 3.015075376884422e-05,
79
- "loss": 0.7153,
80
  "step": 120
81
  },
82
  {
83
- "epoch": 0.2,
84
- "learning_rate": 3.2663316582914576e-05,
85
- "loss": 0.7036,
86
  "step": 130
87
  },
88
  {
89
- "epoch": 0.21,
90
- "learning_rate": 3.517587939698493e-05,
91
- "loss": 0.7027,
92
  "step": 140
93
  },
94
  {
95
- "epoch": 0.23,
96
- "learning_rate": 3.768844221105528e-05,
97
- "loss": 0.7046,
98
  "step": 150
99
  },
100
  {
101
- "epoch": 0.24,
102
- "learning_rate": 4.020100502512563e-05,
103
- "loss": 0.6977,
104
  "step": 160
105
  },
106
  {
107
- "epoch": 0.26,
108
- "learning_rate": 4.271356783919598e-05,
109
- "loss": 0.6629,
110
  "step": 170
111
  },
112
  {
113
- "epoch": 0.27,
114
- "learning_rate": 4.522613065326633e-05,
115
- "loss": 0.7026,
116
  "step": 180
117
  },
118
  {
119
- "epoch": 0.29,
120
- "learning_rate": 4.7738693467336685e-05,
121
- "loss": 0.6673,
122
  "step": 190
123
  },
124
  {
125
- "epoch": 0.3,
126
- "learning_rate": 4.997197309417041e-05,
127
- "loss": 0.6554,
128
  "step": 200
129
  },
130
  {
131
- "epoch": 0.32,
132
- "learning_rate": 4.969170403587444e-05,
133
- "loss": 0.6548,
134
  "step": 210
135
  },
136
  {
137
- "epoch": 0.33,
138
- "learning_rate": 4.941143497757848e-05,
139
- "loss": 0.6225,
140
  "step": 220
141
  },
142
  {
143
- "epoch": 0.35,
144
- "learning_rate": 4.913116591928251e-05,
145
- "loss": 0.6582,
146
  "step": 230
147
  },
148
  {
149
- "epoch": 0.36,
150
- "learning_rate": 4.885089686098655e-05,
151
- "loss": 0.6132,
152
  "step": 240
153
  },
154
  {
155
- "epoch": 0.38,
156
- "learning_rate": 4.857062780269058e-05,
157
- "loss": 0.583,
158
  "step": 250
159
  },
160
  {
161
- "epoch": 0.39,
162
- "learning_rate": 4.829035874439462e-05,
163
- "loss": 0.6279,
164
  "step": 260
165
  },
166
  {
167
- "epoch": 0.41,
168
- "learning_rate": 4.801008968609866e-05,
169
- "loss": 0.6178,
170
  "step": 270
171
  },
172
  {
173
- "epoch": 0.42,
174
- "learning_rate": 4.7729820627802694e-05,
175
- "loss": 0.5601,
176
  "step": 280
177
  },
178
  {
179
- "epoch": 0.44,
180
- "learning_rate": 4.744955156950673e-05,
181
- "loss": 0.5939,
182
  "step": 290
183
  },
184
  {
185
- "epoch": 0.45,
186
- "learning_rate": 4.7169282511210764e-05,
187
- "loss": 0.6023,
188
  "step": 300
189
  },
190
  {
191
- "epoch": 0.47,
192
- "learning_rate": 4.68890134529148e-05,
193
- "loss": 0.5932,
194
  "step": 310
195
  },
196
  {
197
- "epoch": 0.48,
198
- "learning_rate": 4.6608744394618834e-05,
199
- "loss": 0.5353,
200
  "step": 320
201
  },
202
  {
203
- "epoch": 0.5,
204
- "learning_rate": 4.6328475336322875e-05,
205
- "loss": 0.5472,
206
  "step": 330
207
  },
208
  {
209
- "epoch": 0.51,
210
- "learning_rate": 4.6048206278026903e-05,
211
- "loss": 0.5445,
212
  "step": 340
213
  },
214
  {
215
- "epoch": 0.53,
216
- "learning_rate": 4.5767937219730945e-05,
217
- "loss": 0.5398,
218
  "step": 350
219
  },
220
  {
221
- "epoch": 0.54,
222
- "learning_rate": 4.548766816143498e-05,
223
- "loss": 0.5054,
224
  "step": 360
225
  },
226
  {
227
- "epoch": 0.56,
228
- "learning_rate": 4.5207399103139015e-05,
229
- "loss": 0.5082,
230
  "step": 370
231
  },
232
  {
233
- "epoch": 0.57,
234
- "learning_rate": 4.492713004484305e-05,
235
- "loss": 0.5801,
236
  "step": 380
237
  },
238
  {
239
- "epoch": 0.59,
240
- "learning_rate": 4.464686098654709e-05,
241
- "loss": 0.548,
242
  "step": 390
243
  },
244
  {
245
- "epoch": 0.6,
246
- "learning_rate": 4.436659192825112e-05,
247
- "loss": 0.5703,
248
  "step": 400
249
  },
250
  {
251
- "epoch": 0.62,
252
- "learning_rate": 4.408632286995516e-05,
253
- "loss": 0.507,
254
  "step": 410
255
  },
256
  {
257
- "epoch": 0.63,
258
- "learning_rate": 4.380605381165919e-05,
259
- "loss": 0.505,
260
  "step": 420
261
  },
262
  {
263
- "epoch": 0.65,
264
- "learning_rate": 4.352578475336323e-05,
265
- "loss": 0.4939,
266
  "step": 430
267
  },
268
  {
269
- "epoch": 0.66,
270
- "learning_rate": 4.3245515695067267e-05,
271
- "loss": 0.4916,
272
  "step": 440
273
  },
274
  {
275
- "epoch": 0.68,
276
- "learning_rate": 4.29652466367713e-05,
277
- "loss": 0.4722,
278
  "step": 450
279
  },
280
  {
281
- "epoch": 0.7,
282
- "learning_rate": 4.2684977578475336e-05,
283
- "loss": 0.4934,
284
  "step": 460
285
  },
286
  {
287
- "epoch": 0.71,
288
- "learning_rate": 4.240470852017938e-05,
289
- "loss": 0.4524,
290
  "step": 470
291
  },
292
  {
293
- "epoch": 0.73,
294
- "learning_rate": 4.2124439461883406e-05,
295
- "loss": 0.483,
296
  "step": 480
297
  },
298
  {
299
- "epoch": 0.74,
300
- "learning_rate": 4.184417040358745e-05,
301
- "loss": 0.4941,
302
  "step": 490
303
  },
304
  {
305
- "epoch": 0.76,
306
- "learning_rate": 4.156390134529148e-05,
307
- "loss": 0.4584,
308
  "step": 500
309
  },
310
  {
311
- "epoch": 0.77,
312
- "learning_rate": 4.128363228699552e-05,
313
- "loss": 0.4782,
314
  "step": 510
315
  },
316
  {
317
- "epoch": 0.79,
318
- "learning_rate": 4.100336322869955e-05,
319
- "loss": 0.4642,
320
  "step": 520
321
  },
322
  {
323
- "epoch": 0.8,
324
- "learning_rate": 4.0723094170403595e-05,
325
- "loss": 0.4527,
 
 
 
 
 
 
 
 
 
326
  "step": 530
327
  },
328
  {
329
- "epoch": 0.82,
330
- "learning_rate": 4.044282511210762e-05,
331
- "loss": 0.4402,
332
  "step": 540
333
  },
334
  {
335
- "epoch": 0.83,
336
- "learning_rate": 4.0162556053811665e-05,
337
- "loss": 0.4271,
338
  "step": 550
339
  },
340
  {
341
- "epoch": 0.85,
342
- "learning_rate": 3.98822869955157e-05,
343
- "loss": 0.4439,
344
  "step": 560
345
  },
346
  {
347
- "epoch": 0.86,
348
- "learning_rate": 3.9602017937219735e-05,
349
- "loss": 0.455,
350
  "step": 570
351
  },
352
  {
353
- "epoch": 0.88,
354
- "learning_rate": 3.932174887892377e-05,
355
- "loss": 0.4369,
356
  "step": 580
357
  },
358
  {
359
- "epoch": 0.89,
360
- "learning_rate": 3.9041479820627804e-05,
361
- "loss": 0.4563,
362
  "step": 590
363
  },
364
  {
365
- "epoch": 0.91,
366
- "learning_rate": 3.876121076233184e-05,
367
- "loss": 0.4441,
368
  "step": 600
369
  },
370
  {
371
- "epoch": 0.92,
372
- "learning_rate": 3.8480941704035874e-05,
373
- "loss": 0.4234,
374
  "step": 610
375
  },
376
  {
377
- "epoch": 0.94,
378
- "learning_rate": 3.820067264573991e-05,
379
- "loss": 0.4257,
380
  "step": 620
381
  },
382
  {
383
- "epoch": 0.95,
384
- "learning_rate": 3.792040358744395e-05,
385
- "loss": 0.3985,
386
  "step": 630
387
  },
388
  {
389
- "epoch": 0.97,
390
- "learning_rate": 3.7640134529147986e-05,
391
- "loss": 0.3935,
392
  "step": 640
393
  },
394
  {
395
- "epoch": 0.98,
396
- "learning_rate": 3.735986547085202e-05,
397
- "loss": 0.4099,
398
  "step": 650
399
  },
400
  {
401
- "epoch": 1.0,
402
- "learning_rate": 3.7079596412556056e-05,
403
- "loss": 0.4052,
404
  "step": 660
405
  },
406
  {
407
- "epoch": 1.0,
408
- "eval_accuracy": 0.8906366245084494,
409
- "eval_loss": 0.2878882884979248,
410
- "eval_runtime": 123.5087,
411
- "eval_samples_per_second": 76.181,
412
- "eval_steps_per_second": 2.388,
413
- "step": 661
414
- },
415
- {
416
- "epoch": 1.01,
417
- "learning_rate": 3.679932735426009e-05,
418
- "loss": 0.4176,
419
  "step": 670
420
  },
421
  {
422
- "epoch": 1.03,
423
- "learning_rate": 3.6519058295964126e-05,
424
- "loss": 0.3807,
425
  "step": 680
426
  },
427
  {
428
- "epoch": 1.04,
429
- "learning_rate": 3.623878923766816e-05,
430
- "loss": 0.3781,
431
  "step": 690
432
  },
433
  {
434
- "epoch": 1.06,
435
- "learning_rate": 3.59585201793722e-05,
436
- "loss": 0.4276,
437
  "step": 700
438
  },
439
  {
440
- "epoch": 1.07,
441
- "learning_rate": 3.567825112107623e-05,
442
- "loss": 0.4053,
443
  "step": 710
444
  },
445
  {
446
- "epoch": 1.09,
447
- "learning_rate": 3.539798206278027e-05,
448
- "loss": 0.3546,
449
  "step": 720
450
  },
451
  {
452
- "epoch": 1.1,
453
- "learning_rate": 3.51177130044843e-05,
454
- "loss": 0.3792,
455
  "step": 730
456
  },
457
  {
458
- "epoch": 1.12,
459
- "learning_rate": 3.483744394618834e-05,
460
- "loss": 0.3942,
461
  "step": 740
462
  },
463
  {
464
- "epoch": 1.13,
465
- "learning_rate": 3.455717488789238e-05,
466
- "loss": 0.409,
467
  "step": 750
468
  },
469
  {
470
- "epoch": 1.15,
471
- "learning_rate": 3.427690582959641e-05,
472
- "loss": 0.359,
473
  "step": 760
474
  },
475
  {
476
- "epoch": 1.16,
477
- "learning_rate": 3.399663677130045e-05,
478
- "loss": 0.4147,
479
  "step": 770
480
  },
481
  {
482
- "epoch": 1.18,
483
- "learning_rate": 3.371636771300449e-05,
484
- "loss": 0.3849,
485
  "step": 780
486
  },
487
  {
488
- "epoch": 1.19,
489
- "learning_rate": 3.343609865470852e-05,
490
- "loss": 0.3619,
491
  "step": 790
492
  },
493
  {
494
- "epoch": 1.21,
495
- "learning_rate": 3.315582959641256e-05,
496
- "loss": 0.3841,
497
  "step": 800
498
  },
499
  {
500
- "epoch": 1.23,
501
- "learning_rate": 3.2875560538116594e-05,
502
- "loss": 0.3437,
503
  "step": 810
504
  },
505
  {
506
- "epoch": 1.24,
507
- "learning_rate": 3.259529147982063e-05,
508
- "loss": 0.3413,
509
  "step": 820
510
  },
511
  {
512
- "epoch": 1.26,
513
- "learning_rate": 3.2315022421524664e-05,
514
- "loss": 0.3605,
515
  "step": 830
516
  },
517
  {
518
- "epoch": 1.27,
519
- "learning_rate": 3.2034753363228705e-05,
520
- "loss": 0.3406,
521
  "step": 840
522
  },
523
  {
524
- "epoch": 1.29,
525
- "learning_rate": 3.1754484304932734e-05,
526
- "loss": 0.376,
527
  "step": 850
528
  },
529
  {
530
- "epoch": 1.3,
531
- "learning_rate": 3.1474215246636775e-05,
532
- "loss": 0.329,
533
  "step": 860
534
  },
535
  {
536
- "epoch": 1.32,
537
- "learning_rate": 3.119394618834081e-05,
538
- "loss": 0.3673,
539
  "step": 870
540
  },
541
  {
542
- "epoch": 1.33,
543
- "learning_rate": 3.0913677130044845e-05,
544
- "loss": 0.3527,
545
  "step": 880
546
  },
547
  {
548
- "epoch": 1.35,
549
- "learning_rate": 3.063340807174888e-05,
550
- "loss": 0.3198,
551
  "step": 890
552
  },
553
  {
554
- "epoch": 1.36,
555
- "learning_rate": 3.0353139013452915e-05,
556
- "loss": 0.3233,
557
  "step": 900
558
  },
559
  {
560
- "epoch": 1.38,
561
- "learning_rate": 3.007286995515695e-05,
562
- "loss": 0.3703,
563
  "step": 910
564
  },
565
  {
566
- "epoch": 1.39,
567
- "learning_rate": 2.979260089686099e-05,
568
- "loss": 0.3353,
569
  "step": 920
570
  },
571
  {
572
- "epoch": 1.41,
573
- "learning_rate": 2.951233183856502e-05,
574
- "loss": 0.3367,
575
  "step": 930
576
  },
577
  {
578
- "epoch": 1.42,
579
- "learning_rate": 2.923206278026906e-05,
580
- "loss": 0.3346,
581
  "step": 940
582
  },
583
  {
584
- "epoch": 1.44,
585
- "learning_rate": 2.8951793721973097e-05,
586
- "loss": 0.3155,
587
  "step": 950
588
  },
589
  {
590
- "epoch": 1.45,
591
- "learning_rate": 2.867152466367713e-05,
592
- "loss": 0.3248,
593
  "step": 960
594
  },
595
  {
596
- "epoch": 1.47,
597
- "learning_rate": 2.8391255605381167e-05,
598
- "loss": 0.3529,
599
  "step": 970
600
  },
601
  {
602
- "epoch": 1.48,
603
- "learning_rate": 2.8110986547085205e-05,
604
- "loss": 0.3273,
605
  "step": 980
606
  },
607
  {
608
- "epoch": 1.5,
609
- "learning_rate": 2.7830717488789237e-05,
610
- "loss": 0.3517,
611
  "step": 990
612
  },
613
  {
614
- "epoch": 1.51,
615
- "learning_rate": 2.7550448430493275e-05,
616
- "loss": 0.3532,
617
  "step": 1000
618
  },
619
  {
620
- "epoch": 1.53,
621
- "learning_rate": 2.7270179372197313e-05,
622
- "loss": 0.3549,
623
  "step": 1010
624
  },
625
  {
626
- "epoch": 1.54,
627
- "learning_rate": 2.6989910313901345e-05,
628
- "loss": 0.3091,
629
  "step": 1020
630
  },
631
  {
632
- "epoch": 1.56,
633
- "learning_rate": 2.6709641255605383e-05,
634
- "loss": 0.3171,
635
  "step": 1030
636
  },
637
  {
638
- "epoch": 1.57,
639
- "learning_rate": 2.642937219730942e-05,
640
- "loss": 0.3219,
641
  "step": 1040
642
  },
643
  {
644
- "epoch": 1.59,
645
- "learning_rate": 2.6149103139013453e-05,
646
- "loss": 0.3309,
647
  "step": 1050
648
  },
649
  {
650
- "epoch": 1.6,
651
- "learning_rate": 2.586883408071749e-05,
652
- "loss": 0.3166,
 
 
 
 
 
 
 
 
 
653
  "step": 1060
654
  },
655
  {
656
- "epoch": 1.62,
657
- "learning_rate": 2.5588565022421523e-05,
658
- "loss": 0.298,
659
  "step": 1070
660
  },
661
  {
662
- "epoch": 1.63,
663
- "learning_rate": 2.530829596412556e-05,
664
- "loss": 0.3025,
665
  "step": 1080
666
  },
667
  {
668
- "epoch": 1.65,
669
- "learning_rate": 2.50280269058296e-05,
670
- "loss": 0.3075,
671
  "step": 1090
672
  },
673
  {
674
- "epoch": 1.66,
675
- "learning_rate": 2.4747757847533635e-05,
676
- "loss": 0.3047,
677
  "step": 1100
678
  },
679
  {
680
- "epoch": 1.68,
681
- "learning_rate": 2.446748878923767e-05,
682
- "loss": 0.3483,
683
  "step": 1110
684
  },
685
  {
686
- "epoch": 1.69,
687
- "learning_rate": 2.4187219730941705e-05,
688
- "loss": 0.288,
689
  "step": 1120
690
  },
691
  {
692
- "epoch": 1.71,
693
- "learning_rate": 2.3906950672645743e-05,
694
- "loss": 0.3153,
695
  "step": 1130
696
  },
697
  {
698
- "epoch": 1.72,
699
- "learning_rate": 2.3626681614349778e-05,
700
- "loss": 0.3133,
701
  "step": 1140
702
  },
703
  {
704
- "epoch": 1.74,
705
- "learning_rate": 2.3346412556053813e-05,
706
- "loss": 0.314,
707
  "step": 1150
708
  },
709
  {
710
- "epoch": 1.75,
711
- "learning_rate": 2.306614349775785e-05,
712
- "loss": 0.2799,
713
  "step": 1160
714
  },
715
  {
716
- "epoch": 1.77,
717
- "learning_rate": 2.2785874439461886e-05,
718
- "loss": 0.2767,
719
  "step": 1170
720
  },
721
  {
722
- "epoch": 1.78,
723
- "learning_rate": 2.250560538116592e-05,
724
- "loss": 0.3038,
725
  "step": 1180
726
  },
727
  {
728
- "epoch": 1.8,
729
- "learning_rate": 2.2225336322869956e-05,
730
- "loss": 0.2795,
731
  "step": 1190
732
  },
733
  {
734
- "epoch": 1.81,
735
- "learning_rate": 2.1945067264573994e-05,
736
- "loss": 0.3159,
737
  "step": 1200
738
  },
739
  {
740
- "epoch": 1.83,
741
- "learning_rate": 2.166479820627803e-05,
742
- "loss": 0.3275,
743
  "step": 1210
744
  },
745
  {
746
- "epoch": 1.84,
747
- "learning_rate": 2.1384529147982064e-05,
748
- "loss": 0.2889,
749
  "step": 1220
750
  },
751
  {
752
- "epoch": 1.86,
753
- "learning_rate": 2.11042600896861e-05,
754
- "loss": 0.2949,
755
  "step": 1230
756
  },
757
  {
758
- "epoch": 1.87,
759
- "learning_rate": 2.0823991031390138e-05,
760
- "loss": 0.3073,
761
  "step": 1240
762
  },
763
  {
764
- "epoch": 1.89,
765
- "learning_rate": 2.0543721973094173e-05,
766
- "loss": 0.285,
767
  "step": 1250
768
  },
769
  {
770
- "epoch": 1.91,
771
- "learning_rate": 2.0263452914798208e-05,
772
- "loss": 0.3138,
773
  "step": 1260
774
  },
775
  {
776
- "epoch": 1.92,
777
- "learning_rate": 1.9983183856502243e-05,
778
- "loss": 0.2894,
779
  "step": 1270
780
  },
781
  {
782
- "epoch": 1.94,
783
- "learning_rate": 1.9702914798206277e-05,
784
- "loss": 0.2711,
785
  "step": 1280
786
  },
787
  {
788
- "epoch": 1.95,
789
- "learning_rate": 1.9422645739910312e-05,
790
- "loss": 0.289,
791
  "step": 1290
792
  },
793
  {
794
- "epoch": 1.97,
795
- "learning_rate": 1.914237668161435e-05,
796
- "loss": 0.2621,
797
  "step": 1300
798
  },
799
  {
800
- "epoch": 1.98,
801
- "learning_rate": 1.8862107623318386e-05,
802
- "loss": 0.2872,
803
  "step": 1310
804
  },
805
  {
806
- "epoch": 2.0,
807
- "learning_rate": 1.858183856502242e-05,
808
- "loss": 0.3079,
809
  "step": 1320
810
  },
811
  {
812
- "epoch": 2.0,
813
- "eval_accuracy": 0.936762674035498,
814
- "eval_loss": 0.15844394266605377,
815
- "eval_runtime": 121.8002,
816
- "eval_samples_per_second": 77.249,
817
- "eval_steps_per_second": 2.422,
818
- "step": 1322
819
- },
820
- {
821
- "epoch": 2.01,
822
- "learning_rate": 1.8301569506726456e-05,
823
- "loss": 0.3002,
824
  "step": 1330
825
  },
826
  {
827
- "epoch": 2.03,
828
- "learning_rate": 1.8021300448430494e-05,
829
- "loss": 0.2138,
830
  "step": 1340
831
  },
832
  {
833
- "epoch": 2.04,
834
- "learning_rate": 1.774103139013453e-05,
835
- "loss": 0.29,
836
  "step": 1350
837
  },
838
  {
839
- "epoch": 2.06,
840
- "learning_rate": 1.7460762331838564e-05,
841
- "loss": 0.2843,
842
  "step": 1360
843
  },
844
  {
845
- "epoch": 2.07,
846
- "learning_rate": 1.7180493273542602e-05,
847
- "loss": 0.275,
848
  "step": 1370
849
  },
850
  {
851
- "epoch": 2.09,
852
- "learning_rate": 1.6900224215246637e-05,
853
- "loss": 0.2545,
854
  "step": 1380
855
  },
856
  {
857
- "epoch": 2.1,
858
- "learning_rate": 1.6619955156950672e-05,
859
- "loss": 0.262,
860
  "step": 1390
861
  },
862
  {
863
- "epoch": 2.12,
864
- "learning_rate": 1.633968609865471e-05,
865
- "loss": 0.2746,
866
  "step": 1400
867
  },
868
  {
869
- "epoch": 2.13,
870
- "learning_rate": 1.6059417040358745e-05,
871
- "loss": 0.2595,
872
  "step": 1410
873
  },
874
  {
875
- "epoch": 2.15,
876
- "learning_rate": 1.577914798206278e-05,
877
- "loss": 0.2648,
878
  "step": 1420
879
  },
880
  {
881
- "epoch": 2.16,
882
- "learning_rate": 1.5498878923766815e-05,
883
- "loss": 0.2705,
884
  "step": 1430
885
  },
886
  {
887
- "epoch": 2.18,
888
- "learning_rate": 1.5218609865470854e-05,
889
- "loss": 0.2653,
890
  "step": 1440
891
  },
892
  {
893
- "epoch": 2.19,
894
- "learning_rate": 1.4938340807174889e-05,
895
- "loss": 0.2706,
896
  "step": 1450
897
  },
898
  {
899
- "epoch": 2.21,
900
- "learning_rate": 1.4658071748878924e-05,
901
- "loss": 0.2658,
902
  "step": 1460
903
  },
904
  {
905
- "epoch": 2.22,
906
- "learning_rate": 1.4377802690582962e-05,
907
- "loss": 0.277,
908
  "step": 1470
909
  },
910
  {
911
- "epoch": 2.24,
912
- "learning_rate": 1.4097533632286997e-05,
913
- "loss": 0.2391,
914
  "step": 1480
915
  },
916
  {
917
- "epoch": 2.25,
918
- "learning_rate": 1.3817264573991032e-05,
919
- "loss": 0.2576,
920
  "step": 1490
921
  },
922
  {
923
- "epoch": 2.27,
924
- "learning_rate": 1.3536995515695067e-05,
925
- "loss": 0.278,
926
  "step": 1500
927
  },
928
  {
929
- "epoch": 2.28,
930
- "learning_rate": 1.3256726457399105e-05,
931
- "loss": 0.2247,
932
  "step": 1510
933
  },
934
  {
935
- "epoch": 2.3,
936
- "learning_rate": 1.297645739910314e-05,
937
- "loss": 0.2638,
938
  "step": 1520
939
  },
940
  {
941
- "epoch": 2.31,
942
- "learning_rate": 1.2696188340807175e-05,
943
- "loss": 0.2399,
944
  "step": 1530
945
  },
946
  {
947
- "epoch": 2.33,
948
- "learning_rate": 1.2415919282511212e-05,
949
- "loss": 0.2306,
950
  "step": 1540
951
  },
952
  {
953
- "epoch": 2.34,
954
- "learning_rate": 1.2135650224215247e-05,
955
- "loss": 0.263,
956
  "step": 1550
957
  },
958
  {
959
- "epoch": 2.36,
960
- "learning_rate": 1.1855381165919283e-05,
961
- "loss": 0.2624,
962
  "step": 1560
963
  },
964
  {
965
- "epoch": 2.37,
966
- "learning_rate": 1.1575112107623318e-05,
967
- "loss": 0.2533,
968
  "step": 1570
969
  },
970
  {
971
- "epoch": 2.39,
972
- "learning_rate": 1.1294843049327355e-05,
973
- "loss": 0.2431,
974
  "step": 1580
975
  },
976
  {
977
- "epoch": 2.4,
978
- "learning_rate": 1.101457399103139e-05,
979
- "loss": 0.252,
 
 
 
 
 
 
 
 
 
980
  "step": 1590
981
  },
982
  {
983
- "epoch": 2.42,
984
- "learning_rate": 1.0734304932735427e-05,
985
- "loss": 0.2275,
986
  "step": 1600
987
  },
988
  {
989
- "epoch": 2.44,
990
- "learning_rate": 1.0454035874439462e-05,
991
- "loss": 0.2553,
992
  "step": 1610
993
  },
994
  {
995
- "epoch": 2.45,
996
- "learning_rate": 1.0173766816143498e-05,
997
- "loss": 0.2388,
998
  "step": 1620
999
  },
1000
  {
1001
- "epoch": 2.47,
1002
- "learning_rate": 9.893497757847533e-06,
1003
- "loss": 0.2367,
1004
  "step": 1630
1005
  },
1006
  {
1007
- "epoch": 2.48,
1008
- "learning_rate": 9.61322869955157e-06,
1009
- "loss": 0.2495,
1010
  "step": 1640
1011
  },
1012
  {
1013
- "epoch": 2.5,
1014
- "learning_rate": 9.332959641255606e-06,
1015
- "loss": 0.2574,
1016
  "step": 1650
1017
  },
1018
  {
1019
- "epoch": 2.51,
1020
- "learning_rate": 9.052690582959641e-06,
1021
- "loss": 0.2425,
1022
  "step": 1660
1023
  },
1024
  {
1025
- "epoch": 2.53,
1026
- "learning_rate": 8.772421524663678e-06,
1027
- "loss": 0.2636,
1028
  "step": 1670
1029
  },
1030
  {
1031
- "epoch": 2.54,
1032
- "learning_rate": 8.492152466367713e-06,
1033
- "loss": 0.23,
1034
  "step": 1680
1035
  },
1036
  {
1037
- "epoch": 2.56,
1038
- "learning_rate": 8.21188340807175e-06,
1039
- "loss": 0.253,
1040
  "step": 1690
1041
  },
1042
  {
1043
- "epoch": 2.57,
1044
- "learning_rate": 7.931614349775786e-06,
1045
- "loss": 0.2397,
1046
  "step": 1700
1047
  },
1048
  {
1049
- "epoch": 2.59,
1050
- "learning_rate": 7.651345291479821e-06,
1051
- "loss": 0.2319,
1052
  "step": 1710
1053
  },
1054
  {
1055
- "epoch": 2.6,
1056
- "learning_rate": 7.371076233183857e-06,
1057
- "loss": 0.217,
1058
  "step": 1720
1059
  },
1060
  {
1061
- "epoch": 2.62,
1062
- "learning_rate": 7.090807174887892e-06,
1063
- "loss": 0.275,
1064
  "step": 1730
1065
  },
1066
  {
1067
- "epoch": 2.63,
1068
- "learning_rate": 6.810538116591929e-06,
1069
- "loss": 0.2507,
1070
  "step": 1740
1071
  },
1072
  {
1073
- "epoch": 2.65,
1074
- "learning_rate": 6.530269058295964e-06,
1075
  "loss": 0.2409,
1076
  "step": 1750
1077
  },
1078
  {
1079
- "epoch": 2.66,
1080
- "learning_rate": 6.25e-06,
1081
- "loss": 0.2405,
1082
  "step": 1760
1083
  },
1084
  {
1085
- "epoch": 2.68,
1086
- "learning_rate": 5.969730941704036e-06,
1087
- "loss": 0.2353,
1088
  "step": 1770
1089
  },
1090
  {
1091
- "epoch": 2.69,
1092
- "learning_rate": 5.689461883408072e-06,
1093
- "loss": 0.2283,
1094
  "step": 1780
1095
  },
1096
  {
1097
- "epoch": 2.71,
1098
- "learning_rate": 5.409192825112108e-06,
1099
- "loss": 0.2595,
1100
  "step": 1790
1101
  },
1102
  {
1103
- "epoch": 2.72,
1104
- "learning_rate": 5.128923766816144e-06,
1105
- "loss": 0.2345,
1106
  "step": 1800
1107
  },
1108
  {
1109
- "epoch": 2.74,
1110
- "learning_rate": 4.848654708520179e-06,
1111
- "loss": 0.2261,
1112
  "step": 1810
1113
  },
1114
  {
1115
- "epoch": 2.75,
1116
- "learning_rate": 4.568385650224215e-06,
1117
- "loss": 0.233,
1118
  "step": 1820
1119
  },
1120
  {
1121
- "epoch": 2.77,
1122
- "learning_rate": 4.288116591928251e-06,
1123
- "loss": 0.2184,
1124
  "step": 1830
1125
  },
1126
  {
1127
- "epoch": 2.78,
1128
- "learning_rate": 4.007847533632287e-06,
1129
- "loss": 0.247,
1130
  "step": 1840
1131
  },
1132
  {
1133
- "epoch": 2.8,
1134
- "learning_rate": 3.7275784753363225e-06,
1135
- "loss": 0.2328,
1136
  "step": 1850
1137
  },
1138
  {
1139
- "epoch": 2.81,
1140
- "learning_rate": 3.447309417040359e-06,
1141
- "loss": 0.2115,
1142
  "step": 1860
1143
  },
1144
  {
1145
- "epoch": 2.83,
1146
- "learning_rate": 3.167040358744395e-06,
1147
- "loss": 0.2455,
1148
  "step": 1870
1149
  },
1150
  {
1151
- "epoch": 2.84,
1152
- "learning_rate": 2.886771300448431e-06,
1153
- "loss": 0.2406,
1154
  "step": 1880
1155
  },
1156
  {
1157
- "epoch": 2.86,
1158
- "learning_rate": 2.606502242152466e-06,
1159
- "loss": 0.2438,
1160
  "step": 1890
1161
  },
1162
  {
1163
- "epoch": 2.87,
1164
- "learning_rate": 2.3262331838565024e-06,
1165
- "loss": 0.2534,
1166
  "step": 1900
1167
  },
1168
  {
1169
- "epoch": 2.89,
1170
- "learning_rate": 2.045964125560538e-06,
1171
- "loss": 0.2053,
1172
  "step": 1910
1173
  },
1174
  {
1175
- "epoch": 2.9,
1176
- "learning_rate": 1.765695067264574e-06,
1177
- "loss": 0.241,
1178
  "step": 1920
1179
  },
1180
  {
1181
- "epoch": 2.92,
1182
- "learning_rate": 1.4854260089686098e-06,
1183
- "loss": 0.2144,
1184
  "step": 1930
1185
  },
1186
  {
1187
- "epoch": 2.93,
1188
- "learning_rate": 1.2051569506726458e-06,
1189
- "loss": 0.2587,
1190
  "step": 1940
1191
  },
1192
  {
1193
- "epoch": 2.95,
1194
- "learning_rate": 9.248878923766817e-07,
1195
- "loss": 0.2551,
1196
  "step": 1950
1197
  },
1198
  {
1199
- "epoch": 2.96,
1200
- "learning_rate": 6.446188340807175e-07,
1201
- "loss": 0.2279,
1202
  "step": 1960
1203
  },
1204
  {
1205
- "epoch": 2.98,
1206
- "learning_rate": 3.643497757847534e-07,
1207
- "loss": 0.2128,
1208
  "step": 1970
1209
  },
1210
  {
1211
- "epoch": 2.99,
1212
- "learning_rate": 8.408071748878925e-08,
1213
- "loss": 0.2444,
1214
  "step": 1980
1215
  },
1216
  {
1217
- "epoch": 3.0,
1218
- "eval_accuracy": 0.9557870124349027,
1219
- "eval_loss": 0.12056268006563187,
1220
- "eval_runtime": 124.8088,
1221
- "eval_samples_per_second": 75.387,
1222
- "eval_steps_per_second": 2.364,
1223
- "step": 1983
1224
  },
1225
  {
1226
- "epoch": 3.0,
1227
- "step": 1983,
1228
- "total_flos": 6.312630587402281e+18,
1229
- "train_loss": 0.39632425417036826,
1230
- "train_runtime": 5574.9078,
1231
- "train_samples_per_second": 45.565,
1232
- "train_steps_per_second": 0.356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1233
  }
1234
  ],
1235
- "max_steps": 1983,
1236
- "num_train_epochs": 3,
1237
- "total_flos": 6.312630587402281e+18,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
 
1
  {
2
+ "best_metric": 0.9741915422885572,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-2635",
4
+ "epoch": 4.99857887257224,
5
+ "global_step": 2635,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.02,
12
+ "learning_rate": 1.8939393939393941e-06,
13
+ "loss": 1.4619,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.04,
18
+ "learning_rate": 3.7878787878787882e-06,
19
+ "loss": 1.3888,
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.06,
24
+ "learning_rate": 5.681818181818182e-06,
25
+ "loss": 1.2545,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.08,
30
+ "learning_rate": 7.5757575757575764e-06,
31
+ "loss": 1.1428,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 0.09,
36
+ "learning_rate": 9.46969696969697e-06,
37
+ "loss": 0.9846,
38
  "step": 50
39
  },
40
  {
41
+ "epoch": 0.11,
42
+ "learning_rate": 1.1363636363636365e-05,
43
+ "loss": 0.8566,
44
  "step": 60
45
  },
46
  {
47
+ "epoch": 0.13,
48
+ "learning_rate": 1.3257575757575758e-05,
49
+ "loss": 0.8184,
50
  "step": 70
51
  },
52
  {
53
+ "epoch": 0.15,
54
+ "learning_rate": 1.5151515151515153e-05,
55
+ "loss": 0.8055,
56
  "step": 80
57
  },
58
  {
59
+ "epoch": 0.17,
60
+ "learning_rate": 1.7045454545454546e-05,
61
+ "loss": 0.7846,
62
  "step": 90
63
  },
64
  {
65
+ "epoch": 0.19,
66
+ "learning_rate": 1.893939393939394e-05,
67
+ "loss": 0.7953,
68
  "step": 100
69
  },
70
  {
71
+ "epoch": 0.21,
72
+ "learning_rate": 2.0833333333333336e-05,
73
+ "loss": 0.7205,
74
  "step": 110
75
  },
76
  {
77
+ "epoch": 0.23,
78
+ "learning_rate": 2.272727272727273e-05,
79
+ "loss": 0.699,
80
  "step": 120
81
  },
82
  {
83
+ "epoch": 0.25,
84
+ "learning_rate": 2.4621212121212123e-05,
85
+ "loss": 0.698,
86
  "step": 130
87
  },
88
  {
89
+ "epoch": 0.27,
90
+ "learning_rate": 2.6515151515151516e-05,
91
+ "loss": 0.7095,
92
  "step": 140
93
  },
94
  {
95
+ "epoch": 0.28,
96
+ "learning_rate": 2.8409090909090912e-05,
97
+ "loss": 0.6753,
98
  "step": 150
99
  },
100
  {
101
+ "epoch": 0.3,
102
+ "learning_rate": 3.0303030303030306e-05,
103
+ "loss": 0.6655,
104
  "step": 160
105
  },
106
  {
107
+ "epoch": 0.32,
108
+ "learning_rate": 3.2196969696969696e-05,
109
+ "loss": 0.6625,
110
  "step": 170
111
  },
112
  {
113
+ "epoch": 0.34,
114
+ "learning_rate": 3.409090909090909e-05,
115
+ "loss": 0.6881,
116
  "step": 180
117
  },
118
  {
119
+ "epoch": 0.36,
120
+ "learning_rate": 3.598484848484849e-05,
121
+ "loss": 0.6698,
122
  "step": 190
123
  },
124
  {
125
+ "epoch": 0.38,
126
+ "learning_rate": 3.787878787878788e-05,
127
+ "loss": 0.6548,
128
  "step": 200
129
  },
130
  {
131
+ "epoch": 0.4,
132
+ "learning_rate": 3.9772727272727275e-05,
133
+ "loss": 0.6373,
134
  "step": 210
135
  },
136
  {
137
+ "epoch": 0.42,
138
+ "learning_rate": 4.166666666666667e-05,
139
+ "loss": 0.6437,
140
  "step": 220
141
  },
142
  {
143
+ "epoch": 0.44,
144
+ "learning_rate": 4.356060606060606e-05,
145
+ "loss": 0.6308,
146
  "step": 230
147
  },
148
  {
149
+ "epoch": 0.45,
150
+ "learning_rate": 4.545454545454546e-05,
151
+ "loss": 0.6119,
152
  "step": 240
153
  },
154
  {
155
+ "epoch": 0.47,
156
+ "learning_rate": 4.7348484848484855e-05,
157
+ "loss": 0.5998,
158
  "step": 250
159
  },
160
  {
161
+ "epoch": 0.49,
162
+ "learning_rate": 4.9242424242424245e-05,
163
+ "loss": 0.6188,
164
  "step": 260
165
  },
166
  {
167
+ "epoch": 0.51,
168
+ "learning_rate": 4.987347110923661e-05,
169
+ "loss": 0.5837,
170
  "step": 270
171
  },
172
  {
173
+ "epoch": 0.53,
174
+ "learning_rate": 4.966258962463096e-05,
175
+ "loss": 0.585,
176
  "step": 280
177
  },
178
  {
179
+ "epoch": 0.55,
180
+ "learning_rate": 4.945170814002531e-05,
181
+ "loss": 0.6001,
182
  "step": 290
183
  },
184
  {
185
+ "epoch": 0.57,
186
+ "learning_rate": 4.924082665541966e-05,
187
+ "loss": 0.5664,
188
  "step": 300
189
  },
190
  {
191
+ "epoch": 0.59,
192
+ "learning_rate": 4.9029945170814005e-05,
193
+ "loss": 0.5603,
194
  "step": 310
195
  },
196
  {
197
+ "epoch": 0.61,
198
+ "learning_rate": 4.8819063686208354e-05,
199
+ "loss": 0.601,
200
  "step": 320
201
  },
202
  {
203
+ "epoch": 0.63,
204
+ "learning_rate": 4.86081822016027e-05,
205
+ "loss": 0.5489,
206
  "step": 330
207
  },
208
  {
209
+ "epoch": 0.64,
210
+ "learning_rate": 4.839730071699705e-05,
211
+ "loss": 0.5525,
212
  "step": 340
213
  },
214
  {
215
+ "epoch": 0.66,
216
+ "learning_rate": 4.81864192323914e-05,
217
+ "loss": 0.5451,
218
  "step": 350
219
  },
220
  {
221
+ "epoch": 0.68,
222
+ "learning_rate": 4.797553774778575e-05,
223
+ "loss": 0.5515,
224
  "step": 360
225
  },
226
  {
227
+ "epoch": 0.7,
228
+ "learning_rate": 4.7764656263180096e-05,
229
+ "loss": 0.4817,
230
  "step": 370
231
  },
232
  {
233
+ "epoch": 0.72,
234
+ "learning_rate": 4.7553774778574444e-05,
235
+ "loss": 0.5398,
236
  "step": 380
237
  },
238
  {
239
+ "epoch": 0.74,
240
+ "learning_rate": 4.734289329396879e-05,
241
+ "loss": 0.4987,
242
  "step": 390
243
  },
244
  {
245
+ "epoch": 0.76,
246
+ "learning_rate": 4.713201180936314e-05,
247
+ "loss": 0.5202,
248
  "step": 400
249
  },
250
  {
251
+ "epoch": 0.78,
252
+ "learning_rate": 4.692113032475749e-05,
253
+ "loss": 0.5143,
254
  "step": 410
255
  },
256
  {
257
+ "epoch": 0.8,
258
+ "learning_rate": 4.671024884015184e-05,
259
+ "loss": 0.4946,
260
  "step": 420
261
  },
262
  {
263
+ "epoch": 0.81,
264
+ "learning_rate": 4.6499367355546186e-05,
265
+ "loss": 0.5009,
266
  "step": 430
267
  },
268
  {
269
+ "epoch": 0.83,
270
+ "learning_rate": 4.6288485870940535e-05,
271
+ "loss": 0.5015,
272
  "step": 440
273
  },
274
  {
275
+ "epoch": 0.85,
276
+ "learning_rate": 4.607760438633488e-05,
277
+ "loss": 0.4837,
278
  "step": 450
279
  },
280
  {
281
+ "epoch": 0.87,
282
+ "learning_rate": 4.586672290172923e-05,
283
+ "loss": 0.4814,
284
  "step": 460
285
  },
286
  {
287
+ "epoch": 0.89,
288
+ "learning_rate": 4.565584141712358e-05,
289
+ "loss": 0.4827,
290
  "step": 470
291
  },
292
  {
293
+ "epoch": 0.91,
294
+ "learning_rate": 4.544495993251793e-05,
295
+ "loss": 0.5304,
296
  "step": 480
297
  },
298
  {
299
+ "epoch": 0.93,
300
+ "learning_rate": 4.523407844791228e-05,
301
+ "loss": 0.4592,
302
  "step": 490
303
  },
304
  {
305
+ "epoch": 0.95,
306
+ "learning_rate": 4.5023196963306626e-05,
307
+ "loss": 0.4759,
308
  "step": 500
309
  },
310
  {
311
+ "epoch": 0.97,
312
+ "learning_rate": 4.4812315478700974e-05,
313
+ "loss": 0.4431,
314
  "step": 510
315
  },
316
  {
317
+ "epoch": 0.99,
318
+ "learning_rate": 4.460143399409532e-05,
319
+ "loss": 0.4378,
320
  "step": 520
321
  },
322
  {
323
+ "epoch": 1.0,
324
+ "eval_accuracy": 0.8507462686567164,
325
+ "eval_loss": 0.36681434512138367,
326
+ "eval_runtime": 136.5295,
327
+ "eval_samples_per_second": 70.666,
328
+ "eval_steps_per_second": 2.212,
329
+ "step": 527
330
+ },
331
+ {
332
+ "epoch": 1.01,
333
+ "learning_rate": 4.439055250948967e-05,
334
+ "loss": 0.486,
335
  "step": 530
336
  },
337
  {
338
+ "epoch": 1.02,
339
+ "learning_rate": 4.417967102488402e-05,
340
+ "loss": 0.4533,
341
  "step": 540
342
  },
343
  {
344
+ "epoch": 1.04,
345
+ "learning_rate": 4.396878954027837e-05,
346
+ "loss": 0.4119,
347
  "step": 550
348
  },
349
  {
350
+ "epoch": 1.06,
351
+ "learning_rate": 4.3757908055672716e-05,
352
+ "loss": 0.4279,
353
  "step": 560
354
  },
355
  {
356
+ "epoch": 1.08,
357
+ "learning_rate": 4.3547026571067065e-05,
358
+ "loss": 0.4389,
359
  "step": 570
360
  },
361
  {
362
+ "epoch": 1.1,
363
+ "learning_rate": 4.333614508646141e-05,
364
+ "loss": 0.4344,
365
  "step": 580
366
  },
367
  {
368
+ "epoch": 1.12,
369
+ "learning_rate": 4.312526360185576e-05,
370
+ "loss": 0.3977,
371
  "step": 590
372
  },
373
  {
374
+ "epoch": 1.14,
375
+ "learning_rate": 4.291438211725011e-05,
376
+ "loss": 0.413,
377
  "step": 600
378
  },
379
  {
380
+ "epoch": 1.16,
381
+ "learning_rate": 4.270350063264446e-05,
382
+ "loss": 0.4302,
383
  "step": 610
384
  },
385
  {
386
+ "epoch": 1.18,
387
+ "learning_rate": 4.249261914803881e-05,
388
+ "loss": 0.4229,
389
  "step": 620
390
  },
391
  {
392
+ "epoch": 1.2,
393
+ "learning_rate": 4.2281737663433155e-05,
394
+ "loss": 0.4015,
395
  "step": 630
396
  },
397
  {
398
+ "epoch": 1.21,
399
+ "learning_rate": 4.20708561788275e-05,
400
+ "loss": 0.3929,
401
  "step": 640
402
  },
403
  {
404
+ "epoch": 1.23,
405
+ "learning_rate": 4.1859974694221845e-05,
406
+ "loss": 0.4133,
407
  "step": 650
408
  },
409
  {
410
+ "epoch": 1.25,
411
+ "learning_rate": 4.1649093209616194e-05,
412
+ "loss": 0.384,
413
  "step": 660
414
  },
415
  {
416
+ "epoch": 1.27,
417
+ "learning_rate": 4.143821172501054e-05,
418
+ "loss": 0.4043,
 
 
 
 
 
 
 
 
 
419
  "step": 670
420
  },
421
  {
422
+ "epoch": 1.29,
423
+ "learning_rate": 4.122733024040489e-05,
424
+ "loss": 0.3696,
425
  "step": 680
426
  },
427
  {
428
+ "epoch": 1.31,
429
+ "learning_rate": 4.101644875579924e-05,
430
+ "loss": 0.3942,
431
  "step": 690
432
  },
433
  {
434
+ "epoch": 1.33,
435
+ "learning_rate": 4.080556727119359e-05,
436
+ "loss": 0.3915,
437
  "step": 700
438
  },
439
  {
440
+ "epoch": 1.35,
441
+ "learning_rate": 4.0594685786587936e-05,
442
+ "loss": 0.3431,
443
  "step": 710
444
  },
445
  {
446
+ "epoch": 1.37,
447
+ "learning_rate": 4.0383804301982284e-05,
448
+ "loss": 0.352,
449
  "step": 720
450
  },
451
  {
452
+ "epoch": 1.38,
453
+ "learning_rate": 4.017292281737663e-05,
454
+ "loss": 0.3876,
455
  "step": 730
456
  },
457
  {
458
+ "epoch": 1.4,
459
+ "learning_rate": 3.996204133277098e-05,
460
+ "loss": 0.3589,
461
  "step": 740
462
  },
463
  {
464
+ "epoch": 1.42,
465
+ "learning_rate": 3.975115984816533e-05,
466
+ "loss": 0.3519,
467
  "step": 750
468
  },
469
  {
470
+ "epoch": 1.44,
471
+ "learning_rate": 3.954027836355968e-05,
472
+ "loss": 0.3908,
473
  "step": 760
474
  },
475
  {
476
+ "epoch": 1.46,
477
+ "learning_rate": 3.9329396878954027e-05,
478
+ "loss": 0.3677,
479
  "step": 770
480
  },
481
  {
482
+ "epoch": 1.48,
483
+ "learning_rate": 3.9118515394348375e-05,
484
+ "loss": 0.3549,
485
  "step": 780
486
  },
487
  {
488
+ "epoch": 1.5,
489
+ "learning_rate": 3.8907633909742723e-05,
490
+ "loss": 0.3453,
491
  "step": 790
492
  },
493
  {
494
+ "epoch": 1.52,
495
+ "learning_rate": 3.869675242513707e-05,
496
+ "loss": 0.3366,
497
  "step": 800
498
  },
499
  {
500
+ "epoch": 1.54,
501
+ "learning_rate": 3.848587094053142e-05,
502
+ "loss": 0.3553,
503
  "step": 810
504
  },
505
  {
506
+ "epoch": 1.56,
507
+ "learning_rate": 3.827498945592577e-05,
508
+ "loss": 0.3224,
509
  "step": 820
510
  },
511
  {
512
+ "epoch": 1.57,
513
+ "learning_rate": 3.806410797132012e-05,
514
+ "loss": 0.345,
515
  "step": 830
516
  },
517
  {
518
+ "epoch": 1.59,
519
+ "learning_rate": 3.7853226486714466e-05,
520
+ "loss": 0.3298,
521
  "step": 840
522
  },
523
  {
524
+ "epoch": 1.61,
525
+ "learning_rate": 3.7642345002108814e-05,
526
+ "loss": 0.3417,
527
  "step": 850
528
  },
529
  {
530
+ "epoch": 1.63,
531
+ "learning_rate": 3.743146351750316e-05,
532
+ "loss": 0.3832,
533
  "step": 860
534
  },
535
  {
536
+ "epoch": 1.65,
537
+ "learning_rate": 3.722058203289751e-05,
538
+ "loss": 0.3343,
539
  "step": 870
540
  },
541
  {
542
+ "epoch": 1.67,
543
+ "learning_rate": 3.700970054829186e-05,
544
+ "loss": 0.3112,
545
  "step": 880
546
  },
547
  {
548
+ "epoch": 1.69,
549
+ "learning_rate": 3.679881906368621e-05,
550
+ "loss": 0.3355,
551
  "step": 890
552
  },
553
  {
554
+ "epoch": 1.71,
555
+ "learning_rate": 3.658793757908056e-05,
556
+ "loss": 0.3223,
557
  "step": 900
558
  },
559
  {
560
+ "epoch": 1.73,
561
+ "learning_rate": 3.637705609447491e-05,
562
+ "loss": 0.3229,
563
  "step": 910
564
  },
565
  {
566
+ "epoch": 1.74,
567
+ "learning_rate": 3.616617460986926e-05,
568
+ "loss": 0.3395,
569
  "step": 920
570
  },
571
  {
572
+ "epoch": 1.76,
573
+ "learning_rate": 3.595529312526361e-05,
574
+ "loss": 0.3092,
575
  "step": 930
576
  },
577
  {
578
+ "epoch": 1.78,
579
+ "learning_rate": 3.574441164065796e-05,
580
+ "loss": 0.3006,
581
  "step": 940
582
  },
583
  {
584
+ "epoch": 1.8,
585
+ "learning_rate": 3.5533530156052305e-05,
586
+ "loss": 0.3557,
587
  "step": 950
588
  },
589
  {
590
+ "epoch": 1.82,
591
+ "learning_rate": 3.5322648671446654e-05,
592
+ "loss": 0.325,
593
  "step": 960
594
  },
595
  {
596
+ "epoch": 1.84,
597
+ "learning_rate": 3.5111767186841e-05,
598
+ "loss": 0.3039,
599
  "step": 970
600
  },
601
  {
602
+ "epoch": 1.86,
603
+ "learning_rate": 3.490088570223535e-05,
604
+ "loss": 0.3506,
605
  "step": 980
606
  },
607
  {
608
+ "epoch": 1.88,
609
+ "learning_rate": 3.46900042176297e-05,
610
+ "loss": 0.3268,
611
  "step": 990
612
  },
613
  {
614
+ "epoch": 1.9,
615
+ "learning_rate": 3.447912273302405e-05,
616
+ "loss": 0.3007,
617
  "step": 1000
618
  },
619
  {
620
+ "epoch": 1.92,
621
+ "learning_rate": 3.426824124841839e-05,
622
+ "loss": 0.3202,
623
  "step": 1010
624
  },
625
  {
626
+ "epoch": 1.93,
627
+ "learning_rate": 3.405735976381274e-05,
628
+ "loss": 0.3118,
629
  "step": 1020
630
  },
631
  {
632
+ "epoch": 1.95,
633
+ "learning_rate": 3.3846478279207086e-05,
634
+ "loss": 0.3278,
635
  "step": 1030
636
  },
637
  {
638
+ "epoch": 1.97,
639
+ "learning_rate": 3.3635596794601434e-05,
640
+ "loss": 0.292,
641
  "step": 1040
642
  },
643
  {
644
+ "epoch": 1.99,
645
+ "learning_rate": 3.342471530999578e-05,
646
+ "loss": 0.3133,
647
  "step": 1050
648
  },
649
  {
650
+ "epoch": 2.0,
651
+ "eval_accuracy": 0.9429933665008292,
652
+ "eval_loss": 0.15862515568733215,
653
+ "eval_runtime": 141.8451,
654
+ "eval_samples_per_second": 68.018,
655
+ "eval_steps_per_second": 2.129,
656
+ "step": 1054
657
+ },
658
+ {
659
+ "epoch": 2.01,
660
+ "learning_rate": 3.321383382539013e-05,
661
+ "loss": 0.3435,
662
  "step": 1060
663
  },
664
  {
665
+ "epoch": 2.03,
666
+ "learning_rate": 3.300295234078448e-05,
667
+ "loss": 0.31,
668
  "step": 1070
669
  },
670
  {
671
+ "epoch": 2.05,
672
+ "learning_rate": 3.279207085617883e-05,
673
+ "loss": 0.3035,
674
  "step": 1080
675
  },
676
  {
677
+ "epoch": 2.07,
678
+ "learning_rate": 3.2581189371573177e-05,
679
+ "loss": 0.3138,
680
  "step": 1090
681
  },
682
  {
683
+ "epoch": 2.09,
684
+ "learning_rate": 3.2370307886967525e-05,
685
+ "loss": 0.309,
686
  "step": 1100
687
  },
688
  {
689
+ "epoch": 2.11,
690
+ "learning_rate": 3.2159426402361873e-05,
691
+ "loss": 0.3034,
692
  "step": 1110
693
  },
694
  {
695
+ "epoch": 2.13,
696
+ "learning_rate": 3.194854491775622e-05,
697
+ "loss": 0.2805,
698
  "step": 1120
699
  },
700
  {
701
+ "epoch": 2.14,
702
+ "learning_rate": 3.173766343315057e-05,
703
+ "loss": 0.2889,
704
  "step": 1130
705
  },
706
  {
707
+ "epoch": 2.16,
708
+ "learning_rate": 3.152678194854492e-05,
709
+ "loss": 0.2864,
710
  "step": 1140
711
  },
712
  {
713
+ "epoch": 2.18,
714
+ "learning_rate": 3.131590046393927e-05,
715
+ "loss": 0.2723,
716
  "step": 1150
717
  },
718
  {
719
+ "epoch": 2.2,
720
+ "learning_rate": 3.1105018979333616e-05,
721
+ "loss": 0.2763,
722
  "step": 1160
723
  },
724
  {
725
+ "epoch": 2.22,
726
+ "learning_rate": 3.0894137494727964e-05,
727
+ "loss": 0.2561,
728
  "step": 1170
729
  },
730
  {
731
+ "epoch": 2.24,
732
+ "learning_rate": 3.068325601012231e-05,
733
+ "loss": 0.2732,
734
  "step": 1180
735
  },
736
  {
737
+ "epoch": 2.26,
738
+ "learning_rate": 3.047237452551666e-05,
739
+ "loss": 0.2458,
740
  "step": 1190
741
  },
742
  {
743
+ "epoch": 2.28,
744
+ "learning_rate": 3.026149304091101e-05,
745
+ "loss": 0.2632,
746
  "step": 1200
747
  },
748
  {
749
+ "epoch": 2.3,
750
+ "learning_rate": 3.0050611556305358e-05,
751
+ "loss": 0.2889,
752
  "step": 1210
753
  },
754
  {
755
+ "epoch": 2.31,
756
+ "learning_rate": 2.9839730071699706e-05,
757
+ "loss": 0.2406,
758
  "step": 1220
759
  },
760
  {
761
+ "epoch": 2.33,
762
+ "learning_rate": 2.9628848587094055e-05,
763
+ "loss": 0.2566,
764
  "step": 1230
765
  },
766
  {
767
+ "epoch": 2.35,
768
+ "learning_rate": 2.9417967102488403e-05,
769
+ "loss": 0.2576,
770
  "step": 1240
771
  },
772
  {
773
+ "epoch": 2.37,
774
+ "learning_rate": 2.920708561788275e-05,
775
+ "loss": 0.268,
776
  "step": 1250
777
  },
778
  {
779
+ "epoch": 2.39,
780
+ "learning_rate": 2.89962041332771e-05,
781
+ "loss": 0.2556,
782
  "step": 1260
783
  },
784
  {
785
+ "epoch": 2.41,
786
+ "learning_rate": 2.878532264867145e-05,
787
+ "loss": 0.2577,
788
  "step": 1270
789
  },
790
  {
791
+ "epoch": 2.43,
792
+ "learning_rate": 2.8574441164065797e-05,
793
+ "loss": 0.2507,
794
  "step": 1280
795
  },
796
  {
797
+ "epoch": 2.45,
798
+ "learning_rate": 2.8363559679460145e-05,
799
+ "loss": 0.2271,
800
  "step": 1290
801
  },
802
  {
803
+ "epoch": 2.47,
804
+ "learning_rate": 2.8152678194854494e-05,
805
+ "loss": 0.2792,
806
  "step": 1300
807
  },
808
  {
809
+ "epoch": 2.49,
810
+ "learning_rate": 2.7941796710248842e-05,
811
+ "loss": 0.2755,
812
  "step": 1310
813
  },
814
  {
815
+ "epoch": 2.5,
816
+ "learning_rate": 2.773091522564319e-05,
817
+ "loss": 0.2456,
818
  "step": 1320
819
  },
820
  {
821
+ "epoch": 2.52,
822
+ "learning_rate": 2.752003374103754e-05,
823
+ "loss": 0.2463,
 
 
 
 
 
 
 
 
 
824
  "step": 1330
825
  },
826
  {
827
+ "epoch": 2.54,
828
+ "learning_rate": 2.7309152256431884e-05,
829
+ "loss": 0.2569,
830
  "step": 1340
831
  },
832
  {
833
+ "epoch": 2.56,
834
+ "learning_rate": 2.7098270771826233e-05,
835
+ "loss": 0.2609,
836
  "step": 1350
837
  },
838
  {
839
+ "epoch": 2.58,
840
+ "learning_rate": 2.688738928722058e-05,
841
+ "loss": 0.2437,
842
  "step": 1360
843
  },
844
  {
845
+ "epoch": 2.6,
846
+ "learning_rate": 2.667650780261493e-05,
847
+ "loss": 0.2491,
848
  "step": 1370
849
  },
850
  {
851
+ "epoch": 2.62,
852
+ "learning_rate": 2.6465626318009278e-05,
853
+ "loss": 0.1923,
854
  "step": 1380
855
  },
856
  {
857
+ "epoch": 2.64,
858
+ "learning_rate": 2.6254744833403626e-05,
859
+ "loss": 0.2435,
860
  "step": 1390
861
  },
862
  {
863
+ "epoch": 2.66,
864
+ "learning_rate": 2.6043863348797975e-05,
865
+ "loss": 0.2382,
866
  "step": 1400
867
  },
868
  {
869
+ "epoch": 2.67,
870
+ "learning_rate": 2.5832981864192323e-05,
871
+ "loss": 0.2316,
872
  "step": 1410
873
  },
874
  {
875
+ "epoch": 2.69,
876
+ "learning_rate": 2.562210037958667e-05,
877
+ "loss": 0.2294,
878
  "step": 1420
879
  },
880
  {
881
+ "epoch": 2.71,
882
+ "learning_rate": 2.541121889498102e-05,
883
+ "loss": 0.2187,
884
  "step": 1430
885
  },
886
  {
887
+ "epoch": 2.73,
888
+ "learning_rate": 2.520033741037537e-05,
889
+ "loss": 0.2595,
890
  "step": 1440
891
  },
892
  {
893
+ "epoch": 2.75,
894
+ "learning_rate": 2.498945592576972e-05,
895
+ "loss": 0.2401,
896
  "step": 1450
897
  },
898
  {
899
+ "epoch": 2.77,
900
+ "learning_rate": 2.477857444116407e-05,
901
+ "loss": 0.2344,
902
  "step": 1460
903
  },
904
  {
905
+ "epoch": 2.79,
906
+ "learning_rate": 2.4567692956558417e-05,
907
+ "loss": 0.2334,
908
  "step": 1470
909
  },
910
  {
911
+ "epoch": 2.81,
912
+ "learning_rate": 2.4356811471952766e-05,
913
+ "loss": 0.2697,
914
  "step": 1480
915
  },
916
  {
917
+ "epoch": 2.83,
918
+ "learning_rate": 2.4145929987347114e-05,
919
+ "loss": 0.2407,
920
  "step": 1490
921
  },
922
  {
923
+ "epoch": 2.85,
924
+ "learning_rate": 2.3935048502741463e-05,
925
+ "loss": 0.2434,
926
  "step": 1500
927
  },
928
  {
929
+ "epoch": 2.86,
930
+ "learning_rate": 2.372416701813581e-05,
931
+ "loss": 0.2781,
932
  "step": 1510
933
  },
934
  {
935
+ "epoch": 2.88,
936
+ "learning_rate": 2.351328553353016e-05,
937
+ "loss": 0.2377,
938
  "step": 1520
939
  },
940
  {
941
+ "epoch": 2.9,
942
+ "learning_rate": 2.3302404048924504e-05,
943
+ "loss": 0.2478,
944
  "step": 1530
945
  },
946
  {
947
+ "epoch": 2.92,
948
+ "learning_rate": 2.3091522564318853e-05,
949
+ "loss": 0.2558,
950
  "step": 1540
951
  },
952
  {
953
+ "epoch": 2.94,
954
+ "learning_rate": 2.28806410797132e-05,
955
+ "loss": 0.2378,
956
  "step": 1550
957
  },
958
  {
959
+ "epoch": 2.96,
960
+ "learning_rate": 2.266975959510755e-05,
961
+ "loss": 0.2303,
962
  "step": 1560
963
  },
964
  {
965
+ "epoch": 2.98,
966
+ "learning_rate": 2.2458878110501898e-05,
967
+ "loss": 0.271,
968
  "step": 1570
969
  },
970
  {
971
+ "epoch": 3.0,
972
+ "learning_rate": 2.2247996625896247e-05,
973
+ "loss": 0.2065,
974
  "step": 1580
975
  },
976
  {
977
+ "epoch": 3.0,
978
+ "eval_accuracy": 0.9607172470978441,
979
+ "eval_loss": 0.1048618033528328,
980
+ "eval_runtime": 131.6181,
981
+ "eval_samples_per_second": 73.303,
982
+ "eval_steps_per_second": 2.295,
983
+ "step": 1581
984
+ },
985
+ {
986
+ "epoch": 3.02,
987
+ "learning_rate": 2.2037115141290595e-05,
988
+ "loss": 0.211,
989
  "step": 1590
990
  },
991
  {
992
+ "epoch": 3.04,
993
+ "learning_rate": 2.1826233656684943e-05,
994
+ "loss": 0.2139,
995
  "step": 1600
996
  },
997
  {
998
+ "epoch": 3.05,
999
+ "learning_rate": 2.1615352172079292e-05,
1000
+ "loss": 0.226,
1001
  "step": 1610
1002
  },
1003
  {
1004
+ "epoch": 3.07,
1005
+ "learning_rate": 2.140447068747364e-05,
1006
+ "loss": 0.2295,
1007
  "step": 1620
1008
  },
1009
  {
1010
+ "epoch": 3.09,
1011
+ "learning_rate": 2.119358920286799e-05,
1012
+ "loss": 0.2284,
1013
  "step": 1630
1014
  },
1015
  {
1016
+ "epoch": 3.11,
1017
+ "learning_rate": 2.0982707718262337e-05,
1018
+ "loss": 0.2181,
1019
  "step": 1640
1020
  },
1021
  {
1022
+ "epoch": 3.13,
1023
+ "learning_rate": 2.0771826233656686e-05,
1024
+ "loss": 0.2174,
1025
  "step": 1650
1026
  },
1027
  {
1028
+ "epoch": 3.15,
1029
+ "learning_rate": 2.0560944749051034e-05,
1030
+ "loss": 0.2301,
1031
  "step": 1660
1032
  },
1033
  {
1034
+ "epoch": 3.17,
1035
+ "learning_rate": 2.0350063264445383e-05,
1036
+ "loss": 0.2078,
1037
  "step": 1670
1038
  },
1039
  {
1040
+ "epoch": 3.19,
1041
+ "learning_rate": 2.013918177983973e-05,
1042
+ "loss": 0.2338,
1043
  "step": 1680
1044
  },
1045
  {
1046
+ "epoch": 3.21,
1047
+ "learning_rate": 1.992830029523408e-05,
1048
+ "loss": 0.235,
1049
  "step": 1690
1050
  },
1051
  {
1052
+ "epoch": 3.23,
1053
+ "learning_rate": 1.9717418810628428e-05,
1054
+ "loss": 0.2152,
1055
  "step": 1700
1056
  },
1057
  {
1058
+ "epoch": 3.24,
1059
+ "learning_rate": 1.9506537326022776e-05,
1060
+ "loss": 0.2154,
1061
  "step": 1710
1062
  },
1063
  {
1064
+ "epoch": 3.26,
1065
+ "learning_rate": 1.9295655841417125e-05,
1066
+ "loss": 0.2146,
1067
  "step": 1720
1068
  },
1069
  {
1070
+ "epoch": 3.28,
1071
+ "learning_rate": 1.9084774356811473e-05,
1072
+ "loss": 0.2279,
1073
  "step": 1730
1074
  },
1075
  {
1076
+ "epoch": 3.3,
1077
+ "learning_rate": 1.887389287220582e-05,
1078
+ "loss": 0.2176,
1079
  "step": 1740
1080
  },
1081
  {
1082
+ "epoch": 3.32,
1083
+ "learning_rate": 1.8663011387600167e-05,
1084
  "loss": 0.2409,
1085
  "step": 1750
1086
  },
1087
  {
1088
+ "epoch": 3.34,
1089
+ "learning_rate": 1.8452129902994515e-05,
1090
+ "loss": 0.2294,
1091
  "step": 1760
1092
  },
1093
  {
1094
+ "epoch": 3.36,
1095
+ "learning_rate": 1.8241248418388867e-05,
1096
+ "loss": 0.2118,
1097
  "step": 1770
1098
  },
1099
  {
1100
+ "epoch": 3.38,
1101
+ "learning_rate": 1.8030366933783215e-05,
1102
+ "loss": 0.2141,
1103
  "step": 1780
1104
  },
1105
  {
1106
+ "epoch": 3.4,
1107
+ "learning_rate": 1.7819485449177564e-05,
1108
+ "loss": 0.1921,
1109
  "step": 1790
1110
  },
1111
  {
1112
+ "epoch": 3.41,
1113
+ "learning_rate": 1.7608603964571912e-05,
1114
+ "loss": 0.184,
1115
  "step": 1800
1116
  },
1117
  {
1118
+ "epoch": 3.43,
1119
+ "learning_rate": 1.739772247996626e-05,
1120
+ "loss": 0.2109,
1121
  "step": 1810
1122
  },
1123
  {
1124
+ "epoch": 3.45,
1125
+ "learning_rate": 1.718684099536061e-05,
1126
+ "loss": 0.2119,
1127
  "step": 1820
1128
  },
1129
  {
1130
+ "epoch": 3.47,
1131
+ "learning_rate": 1.6975959510754958e-05,
1132
+ "loss": 0.2294,
1133
  "step": 1830
1134
  },
1135
  {
1136
+ "epoch": 3.49,
1137
+ "learning_rate": 1.6765078026149306e-05,
1138
+ "loss": 0.234,
1139
  "step": 1840
1140
  },
1141
  {
1142
+ "epoch": 3.51,
1143
+ "learning_rate": 1.6554196541543654e-05,
1144
+ "loss": 0.1865,
1145
  "step": 1850
1146
  },
1147
  {
1148
+ "epoch": 3.53,
1149
+ "learning_rate": 1.6343315056938003e-05,
1150
+ "loss": 0.22,
1151
  "step": 1860
1152
  },
1153
  {
1154
+ "epoch": 3.55,
1155
+ "learning_rate": 1.613243357233235e-05,
1156
+ "loss": 0.2034,
1157
  "step": 1870
1158
  },
1159
  {
1160
+ "epoch": 3.57,
1161
+ "learning_rate": 1.59215520877267e-05,
1162
+ "loss": 0.2088,
1163
  "step": 1880
1164
  },
1165
  {
1166
+ "epoch": 3.59,
1167
+ "learning_rate": 1.5710670603121048e-05,
1168
+ "loss": 0.1922,
1169
  "step": 1890
1170
  },
1171
  {
1172
+ "epoch": 3.6,
1173
+ "learning_rate": 1.5499789118515397e-05,
1174
+ "loss": 0.2146,
1175
  "step": 1900
1176
  },
1177
  {
1178
+ "epoch": 3.62,
1179
+ "learning_rate": 1.5288907633909745e-05,
1180
+ "loss": 0.2198,
1181
  "step": 1910
1182
  },
1183
  {
1184
+ "epoch": 3.64,
1185
+ "learning_rate": 1.5078026149304092e-05,
1186
+ "loss": 0.2008,
1187
  "step": 1920
1188
  },
1189
  {
1190
+ "epoch": 3.66,
1191
+ "learning_rate": 1.486714466469844e-05,
1192
+ "loss": 0.204,
1193
  "step": 1930
1194
  },
1195
  {
1196
+ "epoch": 3.68,
1197
+ "learning_rate": 1.4656263180092789e-05,
1198
+ "loss": 0.2053,
1199
  "step": 1940
1200
  },
1201
  {
1202
+ "epoch": 3.7,
1203
+ "learning_rate": 1.4445381695487137e-05,
1204
+ "loss": 0.2194,
1205
  "step": 1950
1206
  },
1207
  {
1208
+ "epoch": 3.72,
1209
+ "learning_rate": 1.4234500210881486e-05,
1210
+ "loss": 0.1869,
1211
  "step": 1960
1212
  },
1213
  {
1214
+ "epoch": 3.74,
1215
+ "learning_rate": 1.4023618726275834e-05,
1216
+ "loss": 0.2008,
1217
  "step": 1970
1218
  },
1219
  {
1220
+ "epoch": 3.76,
1221
+ "learning_rate": 1.3812737241670182e-05,
1222
+ "loss": 0.2011,
1223
  "step": 1980
1224
  },
1225
  {
1226
+ "epoch": 3.77,
1227
+ "learning_rate": 1.360185575706453e-05,
1228
+ "loss": 0.1867,
1229
+ "step": 1990
 
 
 
1230
  },
1231
  {
1232
+ "epoch": 3.79,
1233
+ "learning_rate": 1.3390974272458878e-05,
1234
+ "loss": 0.1892,
1235
+ "step": 2000
1236
+ },
1237
+ {
1238
+ "epoch": 3.81,
1239
+ "learning_rate": 1.3180092787853226e-05,
1240
+ "loss": 0.1646,
1241
+ "step": 2010
1242
+ },
1243
+ {
1244
+ "epoch": 3.83,
1245
+ "learning_rate": 1.2969211303247574e-05,
1246
+ "loss": 0.2096,
1247
+ "step": 2020
1248
+ },
1249
+ {
1250
+ "epoch": 3.85,
1251
+ "learning_rate": 1.2758329818641923e-05,
1252
+ "loss": 0.1834,
1253
+ "step": 2030
1254
+ },
1255
+ {
1256
+ "epoch": 3.87,
1257
+ "learning_rate": 1.2547448334036271e-05,
1258
+ "loss": 0.1909,
1259
+ "step": 2040
1260
+ },
1261
+ {
1262
+ "epoch": 3.89,
1263
+ "learning_rate": 1.2336566849430621e-05,
1264
+ "loss": 0.1867,
1265
+ "step": 2050
1266
+ },
1267
+ {
1268
+ "epoch": 3.91,
1269
+ "learning_rate": 1.212568536482497e-05,
1270
+ "loss": 0.197,
1271
+ "step": 2060
1272
+ },
1273
+ {
1274
+ "epoch": 3.93,
1275
+ "learning_rate": 1.1914803880219318e-05,
1276
+ "loss": 0.2021,
1277
+ "step": 2070
1278
+ },
1279
+ {
1280
+ "epoch": 3.95,
1281
+ "learning_rate": 1.1703922395613665e-05,
1282
+ "loss": 0.1906,
1283
+ "step": 2080
1284
+ },
1285
+ {
1286
+ "epoch": 3.96,
1287
+ "learning_rate": 1.1493040911008014e-05,
1288
+ "loss": 0.1851,
1289
+ "step": 2090
1290
+ },
1291
+ {
1292
+ "epoch": 3.98,
1293
+ "learning_rate": 1.1282159426402362e-05,
1294
+ "loss": 0.22,
1295
+ "step": 2100
1296
+ },
1297
+ {
1298
+ "epoch": 4.0,
1299
+ "eval_accuracy": 0.9701492537313433,
1300
+ "eval_loss": 0.08384203910827637,
1301
+ "eval_runtime": 131.303,
1302
+ "eval_samples_per_second": 73.479,
1303
+ "eval_steps_per_second": 2.3,
1304
+ "step": 2108
1305
+ },
1306
+ {
1307
+ "epoch": 4.0,
1308
+ "learning_rate": 1.107127794179671e-05,
1309
+ "loss": 0.2109,
1310
+ "step": 2110
1311
+ },
1312
+ {
1313
+ "epoch": 4.02,
1314
+ "learning_rate": 1.0860396457191059e-05,
1315
+ "loss": 0.1934,
1316
+ "step": 2120
1317
+ },
1318
+ {
1319
+ "epoch": 4.04,
1320
+ "learning_rate": 1.0649514972585407e-05,
1321
+ "loss": 0.1984,
1322
+ "step": 2130
1323
+ },
1324
+ {
1325
+ "epoch": 4.06,
1326
+ "learning_rate": 1.0438633487979756e-05,
1327
+ "loss": 0.2011,
1328
+ "step": 2140
1329
+ },
1330
+ {
1331
+ "epoch": 4.08,
1332
+ "learning_rate": 1.0227752003374104e-05,
1333
+ "loss": 0.1784,
1334
+ "step": 2150
1335
+ },
1336
+ {
1337
+ "epoch": 4.1,
1338
+ "learning_rate": 1.0016870518768453e-05,
1339
+ "loss": 0.2025,
1340
+ "step": 2160
1341
+ },
1342
+ {
1343
+ "epoch": 4.12,
1344
+ "learning_rate": 9.805989034162801e-06,
1345
+ "loss": 0.1766,
1346
+ "step": 2170
1347
+ },
1348
+ {
1349
+ "epoch": 4.14,
1350
+ "learning_rate": 9.59510754955715e-06,
1351
+ "loss": 0.1866,
1352
+ "step": 2180
1353
+ },
1354
+ {
1355
+ "epoch": 4.16,
1356
+ "learning_rate": 9.384226064951498e-06,
1357
+ "loss": 0.1985,
1358
+ "step": 2190
1359
+ },
1360
+ {
1361
+ "epoch": 4.17,
1362
+ "learning_rate": 9.173344580345846e-06,
1363
+ "loss": 0.174,
1364
+ "step": 2200
1365
+ },
1366
+ {
1367
+ "epoch": 4.19,
1368
+ "learning_rate": 8.962463095740195e-06,
1369
+ "loss": 0.2045,
1370
+ "step": 2210
1371
+ },
1372
+ {
1373
+ "epoch": 4.21,
1374
+ "learning_rate": 8.751581611134543e-06,
1375
+ "loss": 0.1847,
1376
+ "step": 2220
1377
+ },
1378
+ {
1379
+ "epoch": 4.23,
1380
+ "learning_rate": 8.540700126528892e-06,
1381
+ "loss": 0.1785,
1382
+ "step": 2230
1383
+ },
1384
+ {
1385
+ "epoch": 4.25,
1386
+ "learning_rate": 8.32981864192324e-06,
1387
+ "loss": 0.1421,
1388
+ "step": 2240
1389
+ },
1390
+ {
1391
+ "epoch": 4.27,
1392
+ "learning_rate": 8.118937157317589e-06,
1393
+ "loss": 0.162,
1394
+ "step": 2250
1395
+ },
1396
+ {
1397
+ "epoch": 4.29,
1398
+ "learning_rate": 7.908055672711937e-06,
1399
+ "loss": 0.1989,
1400
+ "step": 2260
1401
+ },
1402
+ {
1403
+ "epoch": 4.31,
1404
+ "learning_rate": 7.697174188106285e-06,
1405
+ "loss": 0.1748,
1406
+ "step": 2270
1407
+ },
1408
+ {
1409
+ "epoch": 4.33,
1410
+ "learning_rate": 7.486292703500633e-06,
1411
+ "loss": 0.1702,
1412
+ "step": 2280
1413
+ },
1414
+ {
1415
+ "epoch": 4.34,
1416
+ "learning_rate": 7.2754112188949814e-06,
1417
+ "loss": 0.1721,
1418
+ "step": 2290
1419
+ },
1420
+ {
1421
+ "epoch": 4.36,
1422
+ "learning_rate": 7.06452973428933e-06,
1423
+ "loss": 0.1754,
1424
+ "step": 2300
1425
+ },
1426
+ {
1427
+ "epoch": 4.38,
1428
+ "learning_rate": 6.853648249683678e-06,
1429
+ "loss": 0.175,
1430
+ "step": 2310
1431
+ },
1432
+ {
1433
+ "epoch": 4.4,
1434
+ "learning_rate": 6.642766765078026e-06,
1435
+ "loss": 0.1906,
1436
+ "step": 2320
1437
+ },
1438
+ {
1439
+ "epoch": 4.42,
1440
+ "learning_rate": 6.431885280472374e-06,
1441
+ "loss": 0.1789,
1442
+ "step": 2330
1443
+ },
1444
+ {
1445
+ "epoch": 4.44,
1446
+ "learning_rate": 6.221003795866724e-06,
1447
+ "loss": 0.1607,
1448
+ "step": 2340
1449
+ },
1450
+ {
1451
+ "epoch": 4.46,
1452
+ "learning_rate": 6.010122311261072e-06,
1453
+ "loss": 0.1747,
1454
+ "step": 2350
1455
+ },
1456
+ {
1457
+ "epoch": 4.48,
1458
+ "learning_rate": 5.79924082665542e-06,
1459
+ "loss": 0.1611,
1460
+ "step": 2360
1461
+ },
1462
+ {
1463
+ "epoch": 4.5,
1464
+ "learning_rate": 5.588359342049768e-06,
1465
+ "loss": 0.1611,
1466
+ "step": 2370
1467
+ },
1468
+ {
1469
+ "epoch": 4.52,
1470
+ "learning_rate": 5.3774778574441165e-06,
1471
+ "loss": 0.1674,
1472
+ "step": 2380
1473
+ },
1474
+ {
1475
+ "epoch": 4.53,
1476
+ "learning_rate": 5.166596372838465e-06,
1477
+ "loss": 0.1797,
1478
+ "step": 2390
1479
+ },
1480
+ {
1481
+ "epoch": 4.55,
1482
+ "learning_rate": 4.955714888232813e-06,
1483
+ "loss": 0.2051,
1484
+ "step": 2400
1485
+ },
1486
+ {
1487
+ "epoch": 4.57,
1488
+ "learning_rate": 4.744833403627162e-06,
1489
+ "loss": 0.171,
1490
+ "step": 2410
1491
+ },
1492
+ {
1493
+ "epoch": 4.59,
1494
+ "learning_rate": 4.53395191902151e-06,
1495
+ "loss": 0.1622,
1496
+ "step": 2420
1497
+ },
1498
+ {
1499
+ "epoch": 4.61,
1500
+ "learning_rate": 4.323070434415859e-06,
1501
+ "loss": 0.1891,
1502
+ "step": 2430
1503
+ },
1504
+ {
1505
+ "epoch": 4.63,
1506
+ "learning_rate": 4.112188949810207e-06,
1507
+ "loss": 0.1884,
1508
+ "step": 2440
1509
+ },
1510
+ {
1511
+ "epoch": 4.65,
1512
+ "learning_rate": 3.901307465204556e-06,
1513
+ "loss": 0.1946,
1514
+ "step": 2450
1515
+ },
1516
+ {
1517
+ "epoch": 4.67,
1518
+ "learning_rate": 3.6904259805989036e-06,
1519
+ "loss": 0.1769,
1520
+ "step": 2460
1521
+ },
1522
+ {
1523
+ "epoch": 4.69,
1524
+ "learning_rate": 3.479544495993252e-06,
1525
+ "loss": 0.19,
1526
+ "step": 2470
1527
+ },
1528
+ {
1529
+ "epoch": 4.7,
1530
+ "learning_rate": 3.2686630113876e-06,
1531
+ "loss": 0.1741,
1532
+ "step": 2480
1533
+ },
1534
+ {
1535
+ "epoch": 4.72,
1536
+ "learning_rate": 3.057781526781949e-06,
1537
+ "loss": 0.1697,
1538
+ "step": 2490
1539
+ },
1540
+ {
1541
+ "epoch": 4.74,
1542
+ "learning_rate": 2.846900042176297e-06,
1543
+ "loss": 0.1901,
1544
+ "step": 2500
1545
+ },
1546
+ {
1547
+ "epoch": 4.76,
1548
+ "learning_rate": 2.6360185575706454e-06,
1549
+ "loss": 0.1794,
1550
+ "step": 2510
1551
+ },
1552
+ {
1553
+ "epoch": 4.78,
1554
+ "learning_rate": 2.425137072964994e-06,
1555
+ "loss": 0.1514,
1556
+ "step": 2520
1557
+ },
1558
+ {
1559
+ "epoch": 4.8,
1560
+ "learning_rate": 2.2142555883593423e-06,
1561
+ "loss": 0.1841,
1562
+ "step": 2530
1563
+ },
1564
+ {
1565
+ "epoch": 4.82,
1566
+ "learning_rate": 2.0033741037536907e-06,
1567
+ "loss": 0.1676,
1568
+ "step": 2540
1569
+ },
1570
+ {
1571
+ "epoch": 4.84,
1572
+ "learning_rate": 1.792492619148039e-06,
1573
+ "loss": 0.1723,
1574
+ "step": 2550
1575
+ },
1576
+ {
1577
+ "epoch": 4.86,
1578
+ "learning_rate": 1.5816111345423871e-06,
1579
+ "loss": 0.186,
1580
+ "step": 2560
1581
+ },
1582
+ {
1583
+ "epoch": 4.88,
1584
+ "learning_rate": 1.3707296499367356e-06,
1585
+ "loss": 0.1669,
1586
+ "step": 2570
1587
+ },
1588
+ {
1589
+ "epoch": 4.89,
1590
+ "learning_rate": 1.1598481653310838e-06,
1591
+ "loss": 0.1801,
1592
+ "step": 2580
1593
+ },
1594
+ {
1595
+ "epoch": 4.91,
1596
+ "learning_rate": 9.489666807254323e-07,
1597
+ "loss": 0.1469,
1598
+ "step": 2590
1599
+ },
1600
+ {
1601
+ "epoch": 4.93,
1602
+ "learning_rate": 7.380851961197808e-07,
1603
+ "loss": 0.1642,
1604
+ "step": 2600
1605
+ },
1606
+ {
1607
+ "epoch": 4.95,
1608
+ "learning_rate": 5.272037115141291e-07,
1609
+ "loss": 0.1691,
1610
+ "step": 2610
1611
+ },
1612
+ {
1613
+ "epoch": 4.97,
1614
+ "learning_rate": 3.1632222690847745e-07,
1615
+ "loss": 0.1644,
1616
+ "step": 2620
1617
+ },
1618
+ {
1619
+ "epoch": 4.99,
1620
+ "learning_rate": 1.0544074230282581e-07,
1621
+ "loss": 0.173,
1622
+ "step": 2630
1623
+ },
1624
+ {
1625
+ "epoch": 5.0,
1626
+ "eval_accuracy": 0.9741915422885572,
1627
+ "eval_loss": 0.06838314980268478,
1628
+ "eval_runtime": 133.6073,
1629
+ "eval_samples_per_second": 72.212,
1630
+ "eval_steps_per_second": 2.26,
1631
+ "step": 2635
1632
+ },
1633
+ {
1634
+ "epoch": 5.0,
1635
+ "step": 2635,
1636
+ "total_flos": 8.391154825876193e+18,
1637
+ "train_loss": 0.33317647269826234,
1638
+ "train_runtime": 8035.3313,
1639
+ "train_samples_per_second": 42.02,
1640
+ "train_steps_per_second": 0.328
1641
  }
1642
  ],
1643
+ "max_steps": 2635,
1644
+ "num_train_epochs": 5,
1645
+ "total_flos": 8.391154825876193e+18,
1646
  "trial_name": null,
1647
  "trial_params": null
1648
  }