flavioferlin committed on
Commit
7ed05ff
·
verified ·
1 Parent(s): c6260ba

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 6.666666666666667,
3
- "eval_accuracy": 0.2857142857142857,
4
- "eval_loss": 1.9166103601455688,
5
- "eval_runtime": 1.8725,
6
- "eval_samples_per_second": 11.215,
7
- "eval_steps_per_second": 0.534
8
  }
 
1
  {
2
  "epoch": 6.666666666666667,
3
+ "total_flos": 3.032854323351552e+16,
4
+ "train_loss": 1.8625539779663085,
5
+ "train_runtime": 349.1269,
6
+ "train_samples_per_second": 5.213,
7
+ "train_steps_per_second": 0.029
8
  }
config.json CHANGED
@@ -17,24 +17,26 @@
17
  "hidden_dropout_prob": 0.0,
18
  "hidden_size": 768,
19
  "id2label": {
20
- "0": "Angry",
21
- "1": "Disgust",
22
- "2": "Fear",
23
- "3": "Happy",
24
- "4": "Neutral",
25
- "5": "Sad",
26
- "6": "Surprise"
 
27
  },
28
  "image_size": 224,
29
  "initializer_range": 0.02,
30
  "label2id": {
31
- "Angry": 0,
32
- "Disgust": 1,
33
- "Fear": 2,
34
- "Happy": 3,
35
- "Neutral": 4,
36
- "Sad": 5,
37
- "Surprise": 6
 
38
  },
39
  "layer_norm_eps": 1e-05,
40
  "mlp_ratio": 4.0,
 
17
  "hidden_dropout_prob": 0.0,
18
  "hidden_size": 768,
19
  "id2label": {
20
+ "0": "anger",
21
+ "1": "surprise",
22
+ "2": "contempt",
23
+ "3": "happy",
24
+ "4": "neutral",
25
+ "5": "fear",
26
+ "6": "sad",
27
+ "7": "disgust"
28
  },
29
  "image_size": 224,
30
  "initializer_range": 0.02,
31
  "label2id": {
32
+ "anger": 0,
33
+ "contempt": 2,
34
+ "disgust": 7,
35
+ "fear": 5,
36
+ "happy": 3,
37
+ "neutral": 4,
38
+ "sad": 6,
39
+ "surprise": 1
40
  },
41
  "layer_norm_eps": 1e-05,
42
  "mlp_ratio": 4.0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d38d9611ef31094553af5575d1da14182032c9c7f6710a53f45d40d5533d553
3
- size 110358212
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5100a5639b8a67ad81543bb7a1e7e67fdb516e892031d70e1ca0733c7d4e639
3
+ size 110361288
runs/Dec15_01-14-35_ef188d9b7084/events.out.tfevents.1734225287.ef188d9b7084.405.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e00bbc475c5b43bcea30e74677d45159a7fce14cc826295142b660eb69436f4
3
+ size 9389
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.99388379204893,
3
- "total_flos": 1.5560468970586767e+18,
4
- "train_loss": 1.1774282864997723,
5
- "train_runtime": 1319.6364,
6
- "train_samples_per_second": 47.534,
7
- "train_steps_per_second": 0.371
8
  }
 
1
  {
2
+ "epoch": 6.666666666666667,
3
+ "total_flos": 3.032854323351552e+16,
4
+ "train_loss": 1.8625539779663085,
5
+ "train_runtime": 349.1269,
6
+ "train_samples_per_second": 5.213,
7
+ "train_steps_per_second": 0.029
8
  }
trainer_state.json CHANGED
@@ -1,390 +1,97 @@
1
  {
2
- "best_metric": 0.6587779690189329,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-489",
4
- "epoch": 2.99388379204893,
5
  "eval_steps": 500,
6
- "global_step": 489,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06116207951070336,
13
- "grad_norm": 15.902920722961426,
14
- "learning_rate": 1.0204081632653061e-05,
15
- "loss": 2.118,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.12232415902140673,
20
- "grad_norm": 15.299479484558105,
21
- "learning_rate": 2.0408163265306123e-05,
22
- "loss": 2.0253,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.1834862385321101,
27
- "grad_norm": 15.808608055114746,
28
- "learning_rate": 3.061224489795919e-05,
29
- "loss": 1.9515,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.24464831804281345,
34
- "grad_norm": 23.111419677734375,
35
- "learning_rate": 4.0816326530612245e-05,
36
- "loss": 1.7452,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.3058103975535168,
41
- "grad_norm": 24.703895568847656,
42
- "learning_rate": 4.988636363636364e-05,
43
- "loss": 1.5294,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.3669724770642202,
48
- "grad_norm": 26.341184616088867,
49
- "learning_rate": 4.875e-05,
50
- "loss": 1.4046,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.42813455657492355,
55
- "grad_norm": 23.89405059814453,
56
- "learning_rate": 4.7613636363636367e-05,
57
- "loss": 1.3451,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.4892966360856269,
62
- "grad_norm": 22.658981323242188,
63
- "learning_rate": 4.647727272727273e-05,
64
- "loss": 1.315,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.5504587155963303,
69
- "grad_norm": 32.74925231933594,
70
- "learning_rate": 4.5340909090909095e-05,
71
- "loss": 1.3286,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.6116207951070336,
76
- "grad_norm": 34.46269607543945,
77
- "learning_rate": 4.4204545454545455e-05,
78
- "loss": 1.253,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.672782874617737,
83
- "grad_norm": 20.414508819580078,
84
- "learning_rate": 4.3068181818181816e-05,
85
- "loss": 1.2188,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.7339449541284404,
90
- "grad_norm": 22.735107421875,
91
- "learning_rate": 4.193181818181818e-05,
92
- "loss": 1.246,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 0.7951070336391437,
97
- "grad_norm": 23.86024284362793,
98
- "learning_rate": 4.079545454545455e-05,
99
- "loss": 1.1803,
100
- "step": 130
101
- },
102
- {
103
- "epoch": 0.8562691131498471,
104
- "grad_norm": 34.048885345458984,
105
- "learning_rate": 3.965909090909091e-05,
106
- "loss": 1.1822,
107
- "step": 140
108
- },
109
- {
110
- "epoch": 0.9174311926605505,
111
- "grad_norm": 27.799007415771484,
112
- "learning_rate": 3.852272727272728e-05,
113
- "loss": 1.1625,
114
- "step": 150
115
- },
116
- {
117
- "epoch": 0.9785932721712538,
118
- "grad_norm": 27.95413589477539,
119
- "learning_rate": 3.738636363636363e-05,
120
- "loss": 1.1631,
121
- "step": 160
122
- },
123
- {
124
- "epoch": 0.9969418960244648,
125
- "eval_accuracy": 0.5851979345955249,
126
- "eval_loss": 1.0535521507263184,
127
- "eval_runtime": 15.0707,
128
- "eval_samples_per_second": 154.206,
129
- "eval_steps_per_second": 4.844,
130
- "step": 163
131
- },
132
- {
133
- "epoch": 1.0412844036697249,
134
- "grad_norm": 24.71836280822754,
135
- "learning_rate": 3.625e-05,
136
- "loss": 1.1357,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 1.1024464831804281,
141
- "grad_norm": 22.25391387939453,
142
- "learning_rate": 3.511363636363637e-05,
143
- "loss": 1.1481,
144
- "step": 180
145
- },
146
- {
147
- "epoch": 1.1636085626911314,
148
- "grad_norm": 22.642160415649414,
149
- "learning_rate": 3.397727272727273e-05,
150
- "loss": 1.1071,
151
- "step": 190
152
- },
153
- {
154
- "epoch": 1.224770642201835,
155
- "grad_norm": 23.1135311126709,
156
- "learning_rate": 3.2840909090909096e-05,
157
- "loss": 1.1349,
158
- "step": 200
159
- },
160
- {
161
- "epoch": 1.2859327217125383,
162
- "grad_norm": 19.403104782104492,
163
- "learning_rate": 3.1704545454545456e-05,
164
- "loss": 1.1109,
165
- "step": 210
166
- },
167
- {
168
- "epoch": 1.3470948012232415,
169
- "grad_norm": 29.44010353088379,
170
- "learning_rate": 3.056818181818182e-05,
171
- "loss": 1.1196,
172
- "step": 220
173
- },
174
- {
175
- "epoch": 1.408256880733945,
176
- "grad_norm": 29.79566764831543,
177
- "learning_rate": 2.943181818181818e-05,
178
- "loss": 1.0698,
179
- "step": 230
180
- },
181
- {
182
- "epoch": 1.4694189602446484,
183
- "grad_norm": 28.45584487915039,
184
- "learning_rate": 2.829545454545455e-05,
185
- "loss": 1.0802,
186
- "step": 240
187
- },
188
- {
189
- "epoch": 1.5305810397553516,
190
- "grad_norm": 22.51582145690918,
191
- "learning_rate": 2.7159090909090913e-05,
192
- "loss": 1.1015,
193
- "step": 250
194
- },
195
- {
196
- "epoch": 1.591743119266055,
197
- "grad_norm": 21.58193016052246,
198
- "learning_rate": 2.6022727272727277e-05,
199
- "loss": 1.0757,
200
- "step": 260
201
- },
202
- {
203
- "epoch": 1.6529051987767585,
204
- "grad_norm": 17.50481605529785,
205
- "learning_rate": 2.4886363636363637e-05,
206
- "loss": 1.0788,
207
- "step": 270
208
- },
209
- {
210
- "epoch": 1.7140672782874617,
211
- "grad_norm": 22.194198608398438,
212
- "learning_rate": 2.375e-05,
213
- "loss": 1.0431,
214
- "step": 280
215
- },
216
- {
217
- "epoch": 1.7752293577981653,
218
- "grad_norm": 28.903148651123047,
219
- "learning_rate": 2.2613636363636365e-05,
220
- "loss": 1.051,
221
- "step": 290
222
- },
223
- {
224
- "epoch": 1.8363914373088686,
225
- "grad_norm": 19.929737091064453,
226
- "learning_rate": 2.147727272727273e-05,
227
- "loss": 1.0269,
228
- "step": 300
229
- },
230
- {
231
- "epoch": 1.8975535168195719,
232
- "grad_norm": 23.347131729125977,
233
- "learning_rate": 2.034090909090909e-05,
234
- "loss": 1.0226,
235
- "step": 310
236
- },
237
- {
238
- "epoch": 1.9587155963302751,
239
- "grad_norm": 22.177705764770508,
240
- "learning_rate": 1.9204545454545454e-05,
241
- "loss": 1.096,
242
- "step": 320
243
- },
244
- {
245
- "epoch": 1.9954128440366974,
246
- "eval_accuracy": 0.641566265060241,
247
- "eval_loss": 0.9225364327430725,
248
- "eval_runtime": 15.4942,
249
- "eval_samples_per_second": 149.992,
250
- "eval_steps_per_second": 4.711,
251
- "step": 326
252
- },
253
- {
254
- "epoch": 2.021406727828746,
255
- "grad_norm": 24.717140197753906,
256
- "learning_rate": 1.806818181818182e-05,
257
- "loss": 0.9788,
258
- "step": 330
259
- },
260
- {
261
- "epoch": 2.0825688073394497,
262
- "grad_norm": 30.048473358154297,
263
- "learning_rate": 1.6931818181818182e-05,
264
- "loss": 1.0206,
265
- "step": 340
266
- },
267
- {
268
- "epoch": 2.143730886850153,
269
- "grad_norm": 24.10106086730957,
270
- "learning_rate": 1.5795454545454546e-05,
271
- "loss": 0.963,
272
- "step": 350
273
- },
274
- {
275
- "epoch": 2.2048929663608563,
276
- "grad_norm": 26.799161911010742,
277
- "learning_rate": 1.4659090909090909e-05,
278
- "loss": 1.0239,
279
- "step": 360
280
- },
281
- {
282
- "epoch": 2.2660550458715596,
283
- "grad_norm": 24.41145896911621,
284
- "learning_rate": 1.3522727272727273e-05,
285
- "loss": 0.9914,
286
- "step": 370
287
- },
288
- {
289
- "epoch": 2.327217125382263,
290
- "grad_norm": 25.299041748046875,
291
- "learning_rate": 1.2386363636363638e-05,
292
- "loss": 1.0049,
293
- "step": 380
294
- },
295
- {
296
- "epoch": 2.388379204892966,
297
- "grad_norm": 20.791305541992188,
298
- "learning_rate": 1.125e-05,
299
- "loss": 1.0156,
300
- "step": 390
301
- },
302
- {
303
- "epoch": 2.44954128440367,
304
- "grad_norm": 27.16722297668457,
305
- "learning_rate": 1.0113636363636365e-05,
306
- "loss": 1.059,
307
- "step": 400
308
- },
309
- {
310
- "epoch": 2.510703363914373,
311
- "grad_norm": 23.442485809326172,
312
- "learning_rate": 8.977272727272727e-06,
313
- "loss": 0.9931,
314
- "step": 410
315
- },
316
- {
317
- "epoch": 2.5718654434250765,
318
- "grad_norm": 26.62813377380371,
319
- "learning_rate": 7.840909090909091e-06,
320
- "loss": 1.0159,
321
- "step": 420
322
- },
323
- {
324
- "epoch": 2.63302752293578,
325
- "grad_norm": 19.204635620117188,
326
- "learning_rate": 6.704545454545455e-06,
327
- "loss": 1.0279,
328
- "step": 430
329
- },
330
- {
331
- "epoch": 2.694189602446483,
332
- "grad_norm": 23.251445770263672,
333
- "learning_rate": 5.568181818181818e-06,
334
- "loss": 0.9971,
335
- "step": 440
336
- },
337
- {
338
- "epoch": 2.7553516819571864,
339
- "grad_norm": 25.26046371459961,
340
- "learning_rate": 4.4318181818181824e-06,
341
- "loss": 1.0267,
342
- "step": 450
343
- },
344
- {
345
- "epoch": 2.81651376146789,
346
- "grad_norm": 19.80088233947754,
347
- "learning_rate": 3.295454545454545e-06,
348
- "loss": 0.9795,
349
- "step": 460
350
- },
351
- {
352
- "epoch": 2.8776758409785934,
353
- "grad_norm": 23.87816047668457,
354
- "learning_rate": 2.1590909090909092e-06,
355
- "loss": 1.0087,
356
- "step": 470
357
- },
358
- {
359
- "epoch": 2.9388379204892967,
360
- "grad_norm": 21.836708068847656,
361
- "learning_rate": 1.0227272727272729e-06,
362
- "loss": 0.9571,
363
- "step": 480
364
- },
365
- {
366
- "epoch": 2.99388379204893,
367
- "eval_accuracy": 0.6587779690189329,
368
- "eval_loss": 0.8806350827217102,
369
- "eval_runtime": 16.023,
370
- "eval_samples_per_second": 145.041,
371
- "eval_steps_per_second": 4.556,
372
- "step": 489
373
  },
374
  {
375
- "epoch": 2.99388379204893,
376
- "step": 489,
377
- "total_flos": 1.5560468970586767e+18,
378
- "train_loss": 1.1774282864997723,
379
- "train_runtime": 1319.6364,
380
- "train_samples_per_second": 47.534,
381
- "train_steps_per_second": 0.371
382
  }
383
  ],
384
  "logging_steps": 10,
385
- "max_steps": 489,
386
  "num_input_tokens_seen": 0,
387
- "num_train_epochs": 3,
388
  "save_steps": 500,
389
  "stateful_callbacks": {
390
  "TrainerControl": {
@@ -398,7 +105,7 @@
398
  "attributes": {}
399
  }
400
  },
401
- "total_flos": 1.5560468970586767e+18,
402
  "train_batch_size": 32,
403
  "trial_name": null,
404
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2857142857142857,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-10",
4
+ "epoch": 6.666666666666667,
5
  "eval_steps": 500,
6
+ "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.6666666666666666,
13
+ "eval_accuracy": 0.09523809523809523,
14
+ "eval_loss": 1.8175530433654785,
15
+ "eval_runtime": 18.3681,
16
+ "eval_samples_per_second": 1.143,
17
+ "eval_steps_per_second": 0.054,
18
+ "step": 1
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.14285714285714285,
23
+ "eval_loss": 1.8961031436920166,
24
+ "eval_runtime": 0.9567,
25
+ "eval_samples_per_second": 21.951,
26
+ "eval_steps_per_second": 1.045,
27
+ "step": 3
28
+ },
29
+ {
30
+ "epoch": 2.6666666666666665,
31
+ "eval_accuracy": 0.14285714285714285,
32
+ "eval_loss": 1.9159153699874878,
33
+ "eval_runtime": 0.9471,
34
+ "eval_samples_per_second": 22.173,
35
+ "eval_steps_per_second": 1.056,
36
+ "step": 4
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "eval_accuracy": 0.19047619047619047,
41
+ "eval_loss": 1.8906145095825195,
42
+ "eval_runtime": 0.9581,
43
+ "eval_samples_per_second": 21.919,
44
+ "eval_steps_per_second": 1.044,
45
+ "step": 6
46
+ },
47
+ {
48
+ "epoch": 4.666666666666667,
49
+ "eval_accuracy": 0.19047619047619047,
50
+ "eval_loss": 1.8720425367355347,
51
+ "eval_runtime": 1.0672,
52
+ "eval_samples_per_second": 19.678,
53
+ "eval_steps_per_second": 0.937,
54
+ "step": 7
55
+ },
56
+ {
57
+ "epoch": 6.0,
58
+ "eval_accuracy": 0.19047619047619047,
59
+ "eval_loss": 1.8452097177505493,
60
+ "eval_runtime": 1.0316,
61
+ "eval_samples_per_second": 20.357,
62
+ "eval_steps_per_second": 0.969,
63
+ "step": 9
64
+ },
65
+ {
66
+ "epoch": 6.666666666666667,
67
+ "grad_norm": 13.042926788330078,
68
+ "learning_rate": 0.0,
69
+ "loss": 1.8626,
70
  "step": 10
71
  },
72
  {
73
+ "epoch": 6.666666666666667,
74
+ "eval_accuracy": 0.2857142857142857,
75
+ "eval_loss": 1.8388439416885376,
76
+ "eval_runtime": 1.2038,
77
+ "eval_samples_per_second": 17.445,
78
+ "eval_steps_per_second": 0.831,
79
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  },
81
  {
82
+ "epoch": 6.666666666666667,
83
+ "step": 10,
84
+ "total_flos": 3.032854323351552e+16,
85
+ "train_loss": 1.8625539779663085,
86
+ "train_runtime": 349.1269,
87
+ "train_samples_per_second": 5.213,
88
+ "train_steps_per_second": 0.029
89
  }
90
  ],
91
  "logging_steps": 10,
92
+ "max_steps": 10,
93
  "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 10,
95
  "save_steps": 500,
96
  "stateful_callbacks": {
97
  "TrainerControl": {
 
105
  "attributes": {}
106
  }
107
  },
108
+ "total_flos": 3.032854323351552e+16,
109
  "train_batch_size": 32,
110
  "trial_name": null,
111
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:056918aa1b6849b752eb9cb7a84e900476d40514926035a7b22c7218385953ff
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53581cdaaa08f2df72a2c1a9225fbf542987cceb74268d020c6a0047c0fe6214
3
  size 5368