JEdward7777 commited on
Commit
4fb1ae9
·
1 Parent(s): c9ce9e9

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +258 -258
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 39.8,
3
- "eval_accuracy": 0.9714285714285714,
4
- "eval_loss": 0.1382756382226944,
5
- "eval_runtime": 6.6641,
6
- "eval_samples_per_second": 5.252,
7
- "eval_steps_per_second": 0.3,
8
- "total_flos": 3.088453228308726e+17,
9
- "train_loss": 0.15159874260425568,
10
- "train_runtime": 6474.8036,
11
- "train_samples_per_second": 1.927,
12
- "train_steps_per_second": 0.012
13
  }
 
1
  {
2
+ "epoch": 39.73,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.0415542908012867,
5
+ "eval_runtime": 7.8769,
6
+ "eval_samples_per_second": 4.697,
7
+ "eval_steps_per_second": 0.254,
8
+ "total_flos": 3.262961773565706e+17,
9
+ "train_loss": 0.18748833239078522,
10
+ "train_runtime": 7500.5041,
11
+ "train_samples_per_second": 1.76,
12
+ "train_steps_per_second": 0.011
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.8,
3
- "eval_accuracy": 0.9714285714285714,
4
- "eval_loss": 0.1382756382226944,
5
- "eval_runtime": 6.6641,
6
- "eval_samples_per_second": 5.252,
7
- "eval_steps_per_second": 0.3
8
  }
 
1
  {
2
+ "epoch": 39.73,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.0415542908012867,
5
+ "eval_runtime": 7.8769,
6
+ "eval_samples_per_second": 4.697,
7
+ "eval_steps_per_second": 0.254
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 39.8,
3
- "total_flos": 3.088453228308726e+17,
4
- "train_loss": 0.15159874260425568,
5
- "train_runtime": 6474.8036,
6
- "train_samples_per_second": 1.927,
7
- "train_steps_per_second": 0.012
8
  }
 
1
  {
2
+ "epoch": 39.73,
3
+ "total_flos": 3.262961773565706e+17,
4
+ "train_loss": 0.18748833239078522,
5
+ "train_runtime": 7500.5041,
6
+ "train_samples_per_second": 1.76,
7
+ "train_steps_per_second": 0.011
8
  }
trainer_state.json CHANGED
@@ -1,409 +1,409 @@
1
  {
2
- "best_metric": 0.9714285714285714,
3
- "best_model_checkpoint": "delivery_truck_classification\\checkpoint-4",
4
- "epoch": 39.8,
5
  "global_step": 80,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.8,
12
- "eval_accuracy": 0.9428571428571428,
13
- "eval_loss": 0.19194960594177246,
14
- "eval_runtime": 8.8411,
15
- "eval_samples_per_second": 3.959,
16
- "eval_steps_per_second": 0.226,
17
  "step": 2
18
  },
19
  {
20
- "epoch": 1.8,
21
- "eval_accuracy": 0.9714285714285714,
22
- "eval_loss": 0.1382756382226944,
23
- "eval_runtime": 7.2367,
24
- "eval_samples_per_second": 4.836,
25
- "eval_steps_per_second": 0.276,
26
  "step": 4
27
  },
28
  {
29
- "epoch": 2.8,
30
- "eval_accuracy": 0.9142857142857143,
31
- "eval_loss": 0.19303296506404877,
32
- "eval_runtime": 7.0677,
33
- "eval_samples_per_second": 4.952,
34
- "eval_steps_per_second": 0.283,
35
  "step": 6
36
  },
37
  {
38
- "epoch": 3.8,
39
- "eval_accuracy": 0.9714285714285714,
40
- "eval_loss": 0.1462627798318863,
41
- "eval_runtime": 7.1631,
42
- "eval_samples_per_second": 4.886,
43
- "eval_steps_per_second": 0.279,
44
  "step": 8
45
  },
46
  {
47
- "epoch": 4.8,
48
- "eval_accuracy": 0.9714285714285714,
49
- "eval_loss": 0.1735035479068756,
50
- "eval_runtime": 6.91,
51
- "eval_samples_per_second": 5.065,
52
- "eval_steps_per_second": 0.289,
53
  "step": 10
54
  },
55
  {
56
- "epoch": 5.8,
57
- "eval_accuracy": 0.9714285714285714,
58
- "eval_loss": 0.1692284345626831,
59
- "eval_runtime": 6.8605,
60
- "eval_samples_per_second": 5.102,
61
- "eval_steps_per_second": 0.292,
62
  "step": 12
63
  },
64
  {
65
- "epoch": 6.8,
66
- "eval_accuracy": 0.9714285714285714,
67
- "eval_loss": 0.1626075655221939,
68
- "eval_runtime": 6.8284,
69
- "eval_samples_per_second": 5.126,
70
- "eval_steps_per_second": 0.293,
71
  "step": 14
72
  },
73
  {
74
- "epoch": 7.8,
75
- "eval_accuracy": 0.9714285714285714,
76
- "eval_loss": 0.16586509346961975,
77
- "eval_runtime": 6.3075,
78
- "eval_samples_per_second": 5.549,
79
- "eval_steps_per_second": 0.317,
80
  "step": 16
81
  },
82
  {
83
- "epoch": 8.8,
84
- "eval_accuracy": 0.9714285714285714,
85
- "eval_loss": 0.1622493714094162,
86
- "eval_runtime": 6.8493,
87
- "eval_samples_per_second": 5.11,
88
- "eval_steps_per_second": 0.292,
89
  "step": 18
90
  },
91
  {
92
- "epoch": 9.8,
93
  "learning_rate": 4.166666666666667e-05,
94
- "loss": 0.2046,
95
  "step": 20
96
  },
97
  {
98
- "epoch": 9.8,
99
- "eval_accuracy": 0.9714285714285714,
100
- "eval_loss": 0.1598205715417862,
101
- "eval_runtime": 6.8584,
102
- "eval_samples_per_second": 5.103,
103
- "eval_steps_per_second": 0.292,
104
  "step": 20
105
  },
106
  {
107
- "epoch": 10.8,
108
- "eval_accuracy": 0.9714285714285714,
109
- "eval_loss": 0.16681121289730072,
110
- "eval_runtime": 7.2746,
111
- "eval_samples_per_second": 4.811,
112
- "eval_steps_per_second": 0.275,
113
  "step": 22
114
  },
115
  {
116
- "epoch": 11.8,
117
- "eval_accuracy": 0.9714285714285714,
118
- "eval_loss": 0.1746995747089386,
119
- "eval_runtime": 7.1258,
120
- "eval_samples_per_second": 4.912,
121
- "eval_steps_per_second": 0.281,
122
  "step": 24
123
  },
124
  {
125
- "epoch": 12.8,
126
- "eval_accuracy": 0.9714285714285714,
127
- "eval_loss": 0.18037545680999756,
128
- "eval_runtime": 6.9863,
129
- "eval_samples_per_second": 5.01,
130
- "eval_steps_per_second": 0.286,
131
  "step": 26
132
  },
133
  {
134
- "epoch": 13.8,
135
- "eval_accuracy": 0.9714285714285714,
136
- "eval_loss": 0.18370455503463745,
137
- "eval_runtime": 6.1139,
138
- "eval_samples_per_second": 5.725,
139
- "eval_steps_per_second": 0.327,
140
  "step": 28
141
  },
142
  {
143
- "epoch": 14.8,
144
- "eval_accuracy": 0.9714285714285714,
145
- "eval_loss": 0.1837157905101776,
146
- "eval_runtime": 6.7444,
147
- "eval_samples_per_second": 5.189,
148
- "eval_steps_per_second": 0.297,
149
  "step": 30
150
  },
151
  {
152
- "epoch": 15.8,
153
- "eval_accuracy": 0.9714285714285714,
154
- "eval_loss": 0.18111634254455566,
155
- "eval_runtime": 7.0847,
156
- "eval_samples_per_second": 4.94,
157
- "eval_steps_per_second": 0.282,
158
  "step": 32
159
  },
160
  {
161
- "epoch": 16.8,
162
- "eval_accuracy": 0.9714285714285714,
163
- "eval_loss": 0.18009454011917114,
164
- "eval_runtime": 6.8211,
165
- "eval_samples_per_second": 5.131,
166
- "eval_steps_per_second": 0.293,
167
  "step": 34
168
  },
169
  {
170
- "epoch": 17.8,
171
- "eval_accuracy": 0.9714285714285714,
172
- "eval_loss": 0.18408751487731934,
173
- "eval_runtime": 6.9341,
174
- "eval_samples_per_second": 5.048,
175
- "eval_steps_per_second": 0.288,
176
  "step": 36
177
  },
178
  {
179
- "epoch": 18.8,
180
- "eval_accuracy": 0.9714285714285714,
181
- "eval_loss": 0.1899442970752716,
182
- "eval_runtime": 7.0358,
183
- "eval_samples_per_second": 4.975,
184
- "eval_steps_per_second": 0.284,
185
  "step": 38
186
  },
187
  {
188
- "epoch": 19.8,
189
  "learning_rate": 2.777777777777778e-05,
190
- "loss": 0.1657,
191
  "step": 40
192
  },
193
  {
194
- "epoch": 19.8,
195
- "eval_accuracy": 0.9714285714285714,
196
- "eval_loss": 0.19598019123077393,
197
- "eval_runtime": 7.9601,
198
- "eval_samples_per_second": 4.397,
199
- "eval_steps_per_second": 0.251,
200
  "step": 40
201
  },
202
  {
203
- "epoch": 20.8,
204
- "eval_accuracy": 0.9714285714285714,
205
- "eval_loss": 0.19925238192081451,
206
- "eval_runtime": 7.4131,
207
- "eval_samples_per_second": 4.721,
208
- "eval_steps_per_second": 0.27,
209
  "step": 42
210
  },
211
  {
212
- "epoch": 21.8,
213
- "eval_accuracy": 0.9714285714285714,
214
- "eval_loss": 0.20172713696956635,
215
- "eval_runtime": 6.7891,
216
- "eval_samples_per_second": 5.155,
217
- "eval_steps_per_second": 0.295,
218
  "step": 44
219
  },
220
  {
221
- "epoch": 22.8,
222
- "eval_accuracy": 0.9714285714285714,
223
- "eval_loss": 0.2004331350326538,
224
- "eval_runtime": 6.2296,
225
- "eval_samples_per_second": 5.618,
226
- "eval_steps_per_second": 0.321,
227
  "step": 46
228
  },
229
  {
230
- "epoch": 23.8,
231
- "eval_accuracy": 0.9714285714285714,
232
- "eval_loss": 0.19216616451740265,
233
- "eval_runtime": 7.0857,
234
- "eval_samples_per_second": 4.94,
235
- "eval_steps_per_second": 0.282,
236
  "step": 48
237
  },
238
  {
239
- "epoch": 24.8,
240
- "eval_accuracy": 0.9714285714285714,
241
- "eval_loss": 0.18559373915195465,
242
- "eval_runtime": 7.3612,
243
- "eval_samples_per_second": 4.755,
244
- "eval_steps_per_second": 0.272,
245
  "step": 50
246
  },
247
  {
248
- "epoch": 25.8,
249
- "eval_accuracy": 0.9714285714285714,
250
- "eval_loss": 0.18343603610992432,
251
- "eval_runtime": 6.8088,
252
- "eval_samples_per_second": 5.14,
253
- "eval_steps_per_second": 0.294,
254
  "step": 52
255
  },
256
  {
257
- "epoch": 26.8,
258
- "eval_accuracy": 0.9714285714285714,
259
- "eval_loss": 0.18461596965789795,
260
- "eval_runtime": 6.6873,
261
- "eval_samples_per_second": 5.234,
262
- "eval_steps_per_second": 0.299,
263
  "step": 54
264
  },
265
  {
266
- "epoch": 27.8,
267
- "eval_accuracy": 0.9714285714285714,
268
- "eval_loss": 0.18977026641368866,
269
- "eval_runtime": 6.826,
270
- "eval_samples_per_second": 5.127,
271
- "eval_steps_per_second": 0.293,
272
  "step": 56
273
  },
274
  {
275
- "epoch": 28.8,
276
- "eval_accuracy": 0.9714285714285714,
277
- "eval_loss": 0.1951347291469574,
278
- "eval_runtime": 6.8913,
279
- "eval_samples_per_second": 5.079,
280
- "eval_steps_per_second": 0.29,
281
  "step": 58
282
  },
283
  {
284
- "epoch": 29.8,
285
  "learning_rate": 1.388888888888889e-05,
286
- "loss": 0.1308,
287
  "step": 60
288
  },
289
  {
290
- "epoch": 29.8,
291
- "eval_accuracy": 0.9714285714285714,
292
- "eval_loss": 0.20185869932174683,
293
- "eval_runtime": 6.8601,
294
- "eval_samples_per_second": 5.102,
295
- "eval_steps_per_second": 0.292,
296
  "step": 60
297
  },
298
  {
299
- "epoch": 30.8,
300
- "eval_accuracy": 0.9714285714285714,
301
- "eval_loss": 0.2095019370317459,
302
- "eval_runtime": 7.2055,
303
- "eval_samples_per_second": 4.857,
304
- "eval_steps_per_second": 0.278,
305
  "step": 62
306
  },
307
  {
308
- "epoch": 31.8,
309
- "eval_accuracy": 0.9714285714285714,
310
- "eval_loss": 0.21445579826831818,
311
- "eval_runtime": 7.4924,
312
- "eval_samples_per_second": 4.671,
313
- "eval_steps_per_second": 0.267,
314
  "step": 64
315
  },
316
  {
317
- "epoch": 32.8,
318
- "eval_accuracy": 0.9714285714285714,
319
- "eval_loss": 0.21541449427604675,
320
- "eval_runtime": 6.8812,
321
- "eval_samples_per_second": 5.086,
322
- "eval_steps_per_second": 0.291,
323
  "step": 66
324
  },
325
  {
326
- "epoch": 33.8,
327
- "eval_accuracy": 0.9714285714285714,
328
- "eval_loss": 0.21372175216674805,
329
- "eval_runtime": 6.9147,
330
- "eval_samples_per_second": 5.062,
331
- "eval_steps_per_second": 0.289,
332
  "step": 68
333
  },
334
  {
335
- "epoch": 34.8,
336
- "eval_accuracy": 0.9714285714285714,
337
- "eval_loss": 0.2116171419620514,
338
- "eval_runtime": 7.0628,
339
- "eval_samples_per_second": 4.956,
340
- "eval_steps_per_second": 0.283,
341
  "step": 70
342
  },
343
  {
344
- "epoch": 35.8,
345
- "eval_accuracy": 0.9714285714285714,
346
- "eval_loss": 0.20960116386413574,
347
- "eval_runtime": 7.1202,
348
- "eval_samples_per_second": 4.916,
349
- "eval_steps_per_second": 0.281,
350
  "step": 72
351
  },
352
  {
353
- "epoch": 36.8,
354
- "eval_accuracy": 0.9714285714285714,
355
- "eval_loss": 0.20841823518276215,
356
- "eval_runtime": 7.1749,
357
- "eval_samples_per_second": 4.878,
358
- "eval_steps_per_second": 0.279,
359
  "step": 74
360
  },
361
  {
362
- "epoch": 37.8,
363
- "eval_accuracy": 0.9714285714285714,
364
- "eval_loss": 0.20780029892921448,
365
- "eval_runtime": 6.9083,
366
- "eval_samples_per_second": 5.066,
367
- "eval_steps_per_second": 0.29,
368
  "step": 76
369
  },
370
  {
371
- "epoch": 38.8,
372
- "eval_accuracy": 0.9714285714285714,
373
- "eval_loss": 0.2074960172176361,
374
- "eval_runtime": 6.9495,
375
- "eval_samples_per_second": 5.036,
376
- "eval_steps_per_second": 0.288,
377
  "step": 78
378
  },
379
  {
380
- "epoch": 39.8,
381
  "learning_rate": 0.0,
382
- "loss": 0.1053,
383
  "step": 80
384
  },
385
  {
386
- "epoch": 39.8,
387
- "eval_accuracy": 0.9714285714285714,
388
- "eval_loss": 0.20739802718162537,
389
- "eval_runtime": 6.9547,
390
- "eval_samples_per_second": 5.033,
391
- "eval_steps_per_second": 0.288,
392
  "step": 80
393
  },
394
  {
395
- "epoch": 39.8,
396
  "step": 80,
397
- "total_flos": 3.088453228308726e+17,
398
- "train_loss": 0.15159874260425568,
399
- "train_runtime": 6474.8036,
400
- "train_samples_per_second": 1.927,
401
- "train_steps_per_second": 0.012
402
  }
403
  ],
404
  "max_steps": 80,
405
  "num_train_epochs": 40,
406
- "total_flos": 3.088453228308726e+17,
407
  "trial_name": null,
408
  "trial_params": null
409
  }
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "delivery_truck_classification\\checkpoint-2",
4
+ "epoch": 39.72727272727273,
5
  "global_step": 80,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.73,
12
+ "eval_accuracy": 1.0,
13
+ "eval_loss": 0.0415542908012867,
14
+ "eval_runtime": 7.7969,
15
+ "eval_samples_per_second": 4.745,
16
+ "eval_steps_per_second": 0.257,
17
  "step": 2
18
  },
19
  {
20
+ "epoch": 1.73,
21
+ "eval_accuracy": 1.0,
22
+ "eval_loss": 0.03460519760847092,
23
+ "eval_runtime": 9.6408,
24
+ "eval_samples_per_second": 3.838,
25
+ "eval_steps_per_second": 0.207,
26
  "step": 4
27
  },
28
  {
29
+ "epoch": 2.73,
30
+ "eval_accuracy": 1.0,
31
+ "eval_loss": 0.029270131140947342,
32
+ "eval_runtime": 11.5968,
33
+ "eval_samples_per_second": 3.191,
34
+ "eval_steps_per_second": 0.172,
35
  "step": 6
36
  },
37
  {
38
+ "epoch": 3.73,
39
+ "eval_accuracy": 1.0,
40
+ "eval_loss": 0.01861225999891758,
41
+ "eval_runtime": 8.4881,
42
+ "eval_samples_per_second": 4.359,
43
+ "eval_steps_per_second": 0.236,
44
  "step": 8
45
  },
46
  {
47
+ "epoch": 4.73,
48
+ "eval_accuracy": 1.0,
49
+ "eval_loss": 0.020498055964708328,
50
+ "eval_runtime": 9.9982,
51
+ "eval_samples_per_second": 3.701,
52
+ "eval_steps_per_second": 0.2,
53
  "step": 10
54
  },
55
  {
56
+ "epoch": 5.73,
57
+ "eval_accuracy": 0.972972972972973,
58
+ "eval_loss": 0.06043216958642006,
59
+ "eval_runtime": 8.9914,
60
+ "eval_samples_per_second": 4.115,
61
+ "eval_steps_per_second": 0.222,
62
  "step": 12
63
  },
64
  {
65
+ "epoch": 6.73,
66
+ "eval_accuracy": 1.0,
67
+ "eval_loss": 0.0332246832549572,
68
+ "eval_runtime": 8.8254,
69
+ "eval_samples_per_second": 4.192,
70
+ "eval_steps_per_second": 0.227,
71
  "step": 14
72
  },
73
  {
74
+ "epoch": 7.73,
75
+ "eval_accuracy": 1.0,
76
+ "eval_loss": 0.025004582479596138,
77
+ "eval_runtime": 11.8616,
78
+ "eval_samples_per_second": 3.119,
79
+ "eval_steps_per_second": 0.169,
80
  "step": 16
81
  },
82
  {
83
+ "epoch": 8.73,
84
+ "eval_accuracy": 1.0,
85
+ "eval_loss": 0.03864024579524994,
86
+ "eval_runtime": 8.8843,
87
+ "eval_samples_per_second": 4.165,
88
+ "eval_steps_per_second": 0.225,
89
  "step": 18
90
  },
91
  {
92
+ "epoch": 9.73,
93
  "learning_rate": 4.166666666666667e-05,
94
+ "loss": 0.2483,
95
  "step": 20
96
  },
97
  {
98
+ "epoch": 9.73,
99
+ "eval_accuracy": 1.0,
100
+ "eval_loss": 0.04379289597272873,
101
+ "eval_runtime": 8.6752,
102
+ "eval_samples_per_second": 4.265,
103
+ "eval_steps_per_second": 0.231,
104
  "step": 20
105
  },
106
  {
107
+ "epoch": 10.73,
108
+ "eval_accuracy": 1.0,
109
+ "eval_loss": 0.04468226432800293,
110
+ "eval_runtime": 8.2704,
111
+ "eval_samples_per_second": 4.474,
112
+ "eval_steps_per_second": 0.242,
113
  "step": 22
114
  },
115
  {
116
+ "epoch": 11.73,
117
+ "eval_accuracy": 0.972972972972973,
118
+ "eval_loss": 0.06756877154111862,
119
+ "eval_runtime": 8.2231,
120
+ "eval_samples_per_second": 4.5,
121
+ "eval_steps_per_second": 0.243,
122
  "step": 24
123
  },
124
  {
125
+ "epoch": 12.73,
126
+ "eval_accuracy": 0.972972972972973,
127
+ "eval_loss": 0.07861027866601944,
128
+ "eval_runtime": 8.9079,
129
+ "eval_samples_per_second": 4.154,
130
+ "eval_steps_per_second": 0.225,
131
  "step": 26
132
  },
133
  {
134
+ "epoch": 13.73,
135
+ "eval_accuracy": 1.0,
136
+ "eval_loss": 0.03886393457651138,
137
+ "eval_runtime": 7.4091,
138
+ "eval_samples_per_second": 4.994,
139
+ "eval_steps_per_second": 0.27,
140
  "step": 28
141
  },
142
  {
143
+ "epoch": 14.73,
144
+ "eval_accuracy": 1.0,
145
+ "eval_loss": 0.02784094214439392,
146
+ "eval_runtime": 9.7376,
147
+ "eval_samples_per_second": 3.8,
148
+ "eval_steps_per_second": 0.205,
149
  "step": 30
150
  },
151
  {
152
+ "epoch": 15.73,
153
+ "eval_accuracy": 1.0,
154
+ "eval_loss": 0.02497038058936596,
155
+ "eval_runtime": 7.3256,
156
+ "eval_samples_per_second": 5.051,
157
+ "eval_steps_per_second": 0.273,
158
  "step": 32
159
  },
160
  {
161
+ "epoch": 16.73,
162
+ "eval_accuracy": 1.0,
163
+ "eval_loss": 0.02828327752649784,
164
+ "eval_runtime": 11.0099,
165
+ "eval_samples_per_second": 3.361,
166
+ "eval_steps_per_second": 0.182,
167
  "step": 34
168
  },
169
  {
170
+ "epoch": 17.73,
171
+ "eval_accuracy": 0.972972972972973,
172
+ "eval_loss": 0.05021832138299942,
173
+ "eval_runtime": 9.5468,
174
+ "eval_samples_per_second": 3.876,
175
+ "eval_steps_per_second": 0.209,
176
  "step": 36
177
  },
178
  {
179
+ "epoch": 18.73,
180
+ "eval_accuracy": 0.972972972972973,
181
+ "eval_loss": 0.07113233208656311,
182
+ "eval_runtime": 7.5069,
183
+ "eval_samples_per_second": 4.929,
184
+ "eval_steps_per_second": 0.266,
185
  "step": 38
186
  },
187
  {
188
+ "epoch": 19.73,
189
  "learning_rate": 2.777777777777778e-05,
190
+ "loss": 0.1759,
191
  "step": 40
192
  },
193
  {
194
+ "epoch": 19.73,
195
+ "eval_accuracy": 0.972972972972973,
196
+ "eval_loss": 0.06368651241064072,
197
+ "eval_runtime": 10.8688,
198
+ "eval_samples_per_second": 3.404,
199
+ "eval_steps_per_second": 0.184,
200
  "step": 40
201
  },
202
  {
203
+ "epoch": 20.73,
204
+ "eval_accuracy": 1.0,
205
+ "eval_loss": 0.04590895399451256,
206
+ "eval_runtime": 7.36,
207
+ "eval_samples_per_second": 5.027,
208
+ "eval_steps_per_second": 0.272,
209
  "step": 42
210
  },
211
  {
212
+ "epoch": 21.73,
213
+ "eval_accuracy": 1.0,
214
+ "eval_loss": 0.03937483951449394,
215
+ "eval_runtime": 7.6664,
216
+ "eval_samples_per_second": 4.826,
217
+ "eval_steps_per_second": 0.261,
218
  "step": 44
219
  },
220
  {
221
+ "epoch": 22.73,
222
+ "eval_accuracy": 1.0,
223
+ "eval_loss": 0.04189879819750786,
224
+ "eval_runtime": 7.6918,
225
+ "eval_samples_per_second": 4.81,
226
+ "eval_steps_per_second": 0.26,
227
  "step": 46
228
  },
229
  {
230
+ "epoch": 23.73,
231
+ "eval_accuracy": 1.0,
232
+ "eval_loss": 0.042252812534570694,
233
+ "eval_runtime": 9.2536,
234
+ "eval_samples_per_second": 3.998,
235
+ "eval_steps_per_second": 0.216,
236
  "step": 48
237
  },
238
  {
239
+ "epoch": 24.73,
240
+ "eval_accuracy": 0.972972972972973,
241
+ "eval_loss": 0.046256761997938156,
242
+ "eval_runtime": 8.404,
243
+ "eval_samples_per_second": 4.403,
244
+ "eval_steps_per_second": 0.238,
245
  "step": 50
246
  },
247
  {
248
+ "epoch": 25.73,
249
+ "eval_accuracy": 0.972972972972973,
250
+ "eval_loss": 0.050322916358709335,
251
+ "eval_runtime": 7.2832,
252
+ "eval_samples_per_second": 5.08,
253
+ "eval_steps_per_second": 0.275,
254
  "step": 52
255
  },
256
  {
257
+ "epoch": 26.73,
258
+ "eval_accuracy": 0.972972972972973,
259
+ "eval_loss": 0.061594847589731216,
260
+ "eval_runtime": 7.736,
261
+ "eval_samples_per_second": 4.783,
262
+ "eval_steps_per_second": 0.259,
263
  "step": 54
264
  },
265
  {
266
+ "epoch": 27.73,
267
+ "eval_accuracy": 0.972972972972973,
268
+ "eval_loss": 0.0641237199306488,
269
+ "eval_runtime": 8.3429,
270
+ "eval_samples_per_second": 4.435,
271
+ "eval_steps_per_second": 0.24,
272
  "step": 56
273
  },
274
  {
275
+ "epoch": 28.73,
276
+ "eval_accuracy": 0.972972972972973,
277
+ "eval_loss": 0.05289805307984352,
278
+ "eval_runtime": 6.6939,
279
+ "eval_samples_per_second": 5.527,
280
+ "eval_steps_per_second": 0.299,
281
  "step": 58
282
  },
283
  {
284
+ "epoch": 29.73,
285
  "learning_rate": 1.388888888888889e-05,
286
+ "loss": 0.1669,
287
  "step": 60
288
  },
289
  {
290
+ "epoch": 29.73,
291
+ "eval_accuracy": 0.972972972972973,
292
+ "eval_loss": 0.048487674444913864,
293
+ "eval_runtime": 8.2178,
294
+ "eval_samples_per_second": 4.502,
295
+ "eval_steps_per_second": 0.243,
296
  "step": 60
297
  },
298
  {
299
+ "epoch": 30.73,
300
+ "eval_accuracy": 0.972972972972973,
301
+ "eval_loss": 0.04654627665877342,
302
+ "eval_runtime": 9.0621,
303
+ "eval_samples_per_second": 4.083,
304
+ "eval_steps_per_second": 0.221,
305
  "step": 62
306
  },
307
  {
308
+ "epoch": 31.73,
309
+ "eval_accuracy": 0.972972972972973,
310
+ "eval_loss": 0.045613404363393784,
311
+ "eval_runtime": 7.5057,
312
+ "eval_samples_per_second": 4.93,
313
+ "eval_steps_per_second": 0.266,
314
  "step": 64
315
  },
316
  {
317
+ "epoch": 32.73,
318
+ "eval_accuracy": 0.972972972972973,
319
+ "eval_loss": 0.047752730548381805,
320
+ "eval_runtime": 7.6794,
321
+ "eval_samples_per_second": 4.818,
322
+ "eval_steps_per_second": 0.26,
323
  "step": 66
324
  },
325
  {
326
+ "epoch": 33.73,
327
+ "eval_accuracy": 0.972972972972973,
328
+ "eval_loss": 0.0467178151011467,
329
+ "eval_runtime": 7.7621,
330
+ "eval_samples_per_second": 4.767,
331
+ "eval_steps_per_second": 0.258,
332
  "step": 68
333
  },
334
  {
335
+ "epoch": 34.73,
336
+ "eval_accuracy": 0.972972972972973,
337
+ "eval_loss": 0.04733948037028313,
338
+ "eval_runtime": 8.0246,
339
+ "eval_samples_per_second": 4.611,
340
+ "eval_steps_per_second": 0.249,
341
  "step": 70
342
  },
343
  {
344
+ "epoch": 35.73,
345
+ "eval_accuracy": 0.972972972972973,
346
+ "eval_loss": 0.048583876341581345,
347
+ "eval_runtime": 8.5589,
348
+ "eval_samples_per_second": 4.323,
349
+ "eval_steps_per_second": 0.234,
350
  "step": 72
351
  },
352
  {
353
+ "epoch": 36.73,
354
+ "eval_accuracy": 0.972972972972973,
355
+ "eval_loss": 0.05004884675145149,
356
+ "eval_runtime": 7.4868,
357
+ "eval_samples_per_second": 4.942,
358
+ "eval_steps_per_second": 0.267,
359
  "step": 74
360
  },
361
  {
362
+ "epoch": 37.73,
363
+ "eval_accuracy": 0.972972972972973,
364
+ "eval_loss": 0.0501551553606987,
365
+ "eval_runtime": 8.2774,
366
+ "eval_samples_per_second": 4.47,
367
+ "eval_steps_per_second": 0.242,
368
  "step": 76
369
  },
370
  {
371
+ "epoch": 38.73,
372
+ "eval_accuracy": 0.972972972972973,
373
+ "eval_loss": 0.05001495033502579,
374
+ "eval_runtime": 7.8563,
375
+ "eval_samples_per_second": 4.71,
376
+ "eval_steps_per_second": 0.255,
377
  "step": 78
378
  },
379
  {
380
+ "epoch": 39.73,
381
  "learning_rate": 0.0,
382
+ "loss": 0.1589,
383
  "step": 80
384
  },
385
  {
386
+ "epoch": 39.73,
387
+ "eval_accuracy": 0.972972972972973,
388
+ "eval_loss": 0.04929113760590553,
389
+ "eval_runtime": 8.4253,
390
+ "eval_samples_per_second": 4.392,
391
+ "eval_steps_per_second": 0.237,
392
  "step": 80
393
  },
394
  {
395
+ "epoch": 39.73,
396
  "step": 80,
397
+ "total_flos": 3.262961773565706e+17,
398
+ "train_loss": 0.18748833239078522,
399
+ "train_runtime": 7500.5041,
400
+ "train_samples_per_second": 1.76,
401
+ "train_steps_per_second": 0.011
402
  }
403
  ],
404
  "max_steps": 80,
405
  "num_train_epochs": 40,
406
+ "total_flos": 3.262961773565706e+17,
407
  "trial_name": null,
408
  "trial_params": null
409
  }