nc7777 committed on
Commit
12e8800
·
verified ·
1 Parent(s): c175820

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-large-patch16-224",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "calcificaciones",
13
+ "1": "masas",
14
+ "2": "no_encontrado"
15
+ },
16
+ "image_size": 224,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 4096,
19
+ "label2id": {
20
+ "calcificaciones": 0,
21
+ "masas": 1,
22
+ "no_encontrado": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 16,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 24,
29
+ "patch_size": 16,
30
+ "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.45.1"
34
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b0782e374d9537f21dd2c122bf3722f292e6e2c171e16ccde56cea965bcb0d
3
+ size 1213265372
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6de43352acbfe72d0cb1676cf10eeeb4519111c291d231b24be87a7a4a1645
3
+ size 503940154
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessorFast",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ce3a3c05ac42196e5ce20babc07ba298e3f4439fa1b4712cb2e14c382e65881
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0edc849a55daef37f3913f72b0ae54d705cb645843af555e57da8ec176190095
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,798 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6901677250862122,
3
+ "best_model_checkpoint": "SavedModels/ViT-large-patch16-224_B/checkpoint-1128",
4
+ "epoch": 20.0,
5
+ "eval_steps": 500,
6
+ "global_step": 7520,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.26595744680851063,
13
+ "grad_norm": 16.918853759765625,
14
+ "learning_rate": 0.0002920212765957447,
15
+ "loss": 1.0273,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.5319148936170213,
20
+ "grad_norm": 8.12959098815918,
21
+ "learning_rate": 0.00028404255319148934,
22
+ "loss": 0.8052,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.7978723404255319,
27
+ "grad_norm": 4.803711891174316,
28
+ "learning_rate": 0.00027606382978723404,
29
+ "loss": 0.7818,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_accuracy": 0.6693333333333333,
35
+ "eval_f1": 0.6679277175440698,
36
+ "eval_loss": 0.7451461553573608,
37
+ "eval_precision": 0.6665516663433287,
38
+ "eval_recall": 0.6708336668000534,
39
+ "eval_runtime": 34.5448,
40
+ "eval_samples_per_second": 21.711,
41
+ "eval_steps_per_second": 2.721,
42
+ "step": 376
43
+ },
44
+ {
45
+ "epoch": 1.0638297872340425,
46
+ "grad_norm": 5.264636516571045,
47
+ "learning_rate": 0.0002680851063829787,
48
+ "loss": 0.7518,
49
+ "step": 400
50
+ },
51
+ {
52
+ "epoch": 1.3297872340425532,
53
+ "grad_norm": 6.721948146820068,
54
+ "learning_rate": 0.0002601063829787234,
55
+ "loss": 0.7153,
56
+ "step": 500
57
+ },
58
+ {
59
+ "epoch": 1.5957446808510638,
60
+ "grad_norm": 5.855480670928955,
61
+ "learning_rate": 0.00025212765957446806,
62
+ "loss": 0.6854,
63
+ "step": 600
64
+ },
65
+ {
66
+ "epoch": 1.8617021276595744,
67
+ "grad_norm": 5.206683158874512,
68
+ "learning_rate": 0.0002441489361702127,
69
+ "loss": 0.6975,
70
+ "step": 700
71
+ },
72
+ {
73
+ "epoch": 2.0,
74
+ "eval_accuracy": 0.652,
75
+ "eval_f1": 0.6527448850868924,
76
+ "eval_loss": 0.737027108669281,
77
+ "eval_precision": 0.6598586784457597,
78
+ "eval_recall": 0.6519173002534348,
79
+ "eval_runtime": 34.5417,
80
+ "eval_samples_per_second": 21.713,
81
+ "eval_steps_per_second": 2.721,
82
+ "step": 752
83
+ },
84
+ {
85
+ "epoch": 2.127659574468085,
86
+ "grad_norm": 6.592344760894775,
87
+ "learning_rate": 0.00023617021276595742,
88
+ "loss": 0.6174,
89
+ "step": 800
90
+ },
91
+ {
92
+ "epoch": 2.393617021276596,
93
+ "grad_norm": 4.732657432556152,
94
+ "learning_rate": 0.0002281914893617021,
95
+ "loss": 0.644,
96
+ "step": 900
97
+ },
98
+ {
99
+ "epoch": 2.6595744680851063,
100
+ "grad_norm": 5.618279933929443,
101
+ "learning_rate": 0.00022021276595744679,
102
+ "loss": 0.6098,
103
+ "step": 1000
104
+ },
105
+ {
106
+ "epoch": 2.925531914893617,
107
+ "grad_norm": 4.702358245849609,
108
+ "learning_rate": 0.0002122340425531915,
109
+ "loss": 0.5934,
110
+ "step": 1100
111
+ },
112
+ {
113
+ "epoch": 3.0,
114
+ "eval_accuracy": 0.6826666666666666,
115
+ "eval_f1": 0.6716423210919009,
116
+ "eval_loss": 0.6901677250862122,
117
+ "eval_precision": 0.680882951010493,
118
+ "eval_recall": 0.6834541816726691,
119
+ "eval_runtime": 35.1911,
120
+ "eval_samples_per_second": 21.312,
121
+ "eval_steps_per_second": 2.671,
122
+ "step": 1128
123
+ },
124
+ {
125
+ "epoch": 3.1914893617021276,
126
+ "grad_norm": 4.94950008392334,
127
+ "learning_rate": 0.00020425531914893615,
128
+ "loss": 0.4998,
129
+ "step": 1200
130
+ },
131
+ {
132
+ "epoch": 3.4574468085106385,
133
+ "grad_norm": 4.887601375579834,
134
+ "learning_rate": 0.00019627659574468083,
135
+ "loss": 0.5384,
136
+ "step": 1300
137
+ },
138
+ {
139
+ "epoch": 3.723404255319149,
140
+ "grad_norm": 5.762096881866455,
141
+ "learning_rate": 0.0001882978723404255,
142
+ "loss": 0.5035,
143
+ "step": 1400
144
+ },
145
+ {
146
+ "epoch": 3.9893617021276597,
147
+ "grad_norm": 5.000032424926758,
148
+ "learning_rate": 0.0001803191489361702,
149
+ "loss": 0.4943,
150
+ "step": 1500
151
+ },
152
+ {
153
+ "epoch": 4.0,
154
+ "eval_accuracy": 0.68,
155
+ "eval_f1": 0.6607042226524314,
156
+ "eval_loss": 0.7506471872329712,
157
+ "eval_precision": 0.6727589541740554,
158
+ "eval_recall": 0.6823892223556088,
159
+ "eval_runtime": 34.4906,
160
+ "eval_samples_per_second": 21.745,
161
+ "eval_steps_per_second": 2.725,
162
+ "step": 1504
163
+ },
164
+ {
165
+ "epoch": 4.25531914893617,
166
+ "grad_norm": 7.737178325653076,
167
+ "learning_rate": 0.0001723404255319149,
168
+ "loss": 0.3581,
169
+ "step": 1600
170
+ },
171
+ {
172
+ "epoch": 4.5212765957446805,
173
+ "grad_norm": 5.353149890899658,
174
+ "learning_rate": 0.00016436170212765956,
175
+ "loss": 0.3857,
176
+ "step": 1700
177
+ },
178
+ {
179
+ "epoch": 4.787234042553192,
180
+ "grad_norm": 5.475671768188477,
181
+ "learning_rate": 0.00015638297872340426,
182
+ "loss": 0.3625,
183
+ "step": 1800
184
+ },
185
+ {
186
+ "epoch": 5.0,
187
+ "eval_accuracy": 0.6693333333333333,
188
+ "eval_f1": 0.6583454266829142,
189
+ "eval_loss": 0.9006826281547546,
190
+ "eval_precision": 0.6655033443649184,
191
+ "eval_recall": 0.6717070828331333,
192
+ "eval_runtime": 34.3021,
193
+ "eval_samples_per_second": 21.865,
194
+ "eval_steps_per_second": 2.74,
195
+ "step": 1880
196
+ },
197
+ {
198
+ "epoch": 5.053191489361702,
199
+ "grad_norm": 8.006985664367676,
200
+ "learning_rate": 0.00014840425531914892,
201
+ "loss": 0.3767,
202
+ "step": 1900
203
+ },
204
+ {
205
+ "epoch": 5.319148936170213,
206
+ "grad_norm": 8.51475715637207,
207
+ "learning_rate": 0.0001404255319148936,
208
+ "loss": 0.2824,
209
+ "step": 2000
210
+ },
211
+ {
212
+ "epoch": 5.585106382978723,
213
+ "grad_norm": 8.186362266540527,
214
+ "learning_rate": 0.00013244680851063828,
215
+ "loss": 0.2434,
216
+ "step": 2100
217
+ },
218
+ {
219
+ "epoch": 5.851063829787234,
220
+ "grad_norm": 6.735799312591553,
221
+ "learning_rate": 0.00012446808510638296,
222
+ "loss": 0.2717,
223
+ "step": 2200
224
+ },
225
+ {
226
+ "epoch": 6.0,
227
+ "eval_accuracy": 0.6586666666666666,
228
+ "eval_f1": 0.6614425775783358,
229
+ "eval_loss": 1.0333963632583618,
230
+ "eval_precision": 0.6652982915561002,
231
+ "eval_recall": 0.6589942643724156,
232
+ "eval_runtime": 34.3254,
233
+ "eval_samples_per_second": 21.85,
234
+ "eval_steps_per_second": 2.738,
235
+ "step": 2256
236
+ },
237
+ {
238
+ "epoch": 6.117021276595745,
239
+ "grad_norm": 6.987758636474609,
240
+ "learning_rate": 0.00011648936170212764,
241
+ "loss": 0.2191,
242
+ "step": 2300
243
+ },
244
+ {
245
+ "epoch": 6.382978723404255,
246
+ "grad_norm": 8.753083229064941,
247
+ "learning_rate": 0.00010851063829787234,
248
+ "loss": 0.1532,
249
+ "step": 2400
250
+ },
251
+ {
252
+ "epoch": 6.648936170212766,
253
+ "grad_norm": 6.87877082824707,
254
+ "learning_rate": 0.00010053191489361702,
255
+ "loss": 0.172,
256
+ "step": 2500
257
+ },
258
+ {
259
+ "epoch": 6.914893617021277,
260
+ "grad_norm": 8.248693466186523,
261
+ "learning_rate": 9.25531914893617e-05,
262
+ "loss": 0.188,
263
+ "step": 2600
264
+ },
265
+ {
266
+ "epoch": 7.0,
267
+ "eval_accuracy": 0.648,
268
+ "eval_f1": 0.6480604224938936,
269
+ "eval_loss": 1.3646624088287354,
270
+ "eval_precision": 0.6474310419368785,
271
+ "eval_recall": 0.6488264639189009,
272
+ "eval_runtime": 34.2754,
273
+ "eval_samples_per_second": 21.882,
274
+ "eval_steps_per_second": 2.742,
275
+ "step": 2632
276
+ },
277
+ {
278
+ "epoch": 7.180851063829787,
279
+ "grad_norm": 9.565648078918457,
280
+ "learning_rate": 8.457446808510637e-05,
281
+ "loss": 0.1344,
282
+ "step": 2700
283
+ },
284
+ {
285
+ "epoch": 7.446808510638298,
286
+ "grad_norm": 10.819112777709961,
287
+ "learning_rate": 7.659574468085105e-05,
288
+ "loss": 0.0791,
289
+ "step": 2800
290
+ },
291
+ {
292
+ "epoch": 7.712765957446808,
293
+ "grad_norm": 11.498022079467773,
294
+ "learning_rate": 6.861702127659574e-05,
295
+ "loss": 0.0917,
296
+ "step": 2900
297
+ },
298
+ {
299
+ "epoch": 7.9787234042553195,
300
+ "grad_norm": 10.500368118286133,
301
+ "learning_rate": 6.063829787234042e-05,
302
+ "loss": 0.0966,
303
+ "step": 3000
304
+ },
305
+ {
306
+ "epoch": 8.0,
307
+ "eval_accuracy": 0.6506666666666666,
308
+ "eval_f1": 0.6510622640996298,
309
+ "eval_loss": 1.7121126651763916,
310
+ "eval_precision": 0.6518027984453844,
311
+ "eval_recall": 0.6519786581299186,
312
+ "eval_runtime": 34.2979,
313
+ "eval_samples_per_second": 21.867,
314
+ "eval_steps_per_second": 2.741,
315
+ "step": 3008
316
+ },
317
+ {
318
+ "epoch": 8.24468085106383,
319
+ "grad_norm": 10.034985542297363,
320
+ "learning_rate": 5.26595744680851e-05,
321
+ "loss": 0.0589,
322
+ "step": 3100
323
+ },
324
+ {
325
+ "epoch": 8.51063829787234,
326
+ "grad_norm": 8.84762954711914,
327
+ "learning_rate": 4.468085106382978e-05,
328
+ "loss": 0.0444,
329
+ "step": 3200
330
+ },
331
+ {
332
+ "epoch": 8.77659574468085,
333
+ "grad_norm": 8.717729568481445,
334
+ "learning_rate": 3.670212765957446e-05,
335
+ "loss": 0.0492,
336
+ "step": 3300
337
+ },
338
+ {
339
+ "epoch": 9.0,
340
+ "eval_accuracy": 0.664,
341
+ "eval_f1": 0.6634717422597086,
342
+ "eval_loss": 2.092434883117676,
343
+ "eval_precision": 0.6624493384142015,
344
+ "eval_recall": 0.6648558089902628,
345
+ "eval_runtime": 34.1372,
346
+ "eval_samples_per_second": 21.97,
347
+ "eval_steps_per_second": 2.754,
348
+ "step": 3384
349
+ },
350
+ {
351
+ "epoch": 9.042553191489361,
352
+ "grad_norm": 10.311498641967773,
353
+ "learning_rate": 2.8723404255319147e-05,
354
+ "loss": 0.0461,
355
+ "step": 3400
356
+ },
357
+ {
358
+ "epoch": 9.308510638297872,
359
+ "grad_norm": 9.477750778198242,
360
+ "learning_rate": 2.0744680851063828e-05,
361
+ "loss": 0.0215,
362
+ "step": 3500
363
+ },
364
+ {
365
+ "epoch": 9.574468085106384,
366
+ "grad_norm": 10.069836616516113,
367
+ "learning_rate": 1.276595744680851e-05,
368
+ "loss": 0.0275,
369
+ "step": 3600
370
+ },
371
+ {
372
+ "epoch": 9.840425531914894,
373
+ "grad_norm": 9.309320449829102,
374
+ "learning_rate": 4.7872340425531906e-06,
375
+ "loss": 0.0198,
376
+ "step": 3700
377
+ },
378
+ {
379
+ "epoch": 10.0,
380
+ "eval_accuracy": 0.6733333333333333,
381
+ "eval_f1": 0.671461456478878,
382
+ "eval_loss": 2.349191427230835,
383
+ "eval_precision": 0.6697804784739011,
384
+ "eval_recall": 0.6742686407896491,
385
+ "eval_runtime": 34.2831,
386
+ "eval_samples_per_second": 21.877,
387
+ "eval_steps_per_second": 2.742,
388
+ "step": 3760
389
+ },
390
+ {
391
+ "epoch": 10.106382978723405,
392
+ "grad_norm": 4.254685878753662,
393
+ "learning_rate": 0.00014840425531914892,
394
+ "loss": 0.1783,
395
+ "step": 3800
396
+ },
397
+ {
398
+ "epoch": 10.372340425531915,
399
+ "grad_norm": 6.5849409103393555,
400
+ "learning_rate": 0.00014441489361702127,
401
+ "loss": 0.4254,
402
+ "step": 3900
403
+ },
404
+ {
405
+ "epoch": 10.638297872340425,
406
+ "grad_norm": 6.008872985839844,
407
+ "learning_rate": 0.0001404255319148936,
408
+ "loss": 0.3892,
409
+ "step": 4000
410
+ },
411
+ {
412
+ "epoch": 10.904255319148936,
413
+ "grad_norm": 4.886096000671387,
414
+ "learning_rate": 0.00013643617021276593,
415
+ "loss": 0.3308,
416
+ "step": 4100
417
+ },
418
+ {
419
+ "epoch": 11.0,
420
+ "eval_accuracy": 0.6413333333333333,
421
+ "eval_f1": 0.6379586335182581,
422
+ "eval_loss": 1.2812837362289429,
423
+ "eval_precision": 0.6360221570265966,
424
+ "eval_recall": 0.6426586634653862,
425
+ "eval_runtime": 34.4135,
426
+ "eval_samples_per_second": 21.794,
427
+ "eval_steps_per_second": 2.731,
428
+ "step": 4136
429
+ },
430
+ {
431
+ "epoch": 11.170212765957446,
432
+ "grad_norm": 8.697183609008789,
433
+ "learning_rate": 0.00013244680851063828,
434
+ "loss": 0.3418,
435
+ "step": 4200
436
+ },
437
+ {
438
+ "epoch": 11.436170212765958,
439
+ "grad_norm": 1.831715703010559,
440
+ "learning_rate": 0.00012845744680851063,
441
+ "loss": 0.2892,
442
+ "step": 4300
443
+ },
444
+ {
445
+ "epoch": 11.702127659574469,
446
+ "grad_norm": 5.119130611419678,
447
+ "learning_rate": 0.00012446808510638296,
448
+ "loss": 0.2597,
449
+ "step": 4400
450
+ },
451
+ {
452
+ "epoch": 11.96808510638298,
453
+ "grad_norm": 4.450387001037598,
454
+ "learning_rate": 0.00012047872340425532,
455
+ "loss": 0.2468,
456
+ "step": 4500
457
+ },
458
+ {
459
+ "epoch": 12.0,
460
+ "eval_accuracy": 0.6693333333333333,
461
+ "eval_f1": 0.6644005202701936,
462
+ "eval_loss": 1.343381404876709,
463
+ "eval_precision": 0.6702673209494008,
464
+ "eval_recall": 0.6696177137521676,
465
+ "eval_runtime": 34.1977,
466
+ "eval_samples_per_second": 21.931,
467
+ "eval_steps_per_second": 2.749,
468
+ "step": 4512
469
+ },
470
+ {
471
+ "epoch": 12.23404255319149,
472
+ "grad_norm": 0.5507918000221252,
473
+ "learning_rate": 0.00011648936170212764,
474
+ "loss": 0.161,
475
+ "step": 4600
476
+ },
477
+ {
478
+ "epoch": 12.5,
479
+ "grad_norm": 3.424229621887207,
480
+ "learning_rate": 0.0001125,
481
+ "loss": 0.1698,
482
+ "step": 4700
483
+ },
484
+ {
485
+ "epoch": 12.76595744680851,
486
+ "grad_norm": 10.304464340209961,
487
+ "learning_rate": 0.00010851063829787234,
488
+ "loss": 0.1992,
489
+ "step": 4800
490
+ },
491
+ {
492
+ "epoch": 13.0,
493
+ "eval_accuracy": 0.6773333333333333,
494
+ "eval_f1": 0.6757724702487522,
495
+ "eval_loss": 1.5172154903411865,
496
+ "eval_precision": 0.6749943823114556,
497
+ "eval_recall": 0.6780813658796853,
498
+ "eval_runtime": 34.5579,
499
+ "eval_samples_per_second": 21.703,
500
+ "eval_steps_per_second": 2.72,
501
+ "step": 4888
502
+ },
503
+ {
504
+ "epoch": 13.03191489361702,
505
+ "grad_norm": 0.58838951587677,
506
+ "learning_rate": 0.00010452127659574466,
507
+ "loss": 0.153,
508
+ "step": 4900
509
+ },
510
+ {
511
+ "epoch": 13.297872340425531,
512
+ "grad_norm": 4.339015007019043,
513
+ "learning_rate": 0.00010053191489361702,
514
+ "loss": 0.0729,
515
+ "step": 5000
516
+ },
517
+ {
518
+ "epoch": 13.563829787234042,
519
+ "grad_norm": 5.353603839874268,
520
+ "learning_rate": 9.654255319148935e-05,
521
+ "loss": 0.1182,
522
+ "step": 5100
523
+ },
524
+ {
525
+ "epoch": 13.829787234042554,
526
+ "grad_norm": 0.11711510270833969,
527
+ "learning_rate": 9.25531914893617e-05,
528
+ "loss": 0.1391,
529
+ "step": 5200
530
+ },
531
+ {
532
+ "epoch": 14.0,
533
+ "eval_accuracy": 0.688,
534
+ "eval_f1": 0.6861192542213884,
535
+ "eval_loss": 1.8793449401855469,
536
+ "eval_precision": 0.6887025095519409,
537
+ "eval_recall": 0.6882689075630252,
538
+ "eval_runtime": 35.0719,
539
+ "eval_samples_per_second": 21.385,
540
+ "eval_steps_per_second": 2.68,
541
+ "step": 5264
542
+ },
543
+ {
544
+ "epoch": 14.095744680851064,
545
+ "grad_norm": 16.545928955078125,
546
+ "learning_rate": 8.856382978723404e-05,
547
+ "loss": 0.0919,
548
+ "step": 5300
549
+ },
550
+ {
551
+ "epoch": 14.361702127659575,
552
+ "grad_norm": 0.11285369098186493,
553
+ "learning_rate": 8.457446808510637e-05,
554
+ "loss": 0.0724,
555
+ "step": 5400
556
+ },
557
+ {
558
+ "epoch": 14.627659574468085,
559
+ "grad_norm": 1.0866966247558594,
560
+ "learning_rate": 8.058510638297872e-05,
561
+ "loss": 0.0615,
562
+ "step": 5500
563
+ },
564
+ {
565
+ "epoch": 14.893617021276595,
566
+ "grad_norm": 0.0466451533138752,
567
+ "learning_rate": 7.659574468085105e-05,
568
+ "loss": 0.062,
569
+ "step": 5600
570
+ },
571
+ {
572
+ "epoch": 15.0,
573
+ "eval_accuracy": 0.6826666666666666,
574
+ "eval_f1": 0.6784899933134545,
575
+ "eval_loss": 2.1949727535247803,
576
+ "eval_precision": 0.6780378052909217,
577
+ "eval_recall": 0.6832503668133919,
578
+ "eval_runtime": 34.2882,
579
+ "eval_samples_per_second": 21.873,
580
+ "eval_steps_per_second": 2.741,
581
+ "step": 5640
582
+ },
583
+ {
584
+ "epoch": 15.159574468085106,
585
+ "grad_norm": 1.6346490383148193,
586
+ "learning_rate": 7.26063829787234e-05,
587
+ "loss": 0.0477,
588
+ "step": 5700
589
+ },
590
+ {
591
+ "epoch": 15.425531914893616,
592
+ "grad_norm": 0.004013681318610907,
593
+ "learning_rate": 6.861702127659574e-05,
594
+ "loss": 0.0484,
595
+ "step": 5800
596
+ },
597
+ {
598
+ "epoch": 15.691489361702128,
599
+ "grad_norm": 0.6463019847869873,
600
+ "learning_rate": 6.462765957446807e-05,
601
+ "loss": 0.0389,
602
+ "step": 5900
603
+ },
604
+ {
605
+ "epoch": 15.957446808510639,
606
+ "grad_norm": 0.4977071285247803,
607
+ "learning_rate": 6.063829787234042e-05,
608
+ "loss": 0.0392,
609
+ "step": 6000
610
+ },
611
+ {
612
+ "epoch": 16.0,
613
+ "eval_accuracy": 0.6626666666666666,
614
+ "eval_f1": 0.6599147266673199,
615
+ "eval_loss": 2.2853217124938965,
616
+ "eval_precision": 0.6593416101537274,
617
+ "eval_recall": 0.6640821661998133,
618
+ "eval_runtime": 34.2142,
619
+ "eval_samples_per_second": 21.921,
620
+ "eval_steps_per_second": 2.747,
621
+ "step": 6016
622
+ },
623
+ {
624
+ "epoch": 16.22340425531915,
625
+ "grad_norm": 0.18373289704322815,
626
+ "learning_rate": 5.6648936170212766e-05,
627
+ "loss": 0.035,
628
+ "step": 6100
629
+ },
630
+ {
631
+ "epoch": 16.48936170212766,
632
+ "grad_norm": 11.437871932983398,
633
+ "learning_rate": 5.26595744680851e-05,
634
+ "loss": 0.0323,
635
+ "step": 6200
636
+ },
637
+ {
638
+ "epoch": 16.75531914893617,
639
+ "grad_norm": 0.04929669201374054,
640
+ "learning_rate": 4.867021276595744e-05,
641
+ "loss": 0.0286,
642
+ "step": 6300
643
+ },
644
+ {
645
+ "epoch": 17.0,
646
+ "eval_accuracy": 0.6706666666666666,
647
+ "eval_f1": 0.6665467903773287,
648
+ "eval_loss": 2.356433153152466,
649
+ "eval_precision": 0.6646925224621034,
650
+ "eval_recall": 0.6718655462184874,
651
+ "eval_runtime": 34.2754,
652
+ "eval_samples_per_second": 21.882,
653
+ "eval_steps_per_second": 2.742,
654
+ "step": 6392
655
+ },
656
+ {
657
+ "epoch": 17.02127659574468,
658
+ "grad_norm": 0.003023180877789855,
659
+ "learning_rate": 4.468085106382978e-05,
660
+ "loss": 0.0232,
661
+ "step": 6400
662
+ },
663
+ {
664
+ "epoch": 17.28723404255319,
665
+ "grad_norm": 0.009346798993647099,
666
+ "learning_rate": 4.069148936170212e-05,
667
+ "loss": 0.0195,
668
+ "step": 6500
669
+ },
670
+ {
671
+ "epoch": 17.5531914893617,
672
+ "grad_norm": 0.0396982878446579,
673
+ "learning_rate": 3.670212765957446e-05,
674
+ "loss": 0.02,
675
+ "step": 6600
676
+ },
677
+ {
678
+ "epoch": 17.819148936170212,
679
+ "grad_norm": 0.014023613184690475,
680
+ "learning_rate": 3.271276595744681e-05,
681
+ "loss": 0.025,
682
+ "step": 6700
683
+ },
684
+ {
685
+ "epoch": 18.0,
686
+ "eval_accuracy": 0.676,
687
+ "eval_f1": 0.6721775597644245,
688
+ "eval_loss": 2.5857865810394287,
689
+ "eval_precision": 0.6713915090790397,
690
+ "eval_recall": 0.6767165532879819,
691
+ "eval_runtime": 34.2491,
692
+ "eval_samples_per_second": 21.898,
693
+ "eval_steps_per_second": 2.745,
694
+ "step": 6768
695
+ },
696
+ {
697
+ "epoch": 18.085106382978722,
698
+ "grad_norm": 0.47041425108909607,
699
+ "learning_rate": 2.8723404255319147e-05,
700
+ "loss": 0.0202,
701
+ "step": 6800
702
+ },
703
+ {
704
+ "epoch": 18.351063829787233,
705
+ "grad_norm": 0.06168466433882713,
706
+ "learning_rate": 2.4734042553191484e-05,
707
+ "loss": 0.019,
708
+ "step": 6900
709
+ },
710
+ {
711
+ "epoch": 18.617021276595743,
712
+ "grad_norm": 0.0014604219468310475,
713
+ "learning_rate": 2.0744680851063828e-05,
714
+ "loss": 0.0178,
715
+ "step": 7000
716
+ },
717
+ {
718
+ "epoch": 18.882978723404257,
719
+ "grad_norm": 0.0028070323169231415,
720
+ "learning_rate": 1.675531914893617e-05,
721
+ "loss": 0.0214,
722
+ "step": 7100
723
+ },
724
+ {
725
+ "epoch": 19.0,
726
+ "eval_accuracy": 0.668,
727
+ "eval_f1": 0.6658631291530153,
728
+ "eval_loss": 2.6788387298583984,
729
+ "eval_precision": 0.6642259414225942,
730
+ "eval_recall": 0.6688787515006003,
731
+ "eval_runtime": 34.249,
732
+ "eval_samples_per_second": 21.898,
733
+ "eval_steps_per_second": 2.745,
734
+ "step": 7144
735
+ },
736
+ {
737
+ "epoch": 19.148936170212767,
738
+ "grad_norm": 0.0015123536577448249,
739
+ "learning_rate": 1.276595744680851e-05,
740
+ "loss": 0.0204,
741
+ "step": 7200
742
+ },
743
+ {
744
+ "epoch": 19.414893617021278,
745
+ "grad_norm": 0.0018817168893292546,
746
+ "learning_rate": 8.77659574468085e-06,
747
+ "loss": 0.0141,
748
+ "step": 7300
749
+ },
750
+ {
751
+ "epoch": 19.680851063829788,
752
+ "grad_norm": 0.6008268594741821,
753
+ "learning_rate": 4.7872340425531906e-06,
754
+ "loss": 0.0203,
755
+ "step": 7400
756
+ },
757
+ {
758
+ "epoch": 19.9468085106383,
759
+ "grad_norm": 1.0317440032958984,
760
+ "learning_rate": 7.978723404255318e-07,
761
+ "loss": 0.0155,
762
+ "step": 7500
763
+ },
764
+ {
765
+ "epoch": 20.0,
766
+ "eval_accuracy": 0.672,
767
+ "eval_f1": 0.6695170440310667,
768
+ "eval_loss": 2.725527048110962,
769
+ "eval_precision": 0.66779120407287,
770
+ "eval_recall": 0.6728787515006003,
771
+ "eval_runtime": 34.5602,
772
+ "eval_samples_per_second": 21.701,
773
+ "eval_steps_per_second": 2.72,
774
+ "step": 7520
775
+ }
776
+ ],
777
+ "logging_steps": 100,
778
+ "max_steps": 7520,
779
+ "num_input_tokens_seen": 0,
780
+ "num_train_epochs": 20,
781
+ "save_steps": 500,
782
+ "stateful_callbacks": {
783
+ "TrainerControl": {
784
+ "args": {
785
+ "should_epoch_stop": false,
786
+ "should_evaluate": false,
787
+ "should_log": false,
788
+ "should_save": true,
789
+ "should_training_stop": true
790
+ },
791
+ "attributes": {}
792
+ }
793
+ },
794
+ "total_flos": 3.288864877974981e+19,
795
+ "train_batch_size": 16,
796
+ "trial_name": null,
797
+ "trial_params": null
798
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6161a2d7c96f418e5516fcd350adef0b7734e918251a4c3ab6e45395587780
3
+ size 5240