brigettesegovia commited on
Commit
d4ff2af
·
verified ·
1 Parent(s): 73f7cd1

Training in progress, step 100

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_accuracy": 0.9473684210526315,
4
- "eval_loss": 0.10724472999572754,
5
  "eval_model_preparation_time": 0.0027,
6
- "eval_runtime": 4.3982,
7
- "eval_samples_per_second": 30.24,
8
- "eval_steps_per_second": 3.865,
9
- "total_flos": 1.0280724304719053e+17,
10
- "train_loss": 0.19260400934861258,
11
- "train_runtime": 411.1303,
12
- "train_samples_per_second": 10.06,
13
- "train_steps_per_second": 0.632
14
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9774436090225563,
4
+ "eval_loss": 0.10054443776607513,
5
  "eval_model_preparation_time": 0.0027,
6
+ "eval_runtime": 4.4521,
7
+ "eval_samples_per_second": 29.874,
8
+ "eval_steps_per_second": 3.818,
9
+ "total_flos": 5.140362152359526e+16,
10
+ "train_loss": 0.4239132670255808,
11
+ "train_runtime": 196.2668,
12
+ "train_samples_per_second": 10.537,
13
+ "train_steps_per_second": 0.662
14
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_accuracy": 0.9473684210526315,
4
- "eval_loss": 0.10724472999572754,
5
- "eval_runtime": 4.3982,
6
- "eval_samples_per_second": 30.24,
7
- "eval_steps_per_second": 3.865
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9774436090225563,
4
+ "eval_loss": 0.10054443776607513,
5
+ "eval_runtime": 4.4521,
6
+ "eval_samples_per_second": 29.874,
7
+ "eval_steps_per_second": 3.818
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27a99098aa47336f9980bbdd2fee4d8adfdd583275ee12efe6dbb383b0f05ccd
3
  size 110345908
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4399eee272a4751bd7bc0cc01a962610eacb47f7ec0d99e454f020ef7b40927
3
  size 110345908
runs/Dec04_11-46-10_l-hvjvvxgp4c.local/events.out.tfevents.1733330976.l-hvjvvxgp4c.local.19687.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028453e2e525c84e0fe3aa46ccb2f0039d7c195780ded7fbf28c398697daee06
3
+ size 411
runs/Dec04_11-52-05_l-hvjvvxgp4c.local/events.out.tfevents.1733331126.l-hvjvvxgp4c.local.19687.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:088c47d38e6fe65893d52248ea61e4008c259e1211028fd965f4b038cedb2f7f
3
+ size 7898
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "total_flos": 1.0280724304719053e+17,
4
- "train_loss": 0.19260400934861258,
5
- "train_runtime": 411.1303,
6
- "train_samples_per_second": 10.06,
7
- "train_steps_per_second": 0.632
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 5.140362152359526e+16,
4
+ "train_loss": 0.4239132670255808,
5
+ "train_runtime": 196.2668,
6
+ "train_samples_per_second": 10.537,
7
+ "train_steps_per_second": 0.662
8
  }
trainer_state.json CHANGED
@@ -1,227 +1,127 @@
1
  {
2
- "best_metric": 0.046604182571172714,
3
- "best_model_checkpoint": "plant_classification/checkpoint-200",
4
- "epoch": 4.0,
5
  "eval_steps": 100,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.15384615384615385,
13
- "grad_norm": 0.22338886559009552,
14
- "learning_rate": 7.692307692307693e-05,
15
- "loss": 0.044,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.3076923076923077,
20
- "grad_norm": 20.6887149810791,
21
- "learning_rate": 0.00015384615384615385,
22
- "loss": 0.1327,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.46153846153846156,
27
- "grad_norm": 7.553480625152588,
28
- "learning_rate": 0.00019658119658119659,
29
- "loss": 0.3924,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.6153846153846154,
34
- "grad_norm": 44.926761627197266,
35
- "learning_rate": 0.00018803418803418803,
36
- "loss": 0.2107,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.7692307692307693,
41
- "grad_norm": 7.68371057510376,
42
- "learning_rate": 0.0001794871794871795,
43
- "loss": 0.2243,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.9230769230769231,
48
- "grad_norm": 14.441496849060059,
49
- "learning_rate": 0.00017094017094017094,
50
- "loss": 0.2507,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 1.0769230769230769,
55
- "grad_norm": 14.163393020629883,
56
- "learning_rate": 0.0001623931623931624,
57
- "loss": 0.3464,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 1.2307692307692308,
62
- "grad_norm": 0.6214488744735718,
63
- "learning_rate": 0.00015384615384615385,
64
- "loss": 0.2514,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 1.3846153846153846,
69
- "grad_norm": 15.280985832214355,
70
- "learning_rate": 0.0001452991452991453,
71
- "loss": 0.3392,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 1.5384615384615383,
76
- "grad_norm": 13.597776412963867,
77
- "learning_rate": 0.00013675213675213676,
78
- "loss": 0.3668,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 1.5384615384615383,
83
- "eval_accuracy": 0.924812030075188,
84
- "eval_loss": 0.249382883310318,
85
- "eval_runtime": 5.5701,
86
- "eval_samples_per_second": 23.877,
87
- "eval_steps_per_second": 3.052,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.6923076923076923,
92
- "grad_norm": 0.9829044342041016,
93
- "learning_rate": 0.00012820512820512823,
94
- "loss": 0.2045,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 1.8461538461538463,
99
- "grad_norm": 3.837667465209961,
100
- "learning_rate": 0.00011965811965811966,
101
- "loss": 0.3534,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 2.0,
106
- "grad_norm": 4.407589435577393,
107
- "learning_rate": 0.00011111111111111112,
108
- "loss": 0.0817,
109
- "step": 130
110
- },
111
- {
112
- "epoch": 2.1538461538461537,
113
- "grad_norm": 11.72974967956543,
114
- "learning_rate": 0.00010256410256410256,
115
- "loss": 0.1323,
116
- "step": 140
117
- },
118
- {
119
- "epoch": 2.3076923076923075,
120
- "grad_norm": 6.462613105773926,
121
- "learning_rate": 9.401709401709401e-05,
122
- "loss": 0.1889,
123
- "step": 150
124
- },
125
- {
126
- "epoch": 2.4615384615384617,
127
- "grad_norm": 0.08825964480638504,
128
- "learning_rate": 8.547008547008547e-05,
129
- "loss": 0.0902,
130
- "step": 160
131
- },
132
- {
133
- "epoch": 2.6153846153846154,
134
- "grad_norm": 21.940948486328125,
135
- "learning_rate": 7.692307692307693e-05,
136
- "loss": 0.1333,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 2.769230769230769,
141
- "grad_norm": 20.016550064086914,
142
- "learning_rate": 6.837606837606838e-05,
143
- "loss": 0.2801,
144
- "step": 180
145
- },
146
- {
147
- "epoch": 2.9230769230769234,
148
- "grad_norm": 17.858781814575195,
149
- "learning_rate": 5.982905982905983e-05,
150
- "loss": 0.1068,
151
- "step": 190
152
- },
153
- {
154
- "epoch": 3.076923076923077,
155
- "grad_norm": 0.7944952845573425,
156
- "learning_rate": 5.128205128205128e-05,
157
- "loss": 0.1412,
158
- "step": 200
159
- },
160
- {
161
- "epoch": 3.076923076923077,
162
- "eval_accuracy": 0.9849624060150376,
163
- "eval_loss": 0.046604182571172714,
164
- "eval_runtime": 5.7449,
165
- "eval_samples_per_second": 23.151,
166
- "eval_steps_per_second": 2.959,
167
- "step": 200
168
- },
169
- {
170
- "epoch": 3.230769230769231,
171
- "grad_norm": 0.06452557444572449,
172
- "learning_rate": 4.2735042735042735e-05,
173
- "loss": 0.0656,
174
- "step": 210
175
- },
176
- {
177
- "epoch": 3.3846153846153846,
178
- "grad_norm": 7.774806499481201,
179
- "learning_rate": 3.418803418803419e-05,
180
- "loss": 0.1093,
181
- "step": 220
182
- },
183
- {
184
- "epoch": 3.5384615384615383,
185
- "grad_norm": 6.5291666984558105,
186
- "learning_rate": 2.564102564102564e-05,
187
- "loss": 0.109,
188
- "step": 230
189
- },
190
- {
191
- "epoch": 3.6923076923076925,
192
- "grad_norm": 10.082606315612793,
193
- "learning_rate": 1.7094017094017095e-05,
194
- "loss": 0.117,
195
- "step": 240
196
- },
197
- {
198
- "epoch": 3.8461538461538463,
199
- "grad_norm": 18.175029754638672,
200
- "learning_rate": 8.547008547008548e-06,
201
- "loss": 0.1722,
202
- "step": 250
203
- },
204
- {
205
- "epoch": 4.0,
206
- "grad_norm": 13.180197715759277,
207
  "learning_rate": 0.0,
208
- "loss": 0.1636,
209
- "step": 260
210
  },
211
  {
212
- "epoch": 4.0,
213
- "step": 260,
214
- "total_flos": 1.0280724304719053e+17,
215
- "train_loss": 0.19260400934861258,
216
- "train_runtime": 411.1303,
217
- "train_samples_per_second": 10.06,
218
- "train_steps_per_second": 0.632
219
  }
220
  ],
221
  "logging_steps": 10,
222
- "max_steps": 260,
223
  "num_input_tokens_seen": 0,
224
- "num_train_epochs": 4,
225
  "save_steps": 100,
226
  "stateful_callbacks": {
227
  "TrainerControl": {
@@ -235,7 +135,7 @@
235
  "attributes": {}
236
  }
237
  },
238
- "total_flos": 1.0280724304719053e+17,
239
  "train_batch_size": 16,
240
  "trial_name": null,
241
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.09293471276760101,
3
+ "best_model_checkpoint": "plant_classification/checkpoint-100",
4
+ "epoch": 2.0,
5
  "eval_steps": 100,
6
+ "global_step": 130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.15384615384615385,
13
+ "grad_norm": 6.1695780754089355,
14
+ "learning_rate": 1.5384615384615387e-05,
15
+ "loss": 1.1054,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.3076923076923077,
20
+ "grad_norm": 5.371831893920898,
21
+ "learning_rate": 1.8803418803418804e-05,
22
+ "loss": 0.9481,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.46153846153846156,
27
+ "grad_norm": 7.629484176635742,
28
+ "learning_rate": 1.7094017094017095e-05,
29
+ "loss": 0.738,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.6153846153846154,
34
+ "grad_norm": 11.11859130859375,
35
+ "learning_rate": 1.5384615384615387e-05,
36
+ "loss": 0.526,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.7692307692307693,
41
+ "grad_norm": 6.637124061584473,
42
+ "learning_rate": 1.3675213675213677e-05,
43
+ "loss": 0.3638,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.9230769230769231,
48
+ "grad_norm": 8.50536823272705,
49
+ "learning_rate": 1.1965811965811966e-05,
50
+ "loss": 0.3301,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 1.0769230769230769,
55
+ "grad_norm": 13.938777923583984,
56
+ "learning_rate": 1.0256410256410256e-05,
57
+ "loss": 0.2812,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 1.2307692307692308,
62
+ "grad_norm": 7.3434648513793945,
63
+ "learning_rate": 8.547008547008548e-06,
64
+ "loss": 0.2431,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 1.3846153846153846,
69
+ "grad_norm": 15.022695541381836,
70
+ "learning_rate": 6.837606837606839e-06,
71
+ "loss": 0.2091,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 1.5384615384615383,
76
+ "grad_norm": 9.071745872497559,
77
+ "learning_rate": 5.128205128205128e-06,
78
+ "loss": 0.192,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 1.5384615384615383,
83
+ "eval_accuracy": 0.9774436090225563,
84
+ "eval_loss": 0.09293471276760101,
85
+ "eval_runtime": 4.5986,
86
+ "eval_samples_per_second": 28.922,
87
+ "eval_steps_per_second": 3.697,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.6923076923076923,
92
+ "grad_norm": 2.740382432937622,
93
+ "learning_rate": 3.4188034188034193e-06,
94
+ "loss": 0.1832,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 1.8461538461538463,
99
+ "grad_norm": 6.225354194641113,
100
+ "learning_rate": 1.7094017094017097e-06,
101
+ "loss": 0.2292,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 2.0,
106
+ "grad_norm": 5.570672512054443,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  "learning_rate": 0.0,
108
+ "loss": 0.1619,
109
+ "step": 130
110
  },
111
  {
112
+ "epoch": 2.0,
113
+ "step": 130,
114
+ "total_flos": 5.140362152359526e+16,
115
+ "train_loss": 0.4239132670255808,
116
+ "train_runtime": 196.2668,
117
+ "train_samples_per_second": 10.537,
118
+ "train_steps_per_second": 0.662
119
  }
120
  ],
121
  "logging_steps": 10,
122
+ "max_steps": 130,
123
  "num_input_tokens_seen": 0,
124
+ "num_train_epochs": 2,
125
  "save_steps": 100,
126
  "stateful_callbacks": {
127
  "TrainerControl": {
 
135
  "attributes": {}
136
  }
137
  },
138
+ "total_flos": 5.140362152359526e+16,
139
  "train_batch_size": 16,
140
  "trial_name": null,
141
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a647b258d44fa4b297d60d82aa5d3269ba7d12fb3709f3dca8db2a50c0089d1
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b44019e633b9c081c5b871a03caca471c7694ebad28fb64475d88584b51750a5
3
  size 5304