ngwgsang commited on
Commit
3c96319
·
verified ·
1 Parent(s): 8110179

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8d1313f274a21d0d8d09471175a9bd62427b83ff986e4d53c339e1e0ed1ebe4
3
  size 1583480280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac48ef4d9be7da10a2675530a9c29dbe744b7066875fd8093f8f3925161228f
3
  size 1583480280
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7103c19a3eeec8d178df723dd7add3a0d754ad342696f8bf8e3378c59f3b75
3
  size 3166950939
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50f03128116c85327f4a92b9502e6d5f025415604cca17d5c76bb781bdb19268
3
  size 3166950939
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef7221890a7134ea2f5d764d71e45a50a5a07c68ebdc6724cddfcf18524c9c50
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:101b85eefdb57b4174f8e56178ff56799122c7abb93c7765f3d596841254b287
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbaf1fe07c08cae80261838683ae5bba41ac45905af686e74e6d05f19f13430d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e60debad2e0820d2d64238ae4a209b8d40fdcf351db5f0394509952de535c45
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.11589660495519638,
3
  "best_model_checkpoint": "./vit5_qqp/checkpoint-6570",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 6570,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -114,6 +114,48 @@
114
  "eval_samples_per_second": 73.171,
115
  "eval_steps_per_second": 2.035,
116
  "step": 6570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
118
  ],
119
  "logging_steps": 500,
@@ -128,12 +170,12 @@
128
  "should_evaluate": false,
129
  "should_log": false,
130
  "should_save": true,
131
- "should_training_stop": false
132
  },
133
  "attributes": {}
134
  }
135
  },
136
- "total_flos": 5.00420738678784e+16,
137
  "train_batch_size": 36,
138
  "trial_name": null,
139
  "trial_params": null
 
1
  {
2
  "best_metric": 0.11589660495519638,
3
  "best_model_checkpoint": "./vit5_qqp/checkpoint-6570",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 9855,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
114
  "eval_samples_per_second": 73.171,
115
  "eval_steps_per_second": 2.035,
116
  "step": 6570
117
+ },
118
+ {
119
+ "epoch": 2.13089802130898,
120
+ "grad_norm": 0.3453280031681061,
121
+ "learning_rate": 1.4485032978183663e-05,
122
+ "loss": 0.101,
123
+ "step": 7000
124
+ },
125
+ {
126
+ "epoch": 2.2831050228310503,
127
+ "grad_norm": 0.3175284266471863,
128
+ "learning_rate": 1.1948249619482495e-05,
129
+ "loss": 0.097,
130
+ "step": 7500
131
+ },
132
+ {
133
+ "epoch": 2.43531202435312,
134
+ "grad_norm": 0.42786237597465515,
135
+ "learning_rate": 9.411466260781329e-06,
136
+ "loss": 0.0959,
137
+ "step": 8000
138
+ },
139
+ {
140
+ "epoch": 2.5875190258751903,
141
+ "grad_norm": 0.4301067590713501,
142
+ "learning_rate": 6.874682902080162e-06,
143
+ "loss": 0.095,
144
+ "step": 8500
145
+ },
146
+ {
147
+ "epoch": 2.73972602739726,
148
+ "grad_norm": 0.3909034729003906,
149
+ "learning_rate": 4.337899543378996e-06,
150
+ "loss": 0.0958,
151
+ "step": 9000
152
+ },
153
+ {
154
+ "epoch": 2.8919330289193304,
155
+ "grad_norm": 0.38906097412109375,
156
+ "learning_rate": 1.8011161846778284e-06,
157
+ "loss": 0.0939,
158
+ "step": 9500
159
  }
160
  ],
161
  "logging_steps": 500,
 
170
  "should_evaluate": false,
171
  "should_log": false,
172
  "should_save": true,
173
+ "should_training_stop": true
174
  },
175
  "attributes": {}
176
  }
177
  },
178
+ "total_flos": 7.50631108018176e+16,
179
  "train_batch_size": 36,
180
  "trial_name": null,
181
  "trial_params": null