rooftopcoder committed on
Commit
fabb564
1 Parent(s): 441a01f

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +23 -11
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.76,
3
- "train_loss": 0.0,
4
- "train_runtime": 33.2317,
5
  "train_samples": 156240,
6
- "train_samples_per_second": 14104.587,
7
- "train_steps_per_second": 110.226
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 5.942797746566532,
4
+ "train_runtime": 17726.9284,
5
  "train_samples": 156240,
6
+ "train_samples_per_second": 26.441,
7
+ "train_steps_per_second": 0.826
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.76,
3
- "train_loss": 0.0,
4
- "train_runtime": 33.2317,
5
  "train_samples": 156240,
6
- "train_samples_per_second": 14104.587,
7
- "train_steps_per_second": 110.226
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 5.942797746566532,
4
+ "train_runtime": 17726.9284,
5
  "train_samples": 156240,
6
+ "train_samples_per_second": 26.441,
7
+ "train_steps_per_second": 0.826
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.764693835756707,
5
- "global_step": 13500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -170,18 +170,30 @@
170
  "step": 13500
171
  },
172
  {
173
- "epoch": 2.76,
174
- "step": 13500,
175
- "total_flos": 8.465387806030234e+16,
176
- "train_loss": 0.0,
177
- "train_runtime": 33.2317,
178
- "train_samples_per_second": 14104.587,
179
- "train_steps_per_second": 110.226
 
 
 
 
 
 
 
 
 
 
 
 
180
  }
181
  ],
182
- "max_steps": 3663,
183
  "num_train_epochs": 3,
184
- "total_flos": 8.465387806030234e+16,
185
  "trial_name": null,
186
  "trial_params": null
187
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 14649,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
170
  "step": 13500
171
  },
172
  {
173
+ "epoch": 2.87,
174
+ "learning_rate": 0.0,
175
+ "loss": 5.9514,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 2.97,
180
+ "learning_rate": 0.0,
181
+ "loss": 5.9511,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 3.0,
186
+ "step": 14649,
187
+ "total_flos": 9.185626186297344e+16,
188
+ "train_loss": 5.942797746566532,
189
+ "train_runtime": 17726.9284,
190
+ "train_samples_per_second": 26.441,
191
+ "train_steps_per_second": 0.826
192
  }
193
  ],
194
+ "max_steps": 14649,
195
  "num_train_epochs": 3,
196
+ "total_flos": 9.185626186297344e+16,
197
  "trial_name": null,
198
  "trial_params": null
199
  }