ngwgsang commited on
Commit
da8c523
·
verified ·
1 Parent(s): acc3fd5

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3ff95a4a44d74accc21047ab370d8017ebde971c734343bf11827f79f3d0aa1
3
  size 904009512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91660599b3077ad1745732e80389c0416616cc7517c50a554c407fe06bce1643
3
  size 904009512
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa5691e5de1c2ffdcdeb0af1aa03c70cf1086b75ffb011f0f320499c0c9373fd
3
  size 1808174394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:477890913dd59379fa51fdbcc844219c0cac53c13dc48295bb6501bcb465239b
3
  size 1808174394
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ec9c1445be0b55b92cf87051b6b197a5f45b675ec9c95cfe73b40b3a698350b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c65d4cf9d6a73374dc90d734d4cf0727b5471f1cc4e735d53b23807c304c22ad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:238a3d9d5d27b062089474e3b1f179ee6b549591914241d9518f7417141f6938
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a0213b99585fb590604f0f2d631130facda7981a07fff5f23620c8420e5cedf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.691973090171814,
3
- "best_model_checkpoint": "./vietquill/checkpoint-3125",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 3125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -57,6 +57,56 @@
57
  "eval_samples_per_second": 264.545,
58
  "eval_steps_per_second": 8.465,
59
  "step": 3125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  }
61
  ],
62
  "logging_steps": 500,
@@ -76,7 +126,7 @@
76
  "attributes": {}
77
  }
78
  },
79
- "total_flos": 5708980224000000.0,
80
  "train_batch_size": 32,
81
  "trial_name": null,
82
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6816325187683105,
3
+ "best_model_checkpoint": "./vietquill/checkpoint-6250",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 6250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
57
  "eval_samples_per_second": 264.545,
58
  "eval_steps_per_second": 8.465,
59
  "step": 3125
60
+ },
61
+ {
62
+ "epoch": 1.12,
63
+ "grad_norm": 1.739039659500122,
64
+ "learning_rate": 3.1333333333333334e-05,
65
+ "loss": 0.5487,
66
+ "step": 3500
67
+ },
68
+ {
69
+ "epoch": 1.28,
70
+ "grad_norm": 1.6791564226150513,
71
+ "learning_rate": 2.8666666666666668e-05,
72
+ "loss": 0.5273,
73
+ "step": 4000
74
+ },
75
+ {
76
+ "epoch": 1.44,
77
+ "grad_norm": 1.7030389308929443,
78
+ "learning_rate": 2.6000000000000002e-05,
79
+ "loss": 0.5231,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 1.6,
84
+ "grad_norm": 1.8268135786056519,
85
+ "learning_rate": 2.3333333333333336e-05,
86
+ "loss": 0.5201,
87
+ "step": 5000
88
+ },
89
+ {
90
+ "epoch": 1.76,
91
+ "grad_norm": 1.5327303409576416,
92
+ "learning_rate": 2.0666666666666666e-05,
93
+ "loss": 0.5208,
94
+ "step": 5500
95
+ },
96
+ {
97
+ "epoch": 1.92,
98
+ "grad_norm": 1.5141078233718872,
99
+ "learning_rate": 1.8e-05,
100
+ "loss": 0.5112,
101
+ "step": 6000
102
+ },
103
+ {
104
+ "epoch": 2.0,
105
+ "eval_loss": 0.6816325187683105,
106
+ "eval_runtime": 3.7827,
107
+ "eval_samples_per_second": 264.364,
108
+ "eval_steps_per_second": 8.46,
109
+ "step": 6250
110
  }
111
  ],
112
  "logging_steps": 500,
 
126
  "attributes": {}
127
  }
128
  },
129
+ "total_flos": 1.1417960448e+16,
130
  "train_batch_size": 32,
131
  "trial_name": null,
132
  "trial_params": null