QyQy commited on
Commit
9d6bd42
·
1 Parent(s): c23663a

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +220 -0
trainer_state.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 9.025093793163277,
3
+ "best_model_checkpoint": "./envi_checkpoints/checkpoint-31250",
4
+ "epoch": 2.0,
5
+ "global_step": 31250,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 3.1808000000000005e-06,
13
+ "loss": 7.9834,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.13,
18
+ "learning_rate": 6.380800000000001e-06,
19
+ "loss": 5.5297,
20
+ "step": 2000
21
+ },
22
+ {
23
+ "epoch": 0.19,
24
+ "learning_rate": 9.580800000000002e-06,
25
+ "loss": 4.7262,
26
+ "step": 3000
27
+ },
28
+ {
29
+ "epoch": 0.26,
30
+ "learning_rate": 1.2780800000000001e-05,
31
+ "loss": 4.4162,
32
+ "step": 4000
33
+ },
34
+ {
35
+ "epoch": 0.32,
36
+ "learning_rate": 1.59808e-05,
37
+ "loss": 4.2072,
38
+ "step": 5000
39
+ },
40
+ {
41
+ "epoch": 0.38,
42
+ "learning_rate": 1.91808e-05,
43
+ "loss": 4.0103,
44
+ "step": 6000
45
+ },
46
+ {
47
+ "epoch": 0.45,
48
+ "learning_rate": 2.23808e-05,
49
+ "loss": 3.7644,
50
+ "step": 7000
51
+ },
52
+ {
53
+ "epoch": 0.51,
54
+ "learning_rate": 2.55808e-05,
55
+ "loss": 3.3987,
56
+ "step": 8000
57
+ },
58
+ {
59
+ "epoch": 0.58,
60
+ "learning_rate": 2.87808e-05,
61
+ "loss": 2.8234,
62
+ "step": 9000
63
+ },
64
+ {
65
+ "epoch": 0.64,
66
+ "learning_rate": 3.19808e-05,
67
+ "loss": 2.0638,
68
+ "step": 10000
69
+ },
70
+ {
71
+ "epoch": 0.7,
72
+ "learning_rate": 3.51808e-05,
73
+ "loss": 1.5503,
74
+ "step": 11000
75
+ },
76
+ {
77
+ "epoch": 0.77,
78
+ "learning_rate": 3.83776e-05,
79
+ "loss": 1.2126,
80
+ "step": 12000
81
+ },
82
+ {
83
+ "epoch": 0.83,
84
+ "learning_rate": 4.15776e-05,
85
+ "loss": 0.9281,
86
+ "step": 13000
87
+ },
88
+ {
89
+ "epoch": 0.9,
90
+ "learning_rate": 4.47776e-05,
91
+ "loss": 0.713,
92
+ "step": 14000
93
+ },
94
+ {
95
+ "epoch": 0.96,
96
+ "learning_rate": 4.79776e-05,
97
+ "loss": 0.6009,
98
+ "step": 15000
99
+ },
100
+ {
101
+ "epoch": 1.0,
102
+ "eval_bleu": 8.6159300527749,
103
+ "eval_loss": 0.45428285002708435,
104
+ "eval_runtime": 93.2143,
105
+ "eval_samples_per_second": 26.82,
106
+ "eval_steps_per_second": 0.848,
107
+ "step": 15625
108
+ },
109
+ {
110
+ "epoch": 1.02,
111
+ "learning_rate": 4.88224e-05,
112
+ "loss": 0.54,
113
+ "step": 16000
114
+ },
115
+ {
116
+ "epoch": 1.09,
117
+ "learning_rate": 4.56256e-05,
118
+ "loss": 0.5017,
119
+ "step": 17000
120
+ },
121
+ {
122
+ "epoch": 1.15,
123
+ "learning_rate": 4.24288e-05,
124
+ "loss": 0.4777,
125
+ "step": 18000
126
+ },
127
+ {
128
+ "epoch": 1.22,
129
+ "learning_rate": 3.9228800000000006e-05,
130
+ "loss": 0.4661,
131
+ "step": 19000
132
+ },
133
+ {
134
+ "epoch": 1.28,
135
+ "learning_rate": 3.60288e-05,
136
+ "loss": 0.4526,
137
+ "step": 20000
138
+ },
139
+ {
140
+ "epoch": 1.34,
141
+ "learning_rate": 3.2828800000000007e-05,
142
+ "loss": 0.4457,
143
+ "step": 21000
144
+ },
145
+ {
146
+ "epoch": 1.41,
147
+ "learning_rate": 2.96288e-05,
148
+ "loss": 0.4386,
149
+ "step": 22000
150
+ },
151
+ {
152
+ "epoch": 1.47,
153
+ "learning_rate": 2.6428800000000004e-05,
154
+ "loss": 0.4365,
155
+ "step": 23000
156
+ },
157
+ {
158
+ "epoch": 1.54,
159
+ "learning_rate": 2.32288e-05,
160
+ "loss": 0.4305,
161
+ "step": 24000
162
+ },
163
+ {
164
+ "epoch": 1.6,
165
+ "learning_rate": 2.0032e-05,
166
+ "loss": 0.4216,
167
+ "step": 25000
168
+ },
169
+ {
170
+ "epoch": 1.66,
171
+ "learning_rate": 1.6832e-05,
172
+ "loss": 0.4096,
173
+ "step": 26000
174
+ },
175
+ {
176
+ "epoch": 1.73,
177
+ "learning_rate": 1.3632000000000001e-05,
178
+ "loss": 0.4066,
179
+ "step": 27000
180
+ },
181
+ {
182
+ "epoch": 1.79,
183
+ "learning_rate": 1.0432e-05,
184
+ "loss": 0.4071,
185
+ "step": 28000
186
+ },
187
+ {
188
+ "epoch": 1.86,
189
+ "learning_rate": 7.2319999999999995e-06,
190
+ "loss": 0.4048,
191
+ "step": 29000
192
+ },
193
+ {
194
+ "epoch": 1.92,
195
+ "learning_rate": 4.0320000000000005e-06,
196
+ "loss": 0.3997,
197
+ "step": 30000
198
+ },
199
+ {
200
+ "epoch": 1.98,
201
+ "learning_rate": 8.352000000000001e-07,
202
+ "loss": 0.4015,
203
+ "step": 31000
204
+ },
205
+ {
206
+ "epoch": 2.0,
207
+ "eval_bleu": 9.025093793163277,
208
+ "eval_loss": 0.36966755986213684,
209
+ "eval_runtime": 94.07,
210
+ "eval_samples_per_second": 26.576,
211
+ "eval_steps_per_second": 0.84,
212
+ "step": 31250
213
+ }
214
+ ],
215
+ "max_steps": 31250,
216
+ "num_train_epochs": 2,
217
+ "total_flos": 1.505048664e+17,
218
+ "trial_name": null,
219
+ "trial_params": null
220
+ }