GlycerinLOL commited on
Commit
2b9ec1c
·
verified ·
1 Parent(s): 5e33f7f

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +7 -0
  2. train_results.json +7 -0
  3. trainer_state.json +202 -0
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 2.436967770862177,
4
+ "train_runtime": 10313.084,
5
+ "train_samples_per_second": 14.709,
6
+ "train_steps_per_second": 0.919
7
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.0,
3
+ "train_loss": 2.436967770862177,
4
+ "train_runtime": 10313.084,
5
+ "train_samples_per_second": 14.709,
6
+ "train_steps_per_second": 0.919
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.999156296140055,
5
+ "eval_steps": 500,
6
+ "global_step": 9480,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.21,
13
+ "learning_rate": 1.8955696202531647e-05,
14
+ "loss": 2.9241,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.42,
19
+ "learning_rate": 1.7900843881856543e-05,
20
+ "loss": 2.7783,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.63,
25
+ "learning_rate": 1.6845991561181435e-05,
26
+ "loss": 2.7342,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.84,
31
+ "learning_rate": 1.579113924050633e-05,
32
+ "loss": 2.6968,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 1.0,
37
+ "eval_f1": 0.8701,
38
+ "eval_gen_len": 19.4437588989084,
39
+ "eval_loss": 2.538452386856079,
40
+ "eval_precision": 0.8766,
41
+ "eval_recall": 0.8641,
42
+ "eval_rouge1": 0.2634,
43
+ "eval_rouge2": 0.0907,
44
+ "eval_rougeL": 0.218,
45
+ "eval_rougeLsum": 0.2182,
46
+ "eval_runtime": 339.9314,
47
+ "eval_samples_per_second": 6.198,
48
+ "eval_steps_per_second": 1.55,
49
+ "step": 2370
50
+ },
51
+ {
52
+ "epoch": 1.05,
53
+ "learning_rate": 1.4736286919831224e-05,
54
+ "loss": 2.6435,
55
+ "step": 2500
56
+ },
57
+ {
58
+ "epoch": 1.27,
59
+ "learning_rate": 1.368354430379747e-05,
60
+ "loss": 2.4844,
61
+ "step": 3000
62
+ },
63
+ {
64
+ "epoch": 1.48,
65
+ "learning_rate": 1.2628691983122363e-05,
66
+ "loss": 2.4626,
67
+ "step": 3500
68
+ },
69
+ {
70
+ "epoch": 1.69,
71
+ "learning_rate": 1.1573839662447259e-05,
72
+ "loss": 2.4676,
73
+ "step": 4000
74
+ },
75
+ {
76
+ "epoch": 1.9,
77
+ "learning_rate": 1.0523206751054854e-05,
78
+ "loss": 2.4746,
79
+ "step": 4500
80
+ },
81
+ {
82
+ "epoch": 2.0,
83
+ "eval_f1": 0.8712,
84
+ "eval_gen_len": 19.25723777883246,
85
+ "eval_loss": 2.507711410522461,
86
+ "eval_precision": 0.8774,
87
+ "eval_recall": 0.8655,
88
+ "eval_rouge1": 0.273,
89
+ "eval_rouge2": 0.0941,
90
+ "eval_rougeL": 0.2238,
91
+ "eval_rougeLsum": 0.2239,
92
+ "eval_runtime": 335.7013,
93
+ "eval_samples_per_second": 6.276,
94
+ "eval_steps_per_second": 1.57,
95
+ "step": 4741
96
+ },
97
+ {
98
+ "epoch": 2.11,
99
+ "learning_rate": 9.468354430379748e-06,
100
+ "loss": 2.3813,
101
+ "step": 5000
102
+ },
103
+ {
104
+ "epoch": 2.32,
105
+ "learning_rate": 8.415611814345991e-06,
106
+ "loss": 2.3173,
107
+ "step": 5500
108
+ },
109
+ {
110
+ "epoch": 2.53,
111
+ "learning_rate": 7.360759493670887e-06,
112
+ "loss": 2.3064,
113
+ "step": 6000
114
+ },
115
+ {
116
+ "epoch": 2.74,
117
+ "learning_rate": 6.305907172995781e-06,
118
+ "loss": 2.3008,
119
+ "step": 6500
120
+ },
121
+ {
122
+ "epoch": 2.95,
123
+ "learning_rate": 5.251054852320675e-06,
124
+ "loss": 2.3066,
125
+ "step": 7000
126
+ },
127
+ {
128
+ "epoch": 3.0,
129
+ "eval_f1": 0.8696,
130
+ "eval_gen_len": 19.307071665875654,
131
+ "eval_loss": 2.501197099685669,
132
+ "eval_precision": 0.8756,
133
+ "eval_recall": 0.864,
134
+ "eval_rouge1": 0.2671,
135
+ "eval_rouge2": 0.0936,
136
+ "eval_rougeL": 0.221,
137
+ "eval_rougeLsum": 0.2211,
138
+ "eval_runtime": 336.0052,
139
+ "eval_samples_per_second": 6.271,
140
+ "eval_steps_per_second": 1.568,
141
+ "step": 7111
142
+ },
143
+ {
144
+ "epoch": 3.16,
145
+ "learning_rate": 4.19620253164557e-06,
146
+ "loss": 2.2216,
147
+ "step": 7500
148
+ },
149
+ {
150
+ "epoch": 3.37,
151
+ "learning_rate": 3.141350210970465e-06,
152
+ "loss": 2.2071,
153
+ "step": 8000
154
+ },
155
+ {
156
+ "epoch": 3.59,
157
+ "learning_rate": 2.0864978902953587e-06,
158
+ "loss": 2.1852,
159
+ "step": 8500
160
+ },
161
+ {
162
+ "epoch": 3.8,
163
+ "learning_rate": 1.0316455696202532e-06,
164
+ "loss": 2.2041,
165
+ "step": 9000
166
+ },
167
+ {
168
+ "epoch": 4.0,
169
+ "eval_f1": 0.8705,
170
+ "eval_gen_len": 19.35548172757475,
171
+ "eval_loss": 2.503497838973999,
172
+ "eval_precision": 0.8768,
173
+ "eval_recall": 0.8648,
174
+ "eval_rouge1": 0.2709,
175
+ "eval_rouge2": 0.0948,
176
+ "eval_rougeL": 0.2244,
177
+ "eval_rougeLsum": 0.2244,
178
+ "eval_runtime": 336.6782,
179
+ "eval_samples_per_second": 6.258,
180
+ "eval_steps_per_second": 1.565,
181
+ "step": 9480
182
+ },
183
+ {
184
+ "epoch": 4.0,
185
+ "step": 9480,
186
+ "total_flos": 2.372598264346706e+17,
187
+ "train_loss": 2.436967770862177,
188
+ "train_runtime": 10313.084,
189
+ "train_samples_per_second": 14.709,
190
+ "train_steps_per_second": 0.919
191
+ }
192
+ ],
193
+ "logging_steps": 500,
194
+ "max_steps": 9480,
195
+ "num_input_tokens_seen": 0,
196
+ "num_train_epochs": 4,
197
+ "save_steps": 500,
198
+ "total_flos": 2.372598264346706e+17,
199
+ "train_batch_size": 4,
200
+ "trial_name": null,
201
+ "trial_params": null
202
+ }