gokulsrinivasagan committed on
Commit
c507b0e
·
verified ·
1 Parent(s): c53437f

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: gokulsrinivasagan/bert_base_lda_100_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - spearmanr
8
  model-index:
9
  - name: bert_base_lda_100_v1_stsb
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,12 +30,12 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # bert_base_lda_100_v1_stsb
17
 
18
- This model is a fine-tuned version of [gokulsrinivasagan/bert_base_lda_100_v1](https://huggingface.co/gokulsrinivasagan/bert_base_lda_100_v1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 2.0560
21
- - Pearson: 0.5304
22
- - Spearmanr: 0.5350
23
- - Combined Score: 0.5327
24
 
25
  ## Model description
26
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: gokulsrinivasagan/bert_base_lda_100_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - spearmanr
12
  model-index:
13
  - name: bert_base_lda_100_v1_stsb
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE STSB
20
+ type: glue
21
+ args: stsb
22
+ metrics:
23
+ - name: Spearmanr
24
+ type: spearmanr
25
+ value: 0.5325439607950028
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # bert_base_lda_100_v1_stsb
32
 
33
+ This model is a fine-tuned version of [gokulsrinivasagan/bert_base_lda_100_v1](https://huggingface.co/gokulsrinivasagan/bert_base_lda_100_v1) on the GLUE STSB dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 1.6844
36
+ - Pearson: 0.5330
37
+ - Spearmanr: 0.5325
38
+ - Combined Score: 0.5328
39
 
40
  ## Model description
41
 
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_combined_score": NaN,
4
- "eval_loss": 2.302783966064453,
5
- "eval_pearson": NaN,
6
- "eval_runtime": 0.9609,
7
  "eval_samples": 1500,
8
- "eval_samples_per_second": 1561.041,
9
- "eval_spearmanr": NaN,
10
- "eval_steps_per_second": 6.244,
11
- "total_flos": 1.134458907008256e+16,
12
- "train_loss": 2.4118328260338826,
13
- "train_runtime": 190.6822,
14
  "train_samples": 5749,
15
- "train_samples_per_second": 1507.482,
16
- "train_steps_per_second": 6.031
17
  }
 
1
  {
2
+ "epoch": 13.0,
3
+ "eval_combined_score": 0.5327909501566208,
4
+ "eval_loss": 1.6844276189804077,
5
+ "eval_pearson": 0.5330379395182387,
6
+ "eval_runtime": 0.9731,
7
  "eval_samples": 1500,
8
+ "eval_samples_per_second": 1541.421,
9
+ "eval_spearmanr": 0.5325439607950028,
10
+ "eval_steps_per_second": 6.166,
11
+ "total_flos": 9831977194071552.0,
12
+ "train_loss": 0.9727236291636592,
13
+ "train_runtime": 168.3036,
14
  "train_samples": 5749,
15
+ "train_samples_per_second": 1707.925,
16
+ "train_steps_per_second": 6.833
17
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_combined_score": NaN,
4
- "eval_loss": 2.302783966064453,
5
- "eval_pearson": NaN,
6
- "eval_runtime": 0.9609,
7
  "eval_samples": 1500,
8
- "eval_samples_per_second": 1561.041,
9
- "eval_spearmanr": NaN,
10
- "eval_steps_per_second": 6.244
11
  }
 
1
  {
2
+ "epoch": 13.0,
3
+ "eval_combined_score": 0.5327909501566208,
4
+ "eval_loss": 1.6844276189804077,
5
+ "eval_pearson": 0.5330379395182387,
6
+ "eval_runtime": 0.9731,
7
  "eval_samples": 1500,
8
+ "eval_samples_per_second": 1541.421,
9
+ "eval_spearmanr": 0.5325439607950028,
10
+ "eval_steps_per_second": 6.166
11
  }
logs/events.out.tfevents.1733317752.ki-g0008.1208741.13 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d5a617a16b003c6e7f2013bc393f53fe33d3f42738480ea713ff5b5fdf39db
3
+ size 521
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "total_flos": 1.134458907008256e+16,
4
- "train_loss": 2.4118328260338826,
5
- "train_runtime": 190.6822,
6
  "train_samples": 5749,
7
- "train_samples_per_second": 1507.482,
8
- "train_steps_per_second": 6.031
9
  }
 
1
  {
2
+ "epoch": 13.0,
3
+ "total_flos": 9831977194071552.0,
4
+ "train_loss": 0.9727236291636592,
5
+ "train_runtime": 168.3036,
6
  "train_samples": 5749,
7
+ "train_samples_per_second": 1707.925,
8
+ "train_steps_per_second": 6.833
9
  }
trainer_state.json CHANGED
@@ -1,291 +1,255 @@
1
  {
2
- "best_metric": 2.302783966064453,
3
- "best_model_checkpoint": "bert_base_lda_100_v1_stsb/checkpoint-230",
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 345,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.897597312927246,
14
- "learning_rate": 0.00098,
15
- "loss": 5.4876,
16
  "step": 23
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_combined_score": NaN,
21
- "eval_loss": 2.5971333980560303,
22
- "eval_pearson": NaN,
23
- "eval_runtime": 0.953,
24
- "eval_samples_per_second": 1574.03,
25
- "eval_spearmanr": NaN,
26
- "eval_steps_per_second": 6.296,
27
  "step": 23
28
  },
29
  {
30
  "epoch": 2.0,
31
- "grad_norm": 16.058734893798828,
32
- "learning_rate": 0.00096,
33
- "loss": 2.2047,
34
  "step": 46
35
  },
36
  {
37
  "epoch": 2.0,
38
- "eval_combined_score": NaN,
39
- "eval_loss": 2.3758695125579834,
40
- "eval_pearson": NaN,
41
- "eval_runtime": 1.0004,
42
- "eval_samples_per_second": 1499.351,
43
- "eval_spearmanr": NaN,
44
- "eval_steps_per_second": 5.997,
45
  "step": 46
46
  },
47
  {
48
  "epoch": 3.0,
49
- "grad_norm": 11.513495445251465,
50
- "learning_rate": 0.00094,
51
- "loss": 2.2017,
52
  "step": 69
53
  },
54
  {
55
  "epoch": 3.0,
56
- "eval_combined_score": NaN,
57
- "eval_loss": 2.4511916637420654,
58
- "eval_pearson": NaN,
59
- "eval_runtime": 0.9743,
60
- "eval_samples_per_second": 1539.528,
61
- "eval_spearmanr": NaN,
62
- "eval_steps_per_second": 6.158,
63
  "step": 69
64
  },
65
  {
66
  "epoch": 4.0,
67
- "grad_norm": 5.998810768127441,
68
- "learning_rate": 0.00092,
69
- "loss": 2.1807,
70
  "step": 92
71
  },
72
  {
73
  "epoch": 4.0,
74
- "eval_combined_score": NaN,
75
- "eval_loss": 2.4511916637420654,
76
- "eval_pearson": NaN,
77
- "eval_runtime": 0.9891,
78
- "eval_samples_per_second": 1516.545,
79
- "eval_spearmanr": NaN,
80
- "eval_steps_per_second": 6.066,
81
  "step": 92
82
  },
83
  {
84
  "epoch": 5.0,
85
- "grad_norm": 17.433738708496094,
86
- "learning_rate": 0.0009000000000000001,
87
- "loss": 2.1807,
88
  "step": 115
89
  },
90
  {
91
  "epoch": 5.0,
92
- "eval_combined_score": NaN,
93
- "eval_loss": 2.511171817779541,
94
- "eval_pearson": NaN,
95
- "eval_runtime": 0.9632,
96
- "eval_samples_per_second": 1557.271,
97
- "eval_spearmanr": NaN,
98
- "eval_steps_per_second": 6.229,
99
  "step": 115
100
  },
101
  {
102
  "epoch": 6.0,
103
- "grad_norm": 6.638427734375,
104
- "learning_rate": 0.00088,
105
- "loss": 2.196,
106
  "step": 138
107
  },
108
  {
109
  "epoch": 6.0,
110
- "eval_combined_score": NaN,
111
- "eval_loss": 2.3448002338409424,
112
- "eval_pearson": NaN,
113
- "eval_runtime": 0.9594,
114
- "eval_samples_per_second": 1563.421,
115
- "eval_spearmanr": NaN,
116
- "eval_steps_per_second": 6.254,
117
  "step": 138
118
  },
119
  {
120
  "epoch": 7.0,
121
- "grad_norm": 16.561189651489258,
122
- "learning_rate": 0.00086,
123
- "loss": 2.1902,
124
  "step": 161
125
  },
126
  {
127
  "epoch": 7.0,
128
- "eval_combined_score": NaN,
129
- "eval_loss": 2.716400623321533,
130
- "eval_pearson": NaN,
131
- "eval_runtime": 0.9593,
132
- "eval_samples_per_second": 1563.575,
133
- "eval_spearmanr": NaN,
134
- "eval_steps_per_second": 6.254,
135
  "step": 161
136
  },
137
  {
138
  "epoch": 8.0,
139
- "grad_norm": 4.18385124206543,
140
- "learning_rate": 0.00084,
141
- "loss": 2.1899,
142
  "step": 184
143
  },
144
  {
145
  "epoch": 8.0,
146
- "eval_combined_score": NaN,
147
- "eval_loss": 2.6349358558654785,
148
- "eval_pearson": NaN,
149
- "eval_runtime": 0.9637,
150
- "eval_samples_per_second": 1556.487,
151
- "eval_spearmanr": NaN,
152
- "eval_steps_per_second": 6.226,
153
  "step": 184
154
  },
155
  {
156
  "epoch": 9.0,
157
- "grad_norm": 17.808826446533203,
158
- "learning_rate": 0.00082,
159
- "loss": 2.1962,
160
  "step": 207
161
  },
162
  {
163
  "epoch": 9.0,
164
- "eval_combined_score": NaN,
165
- "eval_loss": 2.3354201316833496,
166
- "eval_pearson": NaN,
167
- "eval_runtime": 0.9676,
168
- "eval_samples_per_second": 1550.299,
169
- "eval_spearmanr": NaN,
170
- "eval_steps_per_second": 6.201,
171
  "step": 207
172
  },
173
  {
174
  "epoch": 10.0,
175
- "grad_norm": 16.991071701049805,
176
- "learning_rate": 0.0008,
177
- "loss": 2.1802,
178
  "step": 230
179
  },
180
  {
181
  "epoch": 10.0,
182
- "eval_combined_score": NaN,
183
- "eval_loss": 2.302783966064453,
184
- "eval_pearson": NaN,
185
- "eval_runtime": 0.9863,
186
- "eval_samples_per_second": 1520.902,
187
- "eval_spearmanr": NaN,
188
- "eval_steps_per_second": 6.084,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 11.0,
193
- "grad_norm": 17.86025619506836,
194
- "learning_rate": 0.0007800000000000001,
195
- "loss": 2.1945,
196
  "step": 253
197
  },
198
  {
199
  "epoch": 11.0,
200
- "eval_combined_score": NaN,
201
- "eval_loss": 2.716400623321533,
202
- "eval_pearson": NaN,
203
- "eval_runtime": 0.9587,
204
- "eval_samples_per_second": 1564.538,
205
- "eval_spearmanr": NaN,
206
- "eval_steps_per_second": 6.258,
207
  "step": 253
208
  },
209
  {
210
  "epoch": 12.0,
211
- "grad_norm": 19.099140167236328,
212
- "learning_rate": 0.00076,
213
- "loss": 2.1932,
214
  "step": 276
215
  },
216
  {
217
  "epoch": 12.0,
218
- "eval_combined_score": NaN,
219
- "eval_loss": 2.737987518310547,
220
- "eval_pearson": NaN,
221
- "eval_runtime": 0.9594,
222
- "eval_samples_per_second": 1563.435,
223
- "eval_spearmanr": NaN,
224
- "eval_steps_per_second": 6.254,
225
  "step": 276
226
  },
227
  {
228
  "epoch": 13.0,
229
- "grad_norm": 13.359597206115723,
230
- "learning_rate": 0.00074,
231
- "loss": 2.206,
232
  "step": 299
233
  },
234
  {
235
  "epoch": 13.0,
236
- "eval_combined_score": NaN,
237
- "eval_loss": 2.737987518310547,
238
- "eval_pearson": NaN,
239
- "eval_runtime": 0.9641,
240
- "eval_samples_per_second": 1555.805,
241
- "eval_spearmanr": NaN,
242
- "eval_steps_per_second": 6.223,
243
  "step": 299
244
  },
245
  {
246
- "epoch": 14.0,
247
- "grad_norm": 5.281756401062012,
248
- "learning_rate": 0.0007199999999999999,
249
- "loss": 2.1965,
250
- "step": 322
251
- },
252
- {
253
- "epoch": 14.0,
254
- "eval_combined_score": NaN,
255
- "eval_loss": 2.654569625854492,
256
- "eval_pearson": NaN,
257
- "eval_runtime": 0.9615,
258
- "eval_samples_per_second": 1560.105,
259
- "eval_spearmanr": NaN,
260
- "eval_steps_per_second": 6.24,
261
- "step": 322
262
- },
263
- {
264
- "epoch": 15.0,
265
- "grad_norm": 4.948043346405029,
266
- "learning_rate": 0.0007,
267
- "loss": 2.1794,
268
- "step": 345
269
- },
270
- {
271
- "epoch": 15.0,
272
- "eval_combined_score": NaN,
273
- "eval_loss": 2.4802048206329346,
274
- "eval_pearson": NaN,
275
- "eval_runtime": 0.9617,
276
- "eval_samples_per_second": 1559.791,
277
- "eval_spearmanr": NaN,
278
- "eval_steps_per_second": 6.239,
279
- "step": 345
280
- },
281
- {
282
- "epoch": 15.0,
283
- "step": 345,
284
- "total_flos": 1.134458907008256e+16,
285
- "train_loss": 2.4118328260338826,
286
- "train_runtime": 190.6822,
287
- "train_samples_per_second": 1507.482,
288
- "train_steps_per_second": 6.031
289
  }
290
  ],
291
  "logging_steps": 1,
@@ -314,7 +278,7 @@
314
  "attributes": {}
315
  }
316
  },
317
- "total_flos": 1.134458907008256e+16,
318
  "train_batch_size": 256,
319
  "trial_name": null,
320
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.6844276189804077,
3
+ "best_model_checkpoint": "bert_base_lda_100_v1_stsb/checkpoint-184",
4
+ "epoch": 13.0,
5
  "eval_steps": 500,
6
+ "global_step": 299,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 9.02434253692627,
14
+ "learning_rate": 4.9e-05,
15
+ "loss": 2.7331,
16
  "step": 23
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_combined_score": 0.07012747710946046,
21
+ "eval_loss": 2.6188812255859375,
22
+ "eval_pearson": 0.0642887160182263,
23
+ "eval_runtime": 0.9624,
24
+ "eval_samples_per_second": 1558.649,
25
+ "eval_spearmanr": 0.0759662382006946,
26
+ "eval_steps_per_second": 6.235,
27
  "step": 23
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 23.63865852355957,
32
+ "learning_rate": 4.8e-05,
33
+ "loss": 1.9804,
34
  "step": 46
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_combined_score": 0.27527671204636106,
39
+ "eval_loss": 2.0897152423858643,
40
+ "eval_pearson": 0.28175468669023457,
41
+ "eval_runtime": 0.9642,
42
+ "eval_samples_per_second": 1555.678,
43
+ "eval_spearmanr": 0.2687987374024875,
44
+ "eval_steps_per_second": 6.223,
45
  "step": 46
46
  },
47
  {
48
  "epoch": 3.0,
49
+ "grad_norm": 10.638742446899414,
50
+ "learning_rate": 4.7e-05,
51
+ "loss": 1.7486,
52
  "step": 69
53
  },
54
  {
55
  "epoch": 3.0,
56
+ "eval_combined_score": 0.41554924979667834,
57
+ "eval_loss": 1.9471008777618408,
58
+ "eval_pearson": 0.41582453640686257,
59
+ "eval_runtime": 0.9708,
60
+ "eval_samples_per_second": 1545.093,
61
+ "eval_spearmanr": 0.4152739631864941,
62
+ "eval_steps_per_second": 6.18,
63
  "step": 69
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "grad_norm": 16.40125846862793,
68
+ "learning_rate": 4.600000000000001e-05,
69
+ "loss": 1.2963,
70
  "step": 92
71
  },
72
  {
73
  "epoch": 4.0,
74
+ "eval_combined_score": 0.4597302763812634,
75
+ "eval_loss": 2.3058416843414307,
76
+ "eval_pearson": 0.4520132571391213,
77
+ "eval_runtime": 0.993,
78
+ "eval_samples_per_second": 1510.585,
79
+ "eval_spearmanr": 0.4674472956234055,
80
+ "eval_steps_per_second": 6.042,
81
  "step": 92
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "grad_norm": 42.770668029785156,
86
+ "learning_rate": 4.5e-05,
87
+ "loss": 1.0162,
88
  "step": 115
89
  },
90
  {
91
  "epoch": 5.0,
92
+ "eval_combined_score": 0.48877111938435025,
93
+ "eval_loss": 1.8441771268844604,
94
+ "eval_pearson": 0.4886953649858171,
95
+ "eval_runtime": 0.974,
96
+ "eval_samples_per_second": 1540.11,
97
+ "eval_spearmanr": 0.48884687378288344,
98
+ "eval_steps_per_second": 6.16,
99
  "step": 115
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "grad_norm": 22.240951538085938,
104
+ "learning_rate": 4.4000000000000006e-05,
105
+ "loss": 0.8446,
106
  "step": 138
107
  },
108
  {
109
  "epoch": 6.0,
110
+ "eval_combined_score": 0.5259105826273307,
111
+ "eval_loss": 1.7663521766662598,
112
+ "eval_pearson": 0.5228342603176177,
113
+ "eval_runtime": 0.9691,
114
+ "eval_samples_per_second": 1547.89,
115
+ "eval_spearmanr": 0.5289869049370436,
116
+ "eval_steps_per_second": 6.192,
117
  "step": 138
118
  },
119
  {
120
  "epoch": 7.0,
121
+ "grad_norm": 9.541956901550293,
122
+ "learning_rate": 4.3e-05,
123
+ "loss": 0.6767,
124
  "step": 161
125
  },
126
  {
127
  "epoch": 7.0,
128
+ "eval_combined_score": 0.5168115431891785,
129
+ "eval_loss": 1.7573641538619995,
130
+ "eval_pearson": 0.5151591812766733,
131
+ "eval_runtime": 0.9781,
132
+ "eval_samples_per_second": 1533.662,
133
+ "eval_spearmanr": 0.5184639051016838,
134
+ "eval_steps_per_second": 6.135,
135
  "step": 161
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "grad_norm": 18.553218841552734,
140
+ "learning_rate": 4.2e-05,
141
+ "loss": 0.5349,
142
  "step": 184
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "eval_combined_score": 0.5327909501566208,
147
+ "eval_loss": 1.6844276189804077,
148
+ "eval_pearson": 0.5330379395182387,
149
+ "eval_runtime": 0.967,
150
+ "eval_samples_per_second": 1551.234,
151
+ "eval_spearmanr": 0.5325439607950028,
152
+ "eval_steps_per_second": 6.205,
153
  "step": 184
154
  },
155
  {
156
  "epoch": 9.0,
157
+ "grad_norm": 8.788359642028809,
158
+ "learning_rate": 4.1e-05,
159
+ "loss": 0.4606,
160
  "step": 207
161
  },
162
  {
163
  "epoch": 9.0,
164
+ "eval_combined_score": 0.5061528931646921,
165
+ "eval_loss": 1.9861546754837036,
166
+ "eval_pearson": 0.503941115665909,
167
+ "eval_runtime": 0.9662,
168
+ "eval_samples_per_second": 1552.405,
169
+ "eval_spearmanr": 0.5083646706634753,
170
+ "eval_steps_per_second": 6.21,
171
  "step": 207
172
  },
173
  {
174
  "epoch": 10.0,
175
+ "grad_norm": 9.931353569030762,
176
+ "learning_rate": 4e-05,
177
+ "loss": 0.3951,
178
  "step": 230
179
  },
180
  {
181
  "epoch": 10.0,
182
+ "eval_combined_score": 0.5270205929961109,
183
+ "eval_loss": 1.8024095296859741,
184
+ "eval_pearson": 0.5265505998740807,
185
+ "eval_runtime": 0.9616,
186
+ "eval_samples_per_second": 1559.852,
187
+ "eval_spearmanr": 0.527490586118141,
188
+ "eval_steps_per_second": 6.239,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 11.0,
193
+ "grad_norm": 21.041873931884766,
194
+ "learning_rate": 3.9000000000000006e-05,
195
+ "loss": 0.3624,
196
  "step": 253
197
  },
198
  {
199
  "epoch": 11.0,
200
+ "eval_combined_score": 0.5382256315731109,
201
+ "eval_loss": 2.015653371810913,
202
+ "eval_pearson": 0.5341689579826407,
203
+ "eval_runtime": 0.9659,
204
+ "eval_samples_per_second": 1552.881,
205
+ "eval_spearmanr": 0.5422823051635811,
206
+ "eval_steps_per_second": 6.212,
207
  "step": 253
208
  },
209
  {
210
  "epoch": 12.0,
211
+ "grad_norm": 19.475910186767578,
212
+ "learning_rate": 3.8e-05,
213
+ "loss": 0.3087,
214
  "step": 276
215
  },
216
  {
217
  "epoch": 12.0,
218
+ "eval_combined_score": 0.5305901722528907,
219
+ "eval_loss": 2.409419059753418,
220
+ "eval_pearson": 0.5226677549863216,
221
+ "eval_runtime": 0.9641,
222
+ "eval_samples_per_second": 1555.776,
223
+ "eval_spearmanr": 0.5385125895194598,
224
+ "eval_steps_per_second": 6.223,
225
  "step": 276
226
  },
227
  {
228
  "epoch": 13.0,
229
+ "grad_norm": 15.23548412322998,
230
+ "learning_rate": 3.7e-05,
231
+ "loss": 0.2879,
232
  "step": 299
233
  },
234
  {
235
  "epoch": 13.0,
236
+ "eval_combined_score": 0.5327170657272342,
237
+ "eval_loss": 2.0560126304626465,
238
+ "eval_pearson": 0.5303940978069547,
239
+ "eval_runtime": 0.966,
240
+ "eval_samples_per_second": 1552.868,
241
+ "eval_spearmanr": 0.5350400336475136,
242
+ "eval_steps_per_second": 6.211,
243
  "step": 299
244
  },
245
  {
246
+ "epoch": 13.0,
247
+ "step": 299,
248
+ "total_flos": 9831977194071552.0,
249
+ "train_loss": 0.9727236291636592,
250
+ "train_runtime": 168.3036,
251
+ "train_samples_per_second": 1707.925,
252
+ "train_steps_per_second": 6.833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  }
254
  ],
255
  "logging_steps": 1,
 
278
  "attributes": {}
279
  }
280
  },
281
+ "total_flos": 9831977194071552.0,
282
  "train_batch_size": 256,
283
  "trial_name": null,
284
  "trial_params": null