gokulsrinivasagan committed on
Commit
d919325
·
verified ·
1 Parent(s): c372f46

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: gokulsrinivasagan/distilbert_lda_100_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - spearmanr
8
  model-index:
9
  - name: distilbert_lda_100_v1_stsb
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,12 +30,12 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # distilbert_lda_100_v1_stsb
17
 
18
- This model is a fine-tuned version of [gokulsrinivasagan/distilbert_lda_100_v1](https://huggingface.co/gokulsrinivasagan/distilbert_lda_100_v1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.1066
21
- - Pearson: 0.7473
22
- - Spearmanr: 0.7433
23
- - Combined Score: 0.7453
24
 
25
  ## Model description
26
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: gokulsrinivasagan/distilbert_lda_100_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - spearmanr
12
  model-index:
13
  - name: distilbert_lda_100_v1_stsb
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE STSB
20
+ type: glue
21
+ args: stsb
22
+ metrics:
23
+ - name: Spearmanr
24
+ type: spearmanr
25
+ value: 0.7703935491023064
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # distilbert_lda_100_v1_stsb
32
 
33
+ This model is a fine-tuned version of [gokulsrinivasagan/distilbert_lda_100_v1](https://huggingface.co/gokulsrinivasagan/distilbert_lda_100_v1) on the GLUE STSB dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.9153
36
+ - Pearson: 0.7758
37
+ - Spearmanr: 0.7704
38
+ - Combined Score: 0.7731
39
 
40
  ## Model description
41
 
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_combined_score": NaN,
4
- "eval_loss": 2.283433198928833,
5
- "eval_pearson": NaN,
6
- "eval_runtime": 0.6501,
7
  "eval_samples": 1500,
8
- "eval_samples_per_second": 2307.493,
9
- "eval_spearmanr": NaN,
10
- "eval_steps_per_second": 9.23,
11
- "total_flos": 3046165974478848.0,
12
- "train_loss": 2.5827721512835957,
13
- "train_runtime": 60.8667,
14
  "train_samples": 5749,
15
- "train_samples_per_second": 4722.617,
16
- "train_steps_per_second": 18.894
17
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_combined_score": 0.7731133282988156,
4
+ "eval_loss": 0.9153493642807007,
5
+ "eval_pearson": 0.775833107495325,
6
+ "eval_runtime": 0.6409,
7
  "eval_samples": 1500,
8
+ "eval_samples_per_second": 2340.504,
9
+ "eval_spearmanr": 0.7703935491023064,
10
+ "eval_steps_per_second": 9.362,
11
+ "total_flos": 5711561202147840.0,
12
+ "train_loss": 0.6606892288595007,
13
+ "train_runtime": 116.1909,
14
  "train_samples": 5749,
15
+ "train_samples_per_second": 2473.946,
16
+ "train_steps_per_second": 9.898
17
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_combined_score": NaN,
4
- "eval_loss": 2.283433198928833,
5
- "eval_pearson": NaN,
6
- "eval_runtime": 0.6501,
7
  "eval_samples": 1500,
8
- "eval_samples_per_second": 2307.493,
9
- "eval_spearmanr": NaN,
10
- "eval_steps_per_second": 9.23
11
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_combined_score": 0.7731133282988156,
4
+ "eval_loss": 0.9153493642807007,
5
+ "eval_pearson": 0.775833107495325,
6
+ "eval_runtime": 0.6409,
7
  "eval_samples": 1500,
8
+ "eval_samples_per_second": 2340.504,
9
+ "eval_spearmanr": 0.7703935491023064,
10
+ "eval_steps_per_second": 9.362
11
  }
logs/events.out.tfevents.1733321256.ki-g0008.1206436.31 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d21c145e7bd5fccff7d7718aa7d11f68ec700d1361d212869ee2a909fbe9725
3
+ size 521
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 3046165974478848.0,
4
- "train_loss": 2.5827721512835957,
5
- "train_runtime": 60.8667,
6
  "train_samples": 5749,
7
- "train_samples_per_second": 4722.617,
8
- "train_steps_per_second": 18.894
9
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "total_flos": 5711561202147840.0,
4
+ "train_loss": 0.6606892288595007,
5
+ "train_runtime": 116.1909,
6
  "train_samples": 5749,
7
+ "train_samples_per_second": 2473.946,
8
+ "train_steps_per_second": 9.898
9
  }
trainer_state.json CHANGED
@@ -1,165 +1,291 @@
1
  {
2
- "best_metric": 2.283433198928833,
3
- "best_model_checkpoint": "distilbert_lda_100_v1_stsb/checkpoint-69",
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 184,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.36644172668457,
14
- "learning_rate": 0.00098,
15
- "loss": 5.2042,
16
  "step": 23
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_combined_score": NaN,
21
- "eval_loss": 2.2958452701568604,
22
- "eval_pearson": NaN,
23
- "eval_runtime": 0.6458,
24
- "eval_samples_per_second": 2322.538,
25
- "eval_spearmanr": NaN,
26
- "eval_steps_per_second": 9.29,
27
  "step": 23
28
  },
29
  {
30
  "epoch": 2.0,
31
- "grad_norm": 3.7728922367095947,
32
- "learning_rate": 0.00096,
33
- "loss": 2.2241,
34
  "step": 46
35
  },
36
  {
37
  "epoch": 2.0,
38
- "eval_combined_score": NaN,
39
- "eval_loss": 2.8777623176574707,
40
- "eval_pearson": NaN,
41
- "eval_runtime": 0.641,
42
- "eval_samples_per_second": 2339.956,
43
- "eval_spearmanr": NaN,
44
- "eval_steps_per_second": 9.36,
45
  "step": 46
46
  },
47
  {
48
  "epoch": 3.0,
49
- "grad_norm": 3.375149965286255,
50
- "learning_rate": 0.00094,
51
- "loss": 2.1945,
52
  "step": 69
53
  },
54
  {
55
  "epoch": 3.0,
56
- "eval_combined_score": NaN,
57
- "eval_loss": 2.283433198928833,
58
- "eval_pearson": NaN,
59
- "eval_runtime": 0.6368,
60
- "eval_samples_per_second": 2355.423,
61
- "eval_spearmanr": NaN,
62
- "eval_steps_per_second": 9.422,
63
  "step": 69
64
  },
65
  {
66
  "epoch": 4.0,
67
- "grad_norm": 18.793352127075195,
68
- "learning_rate": 0.00092,
69
- "loss": 2.2493,
70
  "step": 92
71
  },
72
  {
73
  "epoch": 4.0,
74
- "eval_combined_score": NaN,
75
- "eval_loss": 2.3354201316833496,
76
- "eval_pearson": NaN,
77
- "eval_runtime": 0.6344,
78
- "eval_samples_per_second": 2364.427,
79
- "eval_spearmanr": NaN,
80
- "eval_steps_per_second": 9.458,
81
  "step": 92
82
  },
83
  {
84
  "epoch": 5.0,
85
- "grad_norm": 17.609710693359375,
86
- "learning_rate": 0.0009000000000000001,
87
- "loss": 2.2051,
88
  "step": 115
89
  },
90
  {
91
  "epoch": 5.0,
92
- "eval_combined_score": NaN,
93
- "eval_loss": 2.5273873805999756,
94
- "eval_pearson": NaN,
95
- "eval_runtime": 0.6294,
96
- "eval_samples_per_second": 2383.05,
97
- "eval_spearmanr": NaN,
98
- "eval_steps_per_second": 9.532,
99
  "step": 115
100
  },
101
  {
102
  "epoch": 6.0,
103
- "grad_norm": 4.918025016784668,
104
- "learning_rate": 0.00088,
105
- "loss": 2.1982,
106
  "step": 138
107
  },
108
  {
109
  "epoch": 6.0,
110
- "eval_combined_score": NaN,
111
- "eval_loss": 2.3758695125579834,
112
- "eval_pearson": NaN,
113
- "eval_runtime": 0.6533,
114
- "eval_samples_per_second": 2296.056,
115
- "eval_spearmanr": NaN,
116
- "eval_steps_per_second": 9.184,
117
  "step": 138
118
  },
119
  {
120
  "epoch": 7.0,
121
- "grad_norm": 24.767385482788086,
122
- "learning_rate": 0.00086,
123
- "loss": 2.1983,
124
  "step": 161
125
  },
126
  {
127
  "epoch": 7.0,
128
- "eval_combined_score": NaN,
129
- "eval_loss": 2.8532462120056152,
130
- "eval_pearson": NaN,
131
- "eval_runtime": 0.6508,
132
- "eval_samples_per_second": 2304.905,
133
- "eval_spearmanr": NaN,
134
- "eval_steps_per_second": 9.22,
135
  "step": 161
136
  },
137
  {
138
  "epoch": 8.0,
139
- "grad_norm": 9.223726272583008,
140
- "learning_rate": 0.00084,
141
- "loss": 2.1885,
142
  "step": 184
143
  },
144
  {
145
  "epoch": 8.0,
146
- "eval_combined_score": NaN,
147
- "eval_loss": 2.7600629329681396,
148
- "eval_pearson": NaN,
149
- "eval_runtime": 0.6498,
150
- "eval_samples_per_second": 2308.561,
151
- "eval_spearmanr": NaN,
152
- "eval_steps_per_second": 9.234,
153
  "step": 184
154
  },
155
  {
156
- "epoch": 8.0,
157
- "step": 184,
158
- "total_flos": 3046165974478848.0,
159
- "train_loss": 2.5827721512835957,
160
- "train_runtime": 60.8667,
161
- "train_samples_per_second": 4722.617,
162
- "train_steps_per_second": 18.894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
  ],
165
  "logging_steps": 1,
@@ -188,7 +314,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 3046165974478848.0,
192
  "train_batch_size": 256,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9153493642807007,
3
+ "best_model_checkpoint": "distilbert_lda_100_v1_stsb/checkpoint-230",
4
+ "epoch": 15.0,
5
  "eval_steps": 500,
6
+ "global_step": 345,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 6.528723239898682,
14
+ "learning_rate": 4.9e-05,
15
+ "loss": 2.622,
16
  "step": 23
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_combined_score": 0.09512879196662656,
21
+ "eval_loss": 2.529794216156006,
22
+ "eval_pearson": 0.10113703827077289,
23
+ "eval_runtime": 0.6476,
24
+ "eval_samples_per_second": 2316.18,
25
+ "eval_spearmanr": 0.08912054566248023,
26
+ "eval_steps_per_second": 9.265,
27
  "step": 23
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 14.141378402709961,
32
+ "learning_rate": 4.8e-05,
33
+ "loss": 1.8404,
34
  "step": 46
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_combined_score": 0.46451389801303666,
39
+ "eval_loss": 2.334291458129883,
40
+ "eval_pearson": 0.4642190872924477,
41
+ "eval_runtime": 0.6424,
42
+ "eval_samples_per_second": 2334.917,
43
+ "eval_spearmanr": 0.4648087087336257,
44
+ "eval_steps_per_second": 9.34,
45
  "step": 46
46
  },
47
  {
48
  "epoch": 3.0,
49
+ "grad_norm": 15.804608345031738,
50
+ "learning_rate": 4.7e-05,
51
+ "loss": 1.3143,
52
  "step": 69
53
  },
54
  {
55
  "epoch": 3.0,
56
+ "eval_combined_score": 0.6702617110676681,
57
+ "eval_loss": 1.2509400844573975,
58
+ "eval_pearson": 0.6735628956208805,
59
+ "eval_runtime": 0.7279,
60
+ "eval_samples_per_second": 2060.6,
61
+ "eval_spearmanr": 0.6669605265144558,
62
+ "eval_steps_per_second": 8.242,
63
  "step": 69
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "grad_norm": 13.790794372558594,
68
+ "learning_rate": 4.600000000000001e-05,
69
+ "loss": 0.8809,
70
  "step": 92
71
  },
72
  {
73
  "epoch": 4.0,
74
+ "eval_combined_score": 0.7213381248378519,
75
+ "eval_loss": 1.3873708248138428,
76
+ "eval_pearson": 0.7172298554270424,
77
+ "eval_runtime": 0.6538,
78
+ "eval_samples_per_second": 2294.292,
79
+ "eval_spearmanr": 0.7254463942486612,
80
+ "eval_steps_per_second": 9.177,
81
  "step": 92
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "grad_norm": 12.30782699584961,
86
+ "learning_rate": 4.5e-05,
87
+ "loss": 0.6317,
88
  "step": 115
89
  },
90
  {
91
  "epoch": 5.0,
92
+ "eval_combined_score": 0.7164477017307784,
93
+ "eval_loss": 1.583528757095337,
94
+ "eval_pearson": 0.7090720167762655,
95
+ "eval_runtime": 0.6363,
96
+ "eval_samples_per_second": 2357.515,
97
+ "eval_spearmanr": 0.7238233866852912,
98
+ "eval_steps_per_second": 9.43,
99
  "step": 115
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "grad_norm": 10.6016845703125,
104
+ "learning_rate": 4.4000000000000006e-05,
105
+ "loss": 0.5139,
106
  "step": 138
107
  },
108
  {
109
  "epoch": 6.0,
110
+ "eval_combined_score": 0.7456445881950656,
111
+ "eval_loss": 1.279284954071045,
112
+ "eval_pearson": 0.744296040490723,
113
+ "eval_runtime": 0.6455,
114
+ "eval_samples_per_second": 2323.754,
115
+ "eval_spearmanr": 0.7469931358994083,
116
+ "eval_steps_per_second": 9.295,
117
  "step": 138
118
  },
119
  {
120
  "epoch": 7.0,
121
+ "grad_norm": 6.149612903594971,
122
+ "learning_rate": 4.3e-05,
123
+ "loss": 0.3919,
124
  "step": 161
125
  },
126
  {
127
  "epoch": 7.0,
128
+ "eval_combined_score": 0.7555569821393648,
129
+ "eval_loss": 1.0237528085708618,
130
+ "eval_pearson": 0.7576470777149404,
131
+ "eval_runtime": 0.6495,
132
+ "eval_samples_per_second": 2309.412,
133
+ "eval_spearmanr": 0.7534668865637894,
134
+ "eval_steps_per_second": 9.238,
135
  "step": 161
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "grad_norm": 5.594114303588867,
140
+ "learning_rate": 4.2e-05,
141
+ "loss": 0.3125,
142
  "step": 184
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "eval_combined_score": 0.734024640227608,
147
+ "eval_loss": 1.4519073963165283,
148
+ "eval_pearson": 0.7331238450711645,
149
+ "eval_runtime": 0.6526,
150
+ "eval_samples_per_second": 2298.344,
151
+ "eval_spearmanr": 0.7349254353840515,
152
+ "eval_steps_per_second": 9.193,
153
  "step": 184
154
  },
155
  {
156
+ "epoch": 9.0,
157
+ "grad_norm": 7.766717910766602,
158
+ "learning_rate": 4.1e-05,
159
+ "loss": 0.281,
160
+ "step": 207
161
+ },
162
+ {
163
+ "epoch": 9.0,
164
+ "eval_combined_score": 0.7381812541060337,
165
+ "eval_loss": 1.256384253501892,
166
+ "eval_pearson": 0.7389521632675855,
167
+ "eval_runtime": 0.6521,
168
+ "eval_samples_per_second": 2300.157,
169
+ "eval_spearmanr": 0.7374103449444821,
170
+ "eval_steps_per_second": 9.201,
171
+ "step": 207
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "grad_norm": 7.7253313064575195,
176
+ "learning_rate": 4e-05,
177
+ "loss": 0.2395,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 10.0,
182
+ "eval_combined_score": 0.7731133282988156,
183
+ "eval_loss": 0.9153493642807007,
184
+ "eval_pearson": 0.775833107495325,
185
+ "eval_runtime": 0.654,
186
+ "eval_samples_per_second": 2293.525,
187
+ "eval_spearmanr": 0.7703935491023064,
188
+ "eval_steps_per_second": 9.174,
189
+ "step": 230
190
+ },
191
+ {
192
+ "epoch": 11.0,
193
+ "grad_norm": 8.97021484375,
194
+ "learning_rate": 3.9000000000000006e-05,
195
+ "loss": 0.2219,
196
+ "step": 253
197
+ },
198
+ {
199
+ "epoch": 11.0,
200
+ "eval_combined_score": 0.7508687271969791,
201
+ "eval_loss": 1.2410622835159302,
202
+ "eval_pearson": 0.7508843218702125,
203
+ "eval_runtime": 0.6479,
204
+ "eval_samples_per_second": 2315.036,
205
+ "eval_spearmanr": 0.7508531325237456,
206
+ "eval_steps_per_second": 9.26,
207
+ "step": 253
208
+ },
209
+ {
210
+ "epoch": 12.0,
211
+ "grad_norm": 7.8722004890441895,
212
+ "learning_rate": 3.8e-05,
213
+ "loss": 0.1923,
214
+ "step": 276
215
+ },
216
+ {
217
+ "epoch": 12.0,
218
+ "eval_combined_score": 0.7436207323425442,
219
+ "eval_loss": 1.5144480466842651,
220
+ "eval_pearson": 0.7428768737971987,
221
+ "eval_runtime": 0.6534,
222
+ "eval_samples_per_second": 2295.818,
223
+ "eval_spearmanr": 0.7443645908878898,
224
+ "eval_steps_per_second": 9.183,
225
+ "step": 276
226
+ },
227
+ {
228
+ "epoch": 13.0,
229
+ "grad_norm": 5.461670398712158,
230
+ "learning_rate": 3.7e-05,
231
+ "loss": 0.1688,
232
+ "step": 299
233
+ },
234
+ {
235
+ "epoch": 13.0,
236
+ "eval_combined_score": 0.7493074527202994,
237
+ "eval_loss": 1.0667222738265991,
238
+ "eval_pearson": 0.7517996606156556,
239
+ "eval_runtime": 0.6697,
240
+ "eval_samples_per_second": 2239.742,
241
+ "eval_spearmanr": 0.7468152448249431,
242
+ "eval_steps_per_second": 8.959,
243
+ "step": 299
244
+ },
245
+ {
246
+ "epoch": 14.0,
247
+ "grad_norm": 4.178849220275879,
248
+ "learning_rate": 3.6e-05,
249
+ "loss": 0.1494,
250
+ "step": 322
251
+ },
252
+ {
253
+ "epoch": 14.0,
254
+ "eval_combined_score": 0.7492505592661682,
255
+ "eval_loss": 1.2371269464492798,
256
+ "eval_pearson": 0.750178139345508,
257
+ "eval_runtime": 0.6719,
258
+ "eval_samples_per_second": 2232.526,
259
+ "eval_spearmanr": 0.7483229791868286,
260
+ "eval_steps_per_second": 8.93,
261
+ "step": 322
262
+ },
263
+ {
264
+ "epoch": 15.0,
265
+ "grad_norm": 4.821505069732666,
266
+ "learning_rate": 3.5e-05,
267
+ "loss": 0.1498,
268
+ "step": 345
269
+ },
270
+ {
271
+ "epoch": 15.0,
272
+ "eval_combined_score": 0.7453157328804569,
273
+ "eval_loss": 1.1066056489944458,
274
+ "eval_pearson": 0.7473142192268427,
275
+ "eval_runtime": 0.6418,
276
+ "eval_samples_per_second": 2337.353,
277
+ "eval_spearmanr": 0.7433172465340713,
278
+ "eval_steps_per_second": 9.349,
279
+ "step": 345
280
+ },
281
+ {
282
+ "epoch": 15.0,
283
+ "step": 345,
284
+ "total_flos": 5711561202147840.0,
285
+ "train_loss": 0.6606892288595007,
286
+ "train_runtime": 116.1909,
287
+ "train_samples_per_second": 2473.946,
288
+ "train_steps_per_second": 9.898
289
  }
290
  ],
291
  "logging_steps": 1,
 
314
  "attributes": {}
315
  }
316
  },
317
+ "total_flos": 5711561202147840.0,
318
  "train_batch_size": 256,
319
  "trial_name": null,
320
  "trial_params": null