avsolatorio commited on
Commit
1de63e6
·
verified ·
1 Parent(s): 4bd17bc

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -8,184 +8,184 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "Water Supply",
12
- "1": "Sexual Orientation and Gender Identity",
13
- "2": "Investment Climate",
14
- "3": "Environment",
15
- "4": "Water Resources Management",
16
- "5": "Labor Markets",
17
- "6": "Governance",
18
- "7": "Competition Policy",
19
- "8": "Long-Term Finance",
20
- "9": "Food Security Update",
21
- "10": "Social Protection",
22
- "11": "Inequality and Shared Prosperity",
23
- "12": "Infectious diseases and Vaccines",
24
- "13": "Jobs and Development",
25
- "14": "Energy",
26
- "15": "Education and Technology",
27
- "16": "Debt Relief",
28
- "17": "Financial Sector",
29
- "18": "Measuring Poverty",
30
- "19": "Education",
31
- "20": "Water",
32
- "21": "Procurement for Development",
33
- "22": "Livestock and Sustainability",
34
- "23": "Disaster Risk Management",
35
- "24": "Financial Inclusion",
36
- "25": "Trade Facilitation and Logistics",
37
- "26": "Financial Integrity",
38
- "27": "Health",
39
- "28": "Regional Integration",
40
- "29": "Forests and Landscapes",
41
- "30": "Jobs & Development",
42
- "31": "Payment Systems",
43
- "32": "Agribusiness and Value Chains",
44
- "33": "Disability Inclusion",
45
- "34": "Marine Plastic Pollution",
46
- "35": "Global Financing Facility for Women, Children and Adolescents",
47
- "36": "Skills Development",
48
- "37": "Sustainable Infrastructure Finance",
49
- "38": "Land",
50
- "39": "Sustainable Communities",
51
- "40": "Higher Education",
52
- "41": "Climate Change",
53
- "42": "Small and Medium Enterprises Finance",
54
- "43": "Teachers",
55
- "44": "Migration",
56
- "45": "Debt",
57
- "46": "Urban Development",
58
- "47": "Mining Investment and Governance Review",
59
- "48": "Innovation and Entrepreneurship",
60
- "49": "Natural Capital",
61
- "50": "Transport",
62
- "51": "Gender",
63
- "52": "Safety Nets and Cash Transfers",
64
- "53": "Inclusive Cities",
65
- "54": "Extractive Industries",
66
- "55": "Global Value Chains",
67
- "56": "Agriculture and Food",
68
- "57": "Universal Health Coverage",
69
- "58": "Sanitation",
70
- "59": "Gas Flaring Reduction",
71
- "60": "COVID-19 Hub",
72
- "61": "Girls' Education",
73
- "62": "Macroeconomics",
74
- "63": "Trade",
75
- "64": "Pandemic Preparedness and COVID-19",
76
- "65": "Infrastructure",
77
- "66": "Oceans, Fisheries, and Coastal Economies",
78
- "67": "Social Sustainability and Inclusion",
79
- "68": "Community-Driven Development",
80
- "69": "Credit Infrastructure",
81
- "70": "Pollution",
82
- "71": "Pensions",
83
- "72": "Financial Stability",
84
- "73": "One Health",
85
- "74": "Indigenous Peoples",
86
- "75": "Digital Development",
87
- "76": "Climate-Smart Agriculture",
88
- "77": "Food System Jobs",
89
- "78": "Early Childhood Development",
90
- "79": "Biodiversity",
91
- "80": "Poverty",
92
- "81": "Fragility, Conflict, and Violence",
93
- "82": "Competitiveness",
94
- "83": "Social Inclusion",
95
- "84": "Nutrition",
96
- "85": "Taxes and Government Revenue",
97
- "86": "Water in Agriculture"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
- "Agribusiness and Value Chains": 32,
103
- "Agriculture and Food": 56,
104
- "Biodiversity": 79,
105
- "COVID-19 Hub": 60,
106
- "Climate Change": 41,
107
- "Climate-Smart Agriculture": 76,
108
- "Community-Driven Development": 68,
109
- "Competition Policy": 7,
110
- "Competitiveness": 82,
111
- "Credit Infrastructure": 69,
112
- "Debt": 45,
113
- "Debt Relief": 16,
114
- "Digital Development": 75,
115
- "Disability Inclusion": 33,
116
- "Disaster Risk Management": 23,
117
- "Early Childhood Development": 78,
118
- "Education": 19,
119
- "Education and Technology": 15,
120
- "Energy": 14,
121
- "Environment": 3,
122
- "Extractive Industries": 54,
123
- "Financial Inclusion": 24,
124
- "Financial Integrity": 26,
125
- "Financial Sector": 17,
126
- "Financial Stability": 72,
127
- "Food Security Update": 9,
128
- "Food System Jobs": 77,
129
- "Forests and Landscapes": 29,
130
- "Fragility, Conflict, and Violence": 81,
131
- "Gas Flaring Reduction": 59,
132
- "Gender": 51,
133
- "Girls' Education": 61,
134
- "Global Financing Facility for Women, Children and Adolescents": 35,
135
- "Global Value Chains": 55,
136
- "Governance": 6,
137
- "Health": 27,
138
- "Higher Education": 40,
139
- "Inclusive Cities": 53,
140
- "Indigenous Peoples": 74,
141
- "Inequality and Shared Prosperity": 11,
142
- "Infectious diseases and Vaccines": 12,
143
- "Infrastructure": 65,
144
- "Innovation and Entrepreneurship": 48,
145
- "Investment Climate": 2,
146
- "Jobs & Development": 30,
147
- "Jobs and Development": 13,
148
- "Labor Markets": 5,
149
- "Land": 38,
150
- "Livestock and Sustainability": 22,
151
- "Long-Term Finance": 8,
152
- "Macroeconomics": 62,
153
- "Marine Plastic Pollution": 34,
154
- "Measuring Poverty": 18,
155
- "Migration": 44,
156
- "Mining Investment and Governance Review": 47,
157
- "Natural Capital": 49,
158
- "Nutrition": 84,
159
- "Oceans, Fisheries, and Coastal Economies": 66,
160
- "One Health": 73,
161
- "Pandemic Preparedness and COVID-19": 64,
162
- "Payment Systems": 31,
163
- "Pensions": 71,
164
- "Pollution": 70,
165
- "Poverty": 80,
166
- "Procurement for Development": 21,
167
- "Regional Integration": 28,
168
- "Safety Nets and Cash Transfers": 52,
169
- "Sanitation": 58,
170
- "Sexual Orientation and Gender Identity": 1,
171
- "Skills Development": 36,
172
- "Small and Medium Enterprises Finance": 42,
173
- "Social Inclusion": 83,
174
- "Social Protection": 10,
175
- "Social Sustainability and Inclusion": 67,
176
- "Sustainable Communities": 39,
177
- "Sustainable Infrastructure Finance": 37,
178
- "Taxes and Government Revenue": 85,
179
- "Teachers": 43,
180
- "Trade": 63,
181
- "Trade Facilitation and Logistics": 25,
182
- "Transport": 50,
183
- "Universal Health Coverage": 57,
184
- "Urban Development": 46,
185
- "Water": 20,
186
- "Water Resources Management": 4,
187
- "Water Supply": 0,
188
- "Water in Agriculture": 86
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
 
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "Global Financing Facility for Women, Children and Adolescents",
12
+ "1": "Competitiveness",
13
+ "2": "Sustainable Communities",
14
+ "3": "Education",
15
+ "4": "Natural Capital",
16
+ "5": "Regional Integration",
17
+ "6": "Jobs and Development",
18
+ "7": "Innovation and Entrepreneurship",
19
+ "8": "Inclusive Cities",
20
+ "9": "Jobs & Development",
21
+ "10": "Transport",
22
+ "11": "Sustainable Infrastructure Finance",
23
+ "12": "Pandemic Preparedness and COVID-19",
24
+ "13": "Taxes and Government Revenue",
25
+ "14": "Labor Markets",
26
+ "15": "Measuring Poverty",
27
+ "16": "Food Security Update",
28
+ "17": "Disability Inclusion",
29
+ "18": "Agriculture and Food",
30
+ "19": "Trade Facilitation and Logistics",
31
+ "20": "Social Protection",
32
+ "21": "One Health",
33
+ "22": "Biodiversity",
34
+ "23": "Social Inclusion",
35
+ "24": "Credit Infrastructure",
36
+ "25": "Water Supply",
37
+ "26": "Early Childhood Development",
38
+ "27": "Food System Jobs",
39
+ "28": "Migration",
40
+ "29": "Indigenous Peoples",
41
+ "30": "Universal Health Coverage",
42
+ "31": "Financial Sector",
43
+ "32": "Procurement for Development",
44
+ "33": "Inequality and Shared Prosperity",
45
+ "34": "COVID-19 Hub",
46
+ "35": "Poverty",
47
+ "36": "Financial Stability",
48
+ "37": "Digital Development",
49
+ "38": "Long-Term Finance",
50
+ "39": "Gas Flaring Reduction",
51
+ "40": "Mining Investment and Governance Review",
52
+ "41": "Small and Medium Enterprises Finance",
53
+ "42": "Infrastructure",
54
+ "43": "Health",
55
+ "44": "Sexual Orientation and Gender Identity",
56
+ "45": "Nutrition",
57
+ "46": "Financial Inclusion",
58
+ "47": "Fragility, Conflict, and Violence",
59
+ "48": "Debt Relief",
60
+ "49": "Disaster Risk Management",
61
+ "50": "Water in Agriculture",
62
+ "51": "Livestock and Sustainability",
63
+ "52": "Global Value Chains",
64
+ "53": "Competition Policy",
65
+ "54": "Pollution",
66
+ "55": "Urban Development",
67
+ "56": "Gender",
68
+ "57": "Safety Nets and Cash Transfers",
69
+ "58": "Forests and Landscapes",
70
+ "59": "Water Resources Management",
71
+ "60": "Extractive Industries",
72
+ "61": "Social Sustainability and Inclusion",
73
+ "62": "Energy",
74
+ "63": "Girls' Education",
75
+ "64": "Environment",
76
+ "65": "Marine Plastic Pollution",
77
+ "66": "Education and Technology",
78
+ "67": "Financial Integrity",
79
+ "68": "Oceans, Fisheries, and Coastal Economies",
80
+ "69": "Sanitation",
81
+ "70": "Land",
82
+ "71": "Higher Education",
83
+ "72": "Teachers",
84
+ "73": "Investment Climate",
85
+ "74": "Debt",
86
+ "75": "Climate Change",
87
+ "76": "Trade",
88
+ "77": "Skills Development",
89
+ "78": "Agribusiness and Value Chains",
90
+ "79": "Climate-Smart Agriculture",
91
+ "80": "Pensions",
92
+ "81": "Infectious diseases and Vaccines",
93
+ "82": "Payment Systems",
94
+ "83": "Community-Driven Development",
95
+ "84": "Water",
96
+ "85": "Governance",
97
+ "86": "Macroeconomics"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
+ "Agribusiness and Value Chains": 78,
103
+ "Agriculture and Food": 18,
104
+ "Biodiversity": 22,
105
+ "COVID-19 Hub": 34,
106
+ "Climate Change": 75,
107
+ "Climate-Smart Agriculture": 79,
108
+ "Community-Driven Development": 83,
109
+ "Competition Policy": 53,
110
+ "Competitiveness": 1,
111
+ "Credit Infrastructure": 24,
112
+ "Debt": 74,
113
+ "Debt Relief": 48,
114
+ "Digital Development": 37,
115
+ "Disability Inclusion": 17,
116
+ "Disaster Risk Management": 49,
117
+ "Early Childhood Development": 26,
118
+ "Education": 3,
119
+ "Education and Technology": 66,
120
+ "Energy": 62,
121
+ "Environment": 64,
122
+ "Extractive Industries": 60,
123
+ "Financial Inclusion": 46,
124
+ "Financial Integrity": 67,
125
+ "Financial Sector": 31,
126
+ "Financial Stability": 36,
127
+ "Food Security Update": 16,
128
+ "Food System Jobs": 27,
129
+ "Forests and Landscapes": 58,
130
+ "Fragility, Conflict, and Violence": 47,
131
+ "Gas Flaring Reduction": 39,
132
+ "Gender": 56,
133
+ "Girls' Education": 63,
134
+ "Global Financing Facility for Women, Children and Adolescents": 0,
135
+ "Global Value Chains": 52,
136
+ "Governance": 85,
137
+ "Health": 43,
138
+ "Higher Education": 71,
139
+ "Inclusive Cities": 8,
140
+ "Indigenous Peoples": 29,
141
+ "Inequality and Shared Prosperity": 33,
142
+ "Infectious diseases and Vaccines": 81,
143
+ "Infrastructure": 42,
144
+ "Innovation and Entrepreneurship": 7,
145
+ "Investment Climate": 73,
146
+ "Jobs & Development": 9,
147
+ "Jobs and Development": 6,
148
+ "Labor Markets": 14,
149
+ "Land": 70,
150
+ "Livestock and Sustainability": 51,
151
+ "Long-Term Finance": 38,
152
+ "Macroeconomics": 86,
153
+ "Marine Plastic Pollution": 65,
154
+ "Measuring Poverty": 15,
155
+ "Migration": 28,
156
+ "Mining Investment and Governance Review": 40,
157
+ "Natural Capital": 4,
158
+ "Nutrition": 45,
159
+ "Oceans, Fisheries, and Coastal Economies": 68,
160
+ "One Health": 21,
161
+ "Pandemic Preparedness and COVID-19": 12,
162
+ "Payment Systems": 82,
163
+ "Pensions": 80,
164
+ "Pollution": 54,
165
+ "Poverty": 35,
166
+ "Procurement for Development": 32,
167
+ "Regional Integration": 5,
168
+ "Safety Nets and Cash Transfers": 57,
169
+ "Sanitation": 69,
170
+ "Sexual Orientation and Gender Identity": 44,
171
+ "Skills Development": 77,
172
+ "Small and Medium Enterprises Finance": 41,
173
+ "Social Inclusion": 23,
174
+ "Social Protection": 20,
175
+ "Social Sustainability and Inclusion": 61,
176
+ "Sustainable Communities": 2,
177
+ "Sustainable Infrastructure Finance": 11,
178
+ "Taxes and Government Revenue": 13,
179
+ "Teachers": 72,
180
+ "Trade": 76,
181
+ "Trade Facilitation and Logistics": 19,
182
+ "Transport": 10,
183
+ "Universal Health Coverage": 30,
184
+ "Urban Development": 55,
185
+ "Water": 84,
186
+ "Water Resources Management": 59,
187
+ "Water Supply": 25,
188
+ "Water in Agriculture": 50
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47d9450ee32445570ac0f99bd61548d0db965a4e6edd93b15e3b6aaa0d756f1e
3
  size 567860028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171435965d16e8467d93cb72739550b78deb4e15f86639ad2ddc07a18249ce1e
3
  size 567860028
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f74756958baa31a23d6968c507d7850e8689def8038cd22b1d5acff64dbc0db
3
  size 1135783354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac4231528fab4650ae11af5c79ae4d0c4ccc8c5d483a1dde1456cc539f94f31b
3
  size 1135783354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:712d1e9e9623896c394d82d0be1b750fd445034b8cb9f0073258005a6df45273
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34649568145f465443beb487f2295ab3dad9e5f49f758646dd823029413e18fe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71d98b2235723fe888fb281dc5e6454bb5d9e489547cbc29dc5479c4d9a3698e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e136870ce715682b288c29fb4d05a0aec61f1a74b2cc393d7fb9e66e4965261
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,306 +1,55 @@
1
  {
2
- "best_metric": 0.03788253664970398,
3
- "best_model_checkpoint": "doc-topic-model_eval-00_train-02/checkpoint-11000",
4
- "epoch": 5.424063116370808,
5
  "eval_steps": 1000,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.2465483234714004,
13
- "grad_norm": 0.3801194727420807,
14
- "learning_rate": 1.95069033530572e-05,
15
- "loss": 0.1664,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.4930966469428008,
20
- "grad_norm": 0.38533496856689453,
21
- "learning_rate": 1.90138067061144e-05,
22
- "loss": 0.0941,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.9814660487265615,
28
  "eval_f1": 0.0,
29
- "eval_loss": 0.08822464942932129,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
- "eval_runtime": 25.7552,
33
- "eval_samples_per_second": 314.888,
34
- "eval_steps_per_second": 19.685,
35
  "step": 1000
36
- },
37
- {
38
- "epoch": 0.7396449704142012,
39
- "grad_norm": 0.38025280833244324,
40
- "learning_rate": 1.85207100591716e-05,
41
- "loss": 0.0858,
42
- "step": 1500
43
- },
44
- {
45
- "epoch": 0.9861932938856016,
46
- "grad_norm": 0.37026312947273254,
47
- "learning_rate": 1.80276134122288e-05,
48
- "loss": 0.0776,
49
- "step": 2000
50
- },
51
- {
52
- "epoch": 0.9861932938856016,
53
- "eval_accuracy": 0.9814660487265615,
54
- "eval_f1": 0.0,
55
- "eval_loss": 0.06934941560029984,
56
- "eval_precision": 0.0,
57
- "eval_recall": 0.0,
58
- "eval_runtime": 26.0587,
59
- "eval_samples_per_second": 311.22,
60
- "eval_steps_per_second": 19.456,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 1.232741617357002,
65
- "grad_norm": 0.42509472370147705,
66
- "learning_rate": 1.7534516765285997e-05,
67
- "loss": 0.0688,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 1.4792899408284024,
72
- "grad_norm": 0.3655988276004791,
73
- "learning_rate": 1.70414201183432e-05,
74
- "loss": 0.0628,
75
- "step": 3000
76
- },
77
- {
78
- "epoch": 1.4792899408284024,
79
- "eval_accuracy": 0.9822214663321853,
80
- "eval_f1": 0.1038719817116731,
81
- "eval_loss": 0.05777422711253166,
82
- "eval_precision": 0.7893593919652552,
83
- "eval_recall": 0.05559379062476103,
84
- "eval_runtime": 26.8512,
85
- "eval_samples_per_second": 302.035,
86
- "eval_steps_per_second": 18.882,
87
- "step": 3000
88
- },
89
- {
90
- "epoch": 1.725838264299803,
91
- "grad_norm": 0.37198927998542786,
92
- "learning_rate": 1.6548323471400396e-05,
93
- "loss": 0.0581,
94
- "step": 3500
95
- },
96
- {
97
- "epoch": 1.972386587771203,
98
- "grad_norm": 0.34727558493614197,
99
- "learning_rate": 1.6055226824457594e-05,
100
- "loss": 0.0549,
101
- "step": 4000
102
- },
103
- {
104
- "epoch": 1.972386587771203,
105
- "eval_accuracy": 0.9840894595858668,
106
- "eval_f1": 0.300647894343384,
107
- "eval_loss": 0.050482869148254395,
108
- "eval_precision": 0.8110924369747899,
109
- "eval_recall": 0.18452244398562362,
110
- "eval_runtime": 26.4214,
111
- "eval_samples_per_second": 306.948,
112
- "eval_steps_per_second": 19.189,
113
- "step": 4000
114
- },
115
- {
116
- "epoch": 2.2189349112426036,
117
- "grad_norm": 0.39387047290802,
118
- "learning_rate": 1.5562130177514792e-05,
119
- "loss": 0.0493,
120
- "step": 4500
121
- },
122
- {
123
- "epoch": 2.465483234714004,
124
- "grad_norm": 0.32869574427604675,
125
- "learning_rate": 1.5069033530571993e-05,
126
- "loss": 0.0486,
127
- "step": 5000
128
- },
129
- {
130
- "epoch": 2.465483234714004,
131
- "eval_accuracy": 0.9852473886361381,
132
- "eval_f1": 0.4197558392329562,
133
- "eval_loss": 0.04679542034864426,
134
- "eval_precision": 0.7743726861373921,
135
- "eval_recall": 0.2879100711172287,
136
- "eval_runtime": 27.7172,
137
- "eval_samples_per_second": 292.598,
138
- "eval_steps_per_second": 18.292,
139
- "step": 5000
140
- },
141
- {
142
- "epoch": 2.712031558185404,
143
- "grad_norm": 0.44469282031059265,
144
- "learning_rate": 1.4575936883629191e-05,
145
- "loss": 0.047,
146
- "step": 5500
147
- },
148
- {
149
- "epoch": 2.9585798816568047,
150
- "grad_norm": 0.34966567158699036,
151
- "learning_rate": 1.4082840236686392e-05,
152
- "loss": 0.0456,
153
- "step": 6000
154
- },
155
- {
156
- "epoch": 2.9585798816568047,
157
- "eval_accuracy": 0.9862536672477571,
158
- "eval_f1": 0.47643724696356277,
159
- "eval_loss": 0.04318871349096298,
160
- "eval_precision": 0.8100220264317181,
161
- "eval_recall": 0.33746272080752465,
162
- "eval_runtime": 27.2164,
163
- "eval_samples_per_second": 297.982,
164
- "eval_steps_per_second": 18.628,
165
- "step": 6000
166
- },
167
- {
168
- "epoch": 3.2051282051282053,
169
- "grad_norm": 0.3240692615509033,
170
- "learning_rate": 1.3589743589743592e-05,
171
- "loss": 0.0411,
172
- "step": 6500
173
- },
174
- {
175
- "epoch": 3.4516765285996054,
176
- "grad_norm": 0.3621165454387665,
177
- "learning_rate": 1.309664694280079e-05,
178
- "loss": 0.0409,
179
- "step": 7000
180
- },
181
- {
182
- "epoch": 3.4516765285996054,
183
- "eval_accuracy": 0.9866760208058732,
184
- "eval_f1": 0.5272791270679338,
185
- "eval_loss": 0.041621919721364975,
186
- "eval_precision": 0.7698972099853157,
187
- "eval_recall": 0.4009329356886136,
188
- "eval_runtime": 26.6306,
189
- "eval_samples_per_second": 304.537,
190
- "eval_steps_per_second": 19.038,
191
- "step": 7000
192
- },
193
- {
194
- "epoch": 3.698224852071006,
195
- "grad_norm": 0.4138104319572449,
196
- "learning_rate": 1.2603550295857989e-05,
197
- "loss": 0.0403,
198
- "step": 7500
199
- },
200
- {
201
- "epoch": 3.9447731755424065,
202
- "grad_norm": 0.42052382230758667,
203
- "learning_rate": 1.2110453648915189e-05,
204
- "loss": 0.0388,
205
- "step": 8000
206
- },
207
- {
208
- "epoch": 3.9447731755424065,
209
- "eval_accuracy": 0.9869098742860383,
210
- "eval_f1": 0.5476097178683386,
211
- "eval_loss": 0.04007947817444801,
212
- "eval_precision": 0.7616841531543808,
213
- "eval_recall": 0.4274680737172134,
214
- "eval_runtime": 26.5409,
215
- "eval_samples_per_second": 305.567,
216
- "eval_steps_per_second": 19.103,
217
- "step": 8000
218
- },
219
- {
220
- "epoch": 4.191321499013807,
221
- "grad_norm": 0.48471179604530334,
222
- "learning_rate": 1.1617357001972386e-05,
223
- "loss": 0.0376,
224
- "step": 8500
225
- },
226
- {
227
- "epoch": 4.437869822485207,
228
- "grad_norm": 0.3329317271709442,
229
- "learning_rate": 1.1124260355029586e-05,
230
- "loss": 0.0356,
231
- "step": 9000
232
- },
233
- {
234
- "epoch": 4.437869822485207,
235
- "eval_accuracy": 0.9870360134359454,
236
- "eval_f1": 0.5559924275520606,
237
- "eval_loss": 0.039172105491161346,
238
- "eval_precision": 0.761164274322169,
239
- "eval_recall": 0.43794448267951364,
240
- "eval_runtime": 26.6005,
241
- "eval_samples_per_second": 304.881,
242
- "eval_steps_per_second": 19.06,
243
- "step": 9000
244
- },
245
- {
246
- "epoch": 4.684418145956608,
247
- "grad_norm": 0.43250563740730286,
248
- "learning_rate": 1.0631163708086787e-05,
249
- "loss": 0.0352,
250
- "step": 9500
251
- },
252
- {
253
- "epoch": 4.930966469428008,
254
- "grad_norm": 0.49166053533554077,
255
- "learning_rate": 1.0138067061143987e-05,
256
- "loss": 0.035,
257
- "step": 10000
258
- },
259
- {
260
- "epoch": 4.930966469428008,
261
- "eval_accuracy": 0.9873464007823461,
262
- "eval_f1": 0.5759475634083785,
263
- "eval_loss": 0.03805544227361679,
264
- "eval_precision": 0.7600601729973674,
265
- "eval_recall": 0.46363844918559305,
266
- "eval_runtime": 26.4106,
267
- "eval_samples_per_second": 307.074,
268
- "eval_steps_per_second": 19.197,
269
- "step": 10000
270
- },
271
- {
272
- "epoch": 5.177514792899408,
273
- "grad_norm": 0.562140703201294,
274
- "learning_rate": 9.644970414201184e-06,
275
- "loss": 0.033,
276
- "step": 10500
277
- },
278
- {
279
- "epoch": 5.424063116370808,
280
- "grad_norm": 0.41164836287498474,
281
- "learning_rate": 9.151873767258384e-06,
282
- "loss": 0.0316,
283
- "step": 11000
284
- },
285
- {
286
- "epoch": 5.424063116370808,
287
- "eval_accuracy": 0.9873619910143572,
288
- "eval_f1": 0.5883764944836819,
289
- "eval_loss": 0.03788253664970398,
290
- "eval_precision": 0.742254833449802,
291
- "eval_recall": 0.4873441920929877,
292
- "eval_runtime": 26.6635,
293
- "eval_samples_per_second": 304.161,
294
- "eval_steps_per_second": 19.015,
295
- "step": 11000
296
  }
297
  ],
298
  "logging_steps": 500,
299
- "max_steps": 20280,
300
  "num_input_tokens_seen": 0,
301
- "num_train_epochs": 10,
302
  "save_steps": 1000,
303
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
304
  "TrainerControl": {
305
  "args": {
306
  "should_epoch_stop": false,
@@ -312,7 +61,7 @@
312
  "attributes": {}
313
  }
314
  },
315
- "total_flos": 248295812264388.0,
316
  "train_batch_size": 4,
317
  "trial_name": null,
318
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.0863451287150383,
3
+ "best_model_checkpoint": "doc-topic-model_eval-00_train-02/checkpoint-1000",
4
+ "epoch": 0.4930966469428008,
5
  "eval_steps": 1000,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.2465483234714004,
13
+ "grad_norm": 0.3767470121383667,
14
+ "learning_rate": 1.9950690335305722e-05,
15
+ "loss": 0.1663,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.4930966469428008,
20
+ "grad_norm": 0.4007192850112915,
21
+ "learning_rate": 1.9901380670611442e-05,
22
+ "loss": 0.0932,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.9814660487265615,
28
  "eval_f1": 0.0,
29
+ "eval_loss": 0.0863451287150383,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
+ "eval_runtime": 11.8139,
33
+ "eval_samples_per_second": 686.478,
34
+ "eval_steps_per_second": 2.709,
35
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
  "logging_steps": 500,
39
+ "max_steps": 202800,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 100,
42
  "save_steps": 1000,
43
  "stateful_callbacks": {
44
+ "EarlyStoppingCallback": {
45
+ "args": {
46
+ "early_stopping_patience": 5,
47
+ "early_stopping_threshold": 0.0
48
+ },
49
+ "attributes": {
50
+ "early_stopping_patience_counter": 0
51
+ }
52
+ },
53
  "TrainerControl": {
54
  "args": {
55
  "should_epoch_stop": false,
 
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 22565797728696.0,
65
  "train_batch_size": 4,
66
  "trial_name": null,
67
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b15ea728e9972258bd6817d47dab7216621d086d49df9d5c90ce4cef3be12a5
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f3fd8049a89729a3c1cf7bfe4d3a85504bb9c099d007b31c6e5cbeb3e690e4
3
  size 5240