AlekseyKorshuk committed on
Commit
0a238b6
·
1 Parent(s): 204fd78

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/oxxxymiron")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/35c25tqd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1z3u6lod) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1z3u6lod/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/oxxxymiron")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/if83r8sb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/s58wdkfz) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/s58wdkfz/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.3626197576522827, "eval_runtime": 22.1222, "eval_samples_per_second": 20.839, "eval_steps_per_second": 2.622, "epoch": 16.0}
 
1
+ {"eval_loss": 1.3110859394073486, "eval_runtime": 18.5351, "eval_samples_per_second": 20.825, "eval_steps_per_second": 2.644, "epoch": 26.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb82b94c7d34decc95ea0c0ed51c0cccd4dc0cf1f0a8925648cdfafaeb1ad6e6
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf04b5cb92f0bb037efcff6106efd6559eadeb2c169561274b38adf7974d3cbd
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb0cfb7ad5f79a151980f8e8fcb534c9dbe0dc5ca44d80132aa39db60c590550
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89bd3fcc9080941291eb43d62d53800524426bd1841ce2e59ad94d37b402ee47
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94f6e5d2d51fc4962178d6141bcb4e442c8a4b69e91f35e541dc52137ea1701b
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b078eadcf9ca889f51cd804d94e22e90d64d6befb42b2adc89663598895f61d
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2072ed4e05f726a0eacce3eb003550040e7f5d45ef05ee6a83533a4c6834a56f
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c93f956bb61c562ef5de6ecf68f0acb1e428bb13e97ddfd9eb68fef96714fcef
3
  size 14439
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b130d72fae4cb24993ff792bae18a70e194b6d74fdcb623b38c9f59180e61d5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40aa28790758cc10852db2e41872a7c552d32089589ee1f68283d1cfcd6cf1da
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.3626197576522827,
3
- "best_model_checkpoint": "output/oxxxymiron/checkpoint-4144",
4
- "epoch": 16.0,
5
- "global_step": 4144,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4982,11 +4982,543 @@
4982
  "eval_samples_per_second": 20.897,
4983
  "eval_steps_per_second": 2.629,
4984
  "step": 4144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4985
  }
4986
  ],
4987
- "max_steps": 4144,
4988
- "num_train_epochs": 16,
4989
- "total_flos": 4329217032192000.0,
4990
  "trial_name": null,
4991
  "trial_params": null
4992
  }
 
1
  {
2
+ "best_metric": 1.3110859394073486,
3
+ "best_model_checkpoint": "output/oxxxymiron/checkpoint-4573",
4
+ "epoch": 17.0,
5
+ "global_step": 4573,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4982
  "eval_samples_per_second": 20.897,
4983
  "eval_steps_per_second": 2.629,
4984
  "step": 4144
4985
+ },
4986
+ {
4987
+ "epoch": 15.41,
4988
+ "learning_rate": 4.923820788333643e-05,
4989
+ "loss": 1.4417,
4990
+ "step": 4145
4991
+ },
4992
+ {
4993
+ "epoch": 15.43,
4994
+ "learning_rate": 5.311198428226757e-05,
4995
+ "loss": 1.5224,
4996
+ "step": 4150
4997
+ },
4998
+ {
4999
+ "epoch": 15.45,
5000
+ "learning_rate": 5.7038557476801184e-05,
5001
+ "loss": 1.4984,
5002
+ "step": 4155
5003
+ },
5004
+ {
5005
+ "epoch": 15.46,
5006
+ "learning_rate": 6.100454224793001e-05,
5007
+ "loss": 1.4427,
5008
+ "step": 4160
5009
+ },
5010
+ {
5011
+ "epoch": 15.48,
5012
+ "learning_rate": 6.49964190272892e-05,
5013
+ "loss": 1.4789,
5014
+ "step": 4165
5015
+ },
5016
+ {
5017
+ "epoch": 15.5,
5018
+ "learning_rate": 6.900057998375254e-05,
5019
+ "loss": 1.5665,
5020
+ "step": 4170
5021
+ },
5022
+ {
5023
+ "epoch": 15.52,
5024
+ "learning_rate": 7.300337541089789e-05,
5025
+ "loss": 1.5002,
5026
+ "step": 4175
5027
+ },
5028
+ {
5029
+ "epoch": 15.54,
5030
+ "learning_rate": 7.699116025723293e-05,
5031
+ "loss": 1.4668,
5032
+ "step": 4180
5033
+ },
5034
+ {
5035
+ "epoch": 15.56,
5036
+ "learning_rate": 8.09503406405399e-05,
5037
+ "loss": 1.3757,
5038
+ "step": 4185
5039
+ },
5040
+ {
5041
+ "epoch": 15.58,
5042
+ "learning_rate": 8.48674201878012e-05,
5043
+ "loss": 1.4722,
5044
+ "step": 4190
5045
+ },
5046
+ {
5047
+ "epoch": 15.59,
5048
+ "learning_rate": 8.872904604271726e-05,
5049
+ "loss": 1.4961,
5050
+ "step": 4195
5051
+ },
5052
+ {
5053
+ "epoch": 15.61,
5054
+ "learning_rate": 9.252205438400528e-05,
5055
+ "loss": 1.4798,
5056
+ "step": 4200
5057
+ },
5058
+ {
5059
+ "epoch": 15.63,
5060
+ "learning_rate": 9.623351529928802e-05,
5061
+ "loss": 1.5392,
5062
+ "step": 4205
5063
+ },
5064
+ {
5065
+ "epoch": 15.65,
5066
+ "learning_rate": 9.985077686162523e-05,
5067
+ "loss": 1.5653,
5068
+ "step": 4210
5069
+ },
5070
+ {
5071
+ "epoch": 15.67,
5072
+ "learning_rate": 0.00010336150825841603,
5073
+ "loss": 1.4743,
5074
+ "step": 4215
5075
+ },
5076
+ {
5077
+ "epoch": 15.69,
5078
+ "learning_rate": 0.00010675374182567242,
5079
+ "loss": 1.4201,
5080
+ "step": 4220
5081
+ },
5082
+ {
5083
+ "epoch": 15.71,
5084
+ "learning_rate": 0.00011001591384435138,
5085
+ "loss": 1.3889,
5086
+ "step": 4225
5087
+ },
5088
+ {
5089
+ "epoch": 15.72,
5090
+ "learning_rate": 0.00011313690395969416,
5091
+ "loss": 1.4913,
5092
+ "step": 4230
5093
+ },
5094
+ {
5095
+ "epoch": 15.74,
5096
+ "learning_rate": 0.00011610607308918656,
5097
+ "loss": 1.3722,
5098
+ "step": 4235
5099
+ },
5100
+ {
5101
+ "epoch": 15.76,
5102
+ "learning_rate": 0.00011891329968992182,
5103
+ "loss": 1.4133,
5104
+ "step": 4240
5105
+ },
5106
+ {
5107
+ "epoch": 15.78,
5108
+ "learning_rate": 0.0001215490142617292,
5109
+ "loss": 1.36,
5110
+ "step": 4245
5111
+ },
5112
+ {
5113
+ "epoch": 15.8,
5114
+ "learning_rate": 0.00012400423196845864,
5115
+ "loss": 1.361,
5116
+ "step": 4250
5117
+ },
5118
+ {
5119
+ "epoch": 15.82,
5120
+ "learning_rate": 0.00012627058326621316,
5121
+ "loss": 1.542,
5122
+ "step": 4255
5123
+ },
5124
+ {
5125
+ "epoch": 15.84,
5126
+ "learning_rate": 0.0001283403424341258,
5127
+ "loss": 1.4983,
5128
+ "step": 4260
5129
+ },
5130
+ {
5131
+ "epoch": 15.86,
5132
+ "learning_rate": 0.00013020645391041629,
5133
+ "loss": 1.4985,
5134
+ "step": 4265
5135
+ },
5136
+ {
5137
+ "epoch": 15.87,
5138
+ "learning_rate": 0.00013186255634396195,
5139
+ "loss": 1.4767,
5140
+ "step": 4270
5141
+ },
5142
+ {
5143
+ "epoch": 15.89,
5144
+ "learning_rate": 0.00013330300427938103,
5145
+ "loss": 1.4258,
5146
+ "step": 4275
5147
+ },
5148
+ {
5149
+ "epoch": 15.91,
5150
+ "learning_rate": 0.00013452288740171763,
5151
+ "loss": 1.4773,
5152
+ "step": 4280
5153
+ },
5154
+ {
5155
+ "epoch": 15.93,
5156
+ "learning_rate": 0.00013551804727511717,
5157
+ "loss": 1.462,
5158
+ "step": 4285
5159
+ },
5160
+ {
5161
+ "epoch": 15.95,
5162
+ "learning_rate": 0.0001362850915184393,
5163
+ "loss": 1.4688,
5164
+ "step": 4290
5165
+ },
5166
+ {
5167
+ "epoch": 15.97,
5168
+ "learning_rate": 0.00013682140536947865,
5169
+ "loss": 1.5146,
5170
+ "step": 4295
5171
+ },
5172
+ {
5173
+ "epoch": 15.99,
5174
+ "learning_rate": 0.00013712516059837763,
5175
+ "loss": 1.5462,
5176
+ "step": 4300
5177
+ },
5178
+ {
5179
+ "epoch": 16.0,
5180
+ "eval_loss": 1.3411859273910522,
5181
+ "eval_runtime": 18.5136,
5182
+ "eval_samples_per_second": 20.85,
5183
+ "eval_steps_per_second": 2.647,
5184
+ "step": 4304
5185
+ },
5186
+ {
5187
+ "epoch": 16.0,
5188
+ "learning_rate": 0.00013719532173984305,
5189
+ "loss": 1.3395,
5190
+ "step": 4305
5191
+ },
5192
+ {
5193
+ "epoch": 16.02,
5194
+ "learning_rate": 0.00013703164962292424,
5195
+ "loss": 1.3995,
5196
+ "step": 4310
5197
+ },
5198
+ {
5199
+ "epoch": 16.04,
5200
+ "learning_rate": 0.00013663470218631772,
5201
+ "loss": 1.4118,
5202
+ "step": 4315
5203
+ },
5204
+ {
5205
+ "epoch": 16.06,
5206
+ "learning_rate": 0.00013600583257642132,
5207
+ "loss": 1.3778,
5208
+ "step": 4320
5209
+ },
5210
+ {
5211
+ "epoch": 16.08,
5212
+ "learning_rate": 0.00013514718453461912,
5213
+ "loss": 1.4416,
5214
+ "step": 4325
5215
+ },
5216
+ {
5217
+ "epoch": 16.1,
5218
+ "learning_rate": 0.0001340616850895236,
5219
+ "loss": 1.4926,
5220
+ "step": 4330
5221
+ },
5222
+ {
5223
+ "epoch": 16.12,
5224
+ "learning_rate": 0.00013275303457908525,
5225
+ "loss": 1.4668,
5226
+ "step": 4335
5227
+ },
5228
+ {
5229
+ "epoch": 16.13,
5230
+ "learning_rate": 0.00013122569403658038,
5231
+ "loss": 1.3931,
5232
+ "step": 4340
5233
+ },
5234
+ {
5235
+ "epoch": 16.15,
5236
+ "learning_rate": 0.00012948486998348453,
5237
+ "loss": 1.403,
5238
+ "step": 4345
5239
+ },
5240
+ {
5241
+ "epoch": 16.17,
5242
+ "learning_rate": 0.0001275364966810606,
5243
+ "loss": 1.3802,
5244
+ "step": 4350
5245
+ },
5246
+ {
5247
+ "epoch": 16.19,
5248
+ "learning_rate": 0.00012538721590117088,
5249
+ "loss": 1.429,
5250
+ "step": 4355
5251
+ },
5252
+ {
5253
+ "epoch": 16.21,
5254
+ "learning_rate": 0.00012304435428527134,
5255
+ "loss": 1.4773,
5256
+ "step": 4360
5257
+ },
5258
+ {
5259
+ "epoch": 16.23,
5260
+ "learning_rate": 0.00012051589836876666,
5261
+ "loss": 1.3717,
5262
+ "step": 4365
5263
+ },
5264
+ {
5265
+ "epoch": 16.25,
5266
+ "learning_rate": 0.00011781046735586077,
5267
+ "loss": 1.4166,
5268
+ "step": 4370
5269
+ },
5270
+ {
5271
+ "epoch": 16.26,
5272
+ "learning_rate": 0.00011493728373772612,
5273
+ "loss": 1.432,
5274
+ "step": 4375
5275
+ },
5276
+ {
5277
+ "epoch": 16.28,
5278
+ "learning_rate": 0.00011190614185412497,
5279
+ "loss": 1.4722,
5280
+ "step": 4380
5281
+ },
5282
+ {
5283
+ "epoch": 16.3,
5284
+ "learning_rate": 0.00010872737450568259,
5285
+ "loss": 1.3411,
5286
+ "step": 4385
5287
+ },
5288
+ {
5289
+ "epoch": 16.32,
5290
+ "learning_rate": 0.00010541181773059928,
5291
+ "loss": 1.4268,
5292
+ "step": 4390
5293
+ },
5294
+ {
5295
+ "epoch": 16.34,
5296
+ "learning_rate": 0.00010197077386589103,
5297
+ "loss": 1.4257,
5298
+ "step": 4395
5299
+ },
5300
+ {
5301
+ "epoch": 16.36,
5302
+ "learning_rate": 9.841597301907411e-05,
5303
+ "loss": 1.4367,
5304
+ "step": 4400
5305
+ },
5306
+ {
5307
+ "epoch": 16.38,
5308
+ "learning_rate": 9.475953308163089e-05,
5309
+ "loss": 1.388,
5310
+ "step": 4405
5311
+ },
5312
+ {
5313
+ "epoch": 16.39,
5314
+ "learning_rate": 9.101391842055883e-05,
5315
+ "loss": 1.4486,
5316
+ "step": 4410
5317
+ },
5318
+ {
5319
+ "epoch": 16.41,
5320
+ "learning_rate": 8.719189738884117e-05,
5321
+ "loss": 1.3824,
5322
+ "step": 4415
5323
+ },
5324
+ {
5325
+ "epoch": 16.43,
5326
+ "learning_rate": 8.330649879965051e-05,
5327
+ "loss": 1.4313,
5328
+ "step": 4420
5329
+ },
5330
+ {
5331
+ "epoch": 16.45,
5332
+ "learning_rate": 7.937096751268169e-05,
5333
+ "loss": 1.3933,
5334
+ "step": 4425
5335
+ },
5336
+ {
5337
+ "epoch": 16.47,
5338
+ "learning_rate": 7.539871928400956e-05,
5339
+ "loss": 1.4352,
5340
+ "step": 4430
5341
+ },
5342
+ {
5343
+ "epoch": 16.49,
5344
+ "learning_rate": 7.140329503337758e-05,
5345
+ "loss": 1.4244,
5346
+ "step": 4435
5347
+ },
5348
+ {
5349
+ "epoch": 16.51,
5350
+ "learning_rate": 6.739831468481779e-05,
5351
+ "loss": 1.4062,
5352
+ "step": 4440
5353
+ },
5354
+ {
5355
+ "epoch": 16.52,
5356
+ "learning_rate": 6.33974307379626e-05,
5357
+ "loss": 1.3753,
5358
+ "step": 4445
5359
+ },
5360
+ {
5361
+ "epoch": 16.54,
5362
+ "learning_rate": 5.94142817282949e-05,
5363
+ "loss": 1.3918,
5364
+ "step": 4450
5365
+ },
5366
+ {
5367
+ "epoch": 16.56,
5368
+ "learning_rate": 5.546244573501996e-05,
5369
+ "loss": 1.423,
5370
+ "step": 4455
5371
+ },
5372
+ {
5373
+ "epoch": 16.58,
5374
+ "learning_rate": 5.155539409500841e-05,
5375
+ "loss": 1.4141,
5376
+ "step": 4460
5377
+ },
5378
+ {
5379
+ "epoch": 16.6,
5380
+ "learning_rate": 4.7706445480618974e-05,
5381
+ "loss": 1.4364,
5382
+ "step": 4465
5383
+ },
5384
+ {
5385
+ "epoch": 16.62,
5386
+ "learning_rate": 4.3928720497937174e-05,
5387
+ "loss": 1.405,
5388
+ "step": 4470
5389
+ },
5390
+ {
5391
+ "epoch": 16.64,
5392
+ "learning_rate": 4.02350969601972e-05,
5393
+ "loss": 1.4752,
5394
+ "step": 4475
5395
+ },
5396
+ {
5397
+ "epoch": 16.65,
5398
+ "learning_rate": 3.663816598884848e-05,
5399
+ "loss": 1.4515,
5400
+ "step": 4480
5401
+ },
5402
+ {
5403
+ "epoch": 16.67,
5404
+ "learning_rate": 3.315018909193563e-05,
5405
+ "loss": 1.4503,
5406
+ "step": 4485
5407
+ },
5408
+ {
5409
+ "epoch": 16.69,
5410
+ "learning_rate": 2.9783056366075814e-05,
5411
+ "loss": 1.3878,
5412
+ "step": 4490
5413
+ },
5414
+ {
5415
+ "epoch": 16.71,
5416
+ "learning_rate": 2.6548245964540616e-05,
5417
+ "loss": 1.3826,
5418
+ "step": 4495
5419
+ },
5420
+ {
5421
+ "epoch": 16.73,
5422
+ "learning_rate": 2.345678496960497e-05,
5423
+ "loss": 1.3709,
5424
+ "step": 4500
5425
+ },
5426
+ {
5427
+ "epoch": 16.75,
5428
+ "learning_rate": 2.051921180253764e-05,
5429
+ "loss": 1.4434,
5430
+ "step": 4505
5431
+ },
5432
+ {
5433
+ "epoch": 16.77,
5434
+ "learning_rate": 1.774554029938429e-05,
5435
+ "loss": 1.4217,
5436
+ "step": 4510
5437
+ },
5438
+ {
5439
+ "epoch": 16.78,
5440
+ "learning_rate": 1.5145225574996895e-05,
5441
+ "loss": 1.3259,
5442
+ "step": 4515
5443
+ },
5444
+ {
5445
+ "epoch": 16.8,
5446
+ "learning_rate": 1.272713179167218e-05,
5447
+ "loss": 1.3681,
5448
+ "step": 4520
5449
+ },
5450
+ {
5451
+ "epoch": 16.82,
5452
+ "learning_rate": 1.0499501942287456e-05,
5453
+ "loss": 1.3708,
5454
+ "step": 4525
5455
+ },
5456
+ {
5457
+ "epoch": 16.84,
5458
+ "learning_rate": 8.469929750918058e-06,
5459
+ "loss": 1.4352,
5460
+ "step": 4530
5461
+ },
5462
+ {
5463
+ "epoch": 16.86,
5464
+ "learning_rate": 6.6453337867398825e-06,
5465
+ "loss": 1.4355,
5466
+ "step": 4535
5467
+ },
5468
+ {
5469
+ "epoch": 16.88,
5470
+ "learning_rate": 5.031933879454651e-06,
5471
+ "loss": 1.4338,
5472
+ "step": 4540
5473
+ },
5474
+ {
5475
+ "epoch": 16.9,
5476
+ "learning_rate": 3.6352299166325223e-06,
5477
+ "loss": 1.3822,
5478
+ "step": 4545
5479
+ },
5480
+ {
5481
+ "epoch": 16.91,
5482
+ "learning_rate": 2.459983095251791e-06,
5483
+ "loss": 1.3442,
5484
+ "step": 4550
5485
+ },
5486
+ {
5487
+ "epoch": 16.93,
5488
+ "learning_rate": 1.5101996913488535e-06,
5489
+ "loss": 1.356,
5490
+ "step": 4555
5491
+ },
5492
+ {
5493
+ "epoch": 16.95,
5494
+ "learning_rate": 7.891174030992353e-07,
5495
+ "loss": 1.3681,
5496
+ "step": 4560
5497
+ },
5498
+ {
5499
+ "epoch": 16.97,
5500
+ "learning_rate": 2.991943138937121e-07,
5501
+ "loss": 1.3964,
5502
+ "step": 4565
5503
+ },
5504
+ {
5505
+ "epoch": 16.99,
5506
+ "learning_rate": 4.2100513024036057e-08,
5507
+ "loss": 1.4004,
5508
+ "step": 4570
5509
+ },
5510
+ {
5511
+ "epoch": 17.0,
5512
+ "eval_loss": 1.3110859394073486,
5513
+ "eval_runtime": 18.5084,
5514
+ "eval_samples_per_second": 20.855,
5515
+ "eval_steps_per_second": 2.647,
5516
+ "step": 4573
5517
  }
5518
  ],
5519
+ "max_steps": 6994,
5520
+ "num_train_epochs": 26,
5521
+ "total_flos": 4776548990976000.0,
5522
  "trial_name": null,
5523
  "trial_params": null
5524
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa22e711f18ef9c494e076522c9fc6c23c5e5db9e44a9588da998b4d53d50ce7
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c111611d0270c2f70ae5a43dbcc59025e6535cde71a5fbf71689de64f457d265
3
  size 2991