nbtpj commited on
Commit
3d71eca
·
1 Parent(s): 5c6665a

Training in progress, step 7500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26877d744980b431b76d3e2c4397501813f741bfd4d56d3d01913dd7a72edf84
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7993740ed5e437b04353de4ecb526bd9918a96a4e9eaf258d67af6f961bd6cf
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3788c718c873a5a631cbd6930b47d0c500358621cac639d4e9bc55c21520493c
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97db51ff697a7e7e17772ffa00db91d65d9b5000341e4cb4899d26c9fe90c8e2
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e580d1df17d3fc5663dd4d2a79e58e94c4e8b7aa63ccfd53d9b69366e2d1f1a
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faec401c00d51234b05bffba3c32fc98159e150f1de70dde51eaeeef3f15563a
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c57b88e1b4f27dad7565a46df4569bd671ad83f261df06872ce384b18b5f02c6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f22490afe27492170ee18653712e55a82f1d2a2adbc61025e6296be8d30c2663
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.574514535217741,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -306,11 +306,161 @@
306
  "learning_rate": 3.404187412230308e-05,
307
  "loss": 2.5644,
308
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  }
310
  ],
311
  "max_steps": 15666,
312
  "num_train_epochs": 2,
313
- "total_flos": 1.976059637121024e+16,
314
  "trial_name": null,
315
  "trial_params": null
316
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8617718028266115,
5
+ "global_step": 7500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
306
  "learning_rate": 3.404187412230308e-05,
307
  "loss": 2.5644,
308
  "step": 5000
309
+ },
310
+ {
311
+ "epoch": 0.59,
312
+ "learning_rate": 3.372271160474914e-05,
313
+ "loss": 2.5206,
314
+ "step": 5100
315
+ },
316
+ {
317
+ "epoch": 0.6,
318
+ "learning_rate": 3.3403549087195205e-05,
319
+ "loss": 2.5305,
320
+ "step": 5200
321
+ },
322
+ {
323
+ "epoch": 0.61,
324
+ "learning_rate": 3.3084386569641264e-05,
325
+ "loss": 2.462,
326
+ "step": 5300
327
+ },
328
+ {
329
+ "epoch": 0.62,
330
+ "learning_rate": 3.2765224052087324e-05,
331
+ "loss": 2.4704,
332
+ "step": 5400
333
+ },
334
+ {
335
+ "epoch": 0.63,
336
+ "learning_rate": 3.244606153453338e-05,
337
+ "loss": 2.5133,
338
+ "step": 5500
339
+ },
340
+ {
341
+ "epoch": 0.64,
342
+ "learning_rate": 3.212689901697944e-05,
343
+ "loss": 2.4805,
344
+ "step": 5600
345
+ },
346
+ {
347
+ "epoch": 0.65,
348
+ "learning_rate": 3.180773649942551e-05,
349
+ "loss": 2.4572,
350
+ "step": 5700
351
+ },
352
+ {
353
+ "epoch": 0.67,
354
+ "learning_rate": 3.1488573981871575e-05,
355
+ "loss": 2.4128,
356
+ "step": 5800
357
+ },
358
+ {
359
+ "epoch": 0.68,
360
+ "learning_rate": 3.1169411464317634e-05,
361
+ "loss": 2.4577,
362
+ "step": 5900
363
+ },
364
+ {
365
+ "epoch": 0.69,
366
+ "learning_rate": 3.0850248946763694e-05,
367
+ "loss": 2.5539,
368
+ "step": 6000
369
+ },
370
+ {
371
+ "epoch": 0.7,
372
+ "learning_rate": 3.053108642920975e-05,
373
+ "loss": 2.472,
374
+ "step": 6100
375
+ },
376
+ {
377
+ "epoch": 0.71,
378
+ "learning_rate": 3.0211923911655816e-05,
379
+ "loss": 2.4468,
380
+ "step": 6200
381
+ },
382
+ {
383
+ "epoch": 0.72,
384
+ "learning_rate": 2.9892761394101875e-05,
385
+ "loss": 2.4831,
386
+ "step": 6300
387
+ },
388
+ {
389
+ "epoch": 0.74,
390
+ "learning_rate": 2.957359887654794e-05,
391
+ "loss": 2.4093,
392
+ "step": 6400
393
+ },
394
+ {
395
+ "epoch": 0.75,
396
+ "learning_rate": 2.9254436358994004e-05,
397
+ "loss": 2.4683,
398
+ "step": 6500
399
+ },
400
+ {
401
+ "epoch": 0.76,
402
+ "learning_rate": 2.8935273841440063e-05,
403
+ "loss": 2.4388,
404
+ "step": 6600
405
+ },
406
+ {
407
+ "epoch": 0.77,
408
+ "learning_rate": 2.8616111323886126e-05,
409
+ "loss": 2.3558,
410
+ "step": 6700
411
+ },
412
+ {
413
+ "epoch": 0.78,
414
+ "learning_rate": 2.8296948806332185e-05,
415
+ "loss": 2.4788,
416
+ "step": 6800
417
+ },
418
+ {
419
+ "epoch": 0.79,
420
+ "learning_rate": 2.7977786288778245e-05,
421
+ "loss": 2.4523,
422
+ "step": 6900
423
+ },
424
+ {
425
+ "epoch": 0.8,
426
+ "learning_rate": 2.7658623771224308e-05,
427
+ "loss": 2.4759,
428
+ "step": 7000
429
+ },
430
+ {
431
+ "epoch": 0.82,
432
+ "learning_rate": 2.7339461253670367e-05,
433
+ "loss": 2.3574,
434
+ "step": 7100
435
+ },
436
+ {
437
+ "epoch": 0.83,
438
+ "learning_rate": 2.7020298736116433e-05,
439
+ "loss": 2.4569,
440
+ "step": 7200
441
+ },
442
+ {
443
+ "epoch": 0.84,
444
+ "learning_rate": 2.6701136218562496e-05,
445
+ "loss": 2.4078,
446
+ "step": 7300
447
+ },
448
+ {
449
+ "epoch": 0.85,
450
+ "learning_rate": 2.6381973701008555e-05,
451
+ "loss": 2.3948,
452
+ "step": 7400
453
+ },
454
+ {
455
+ "epoch": 0.86,
456
+ "learning_rate": 2.6062811183454615e-05,
457
+ "loss": 2.5496,
458
+ "step": 7500
459
  }
460
  ],
461
  "max_steps": 15666,
462
  "num_train_epochs": 2,
463
+ "total_flos": 2.963322045186048e+16,
464
  "trial_name": null,
465
  "trial_params": null
466
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3788c718c873a5a631cbd6930b47d0c500358621cac639d4e9bc55c21520493c
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97db51ff697a7e7e17772ffa00db91d65d9b5000341e4cb4899d26c9fe90c8e2
3
  size 557969145
runs/Jan01_04-28-26_6ea5fb75c867/events.out.tfevents.1672547319.6ea5fb75c867.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1294decc8c9ef75cd30d8852ab459f4a1f68f6df2d4da45e86410f825db4f1d7
3
- size 12685
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c102e25ecb6796a1e93092de6658a83da616eed41abb3c0f7da8764fe5c465e
3
+ size 16610