Training in progress, step 2500

Browse files

Files changed (11) hide show

last-checkpoint/config.json +75 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/pytorch_model.bin +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/trainer_state.json +166 -0
last-checkpoint/training_args.bin +3 -0
pytorch_model.bin +1 -1
runs/Jan26_03-13-45_b3489f7155a5/1674703022.4698694/events.out.tfevents.1674703022.b3489f7155a5.24.1 +3 -0
runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 +3 -0
training_args.bin +1 -1

last-checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,75 @@

+{
+  "_name_or_path": "facebook/bart-base",
+  "activation_dropout": 0.1,
+  "activation_function": "gelu",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForConditionalGeneration"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "classif_dropout": 0.1,
+  "classifier_dropout": 0.0,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "early_stopping": true,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 2,
+  "forced_bos_token_id": 0,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "bart",
+  "no_repeat_ngram_size": 3,
+  "normalize_before": false,
+  "normalize_embedding": true,
+  "num_beams": 4,
+  "num_hidden_layers": 6,
+  "pad_token_id": 1,
+  "scale_embedding": false,
+  "task_specific_params": {
+    "summarization": {
+      "length_penalty": 1.0,
+      "max_length": 128,
+      "min_length": 12,
+      "num_beams": 4
+    },
+    "summarization_cnn": {
+      "length_penalty": 2.0,
+      "max_length": 142,
+      "min_length": 56,
+      "num_beams": 4
+    },
+    "summarization_xsum": {
+      "length_penalty": 1.0,
+      "max_length": 62,
+      "min_length": 11,
+      "num_beams": 6
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.20.1",
+  "use_cache": true,
+  "vocab_size": 50265
+}

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db2a928e4b1018822ebfad8dd090db4a4ca977e1048920cbdc5a5f8704e2fe9e
+size 1115513717

last-checkpoint/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3452a933eb17a294bf3db537f645a47165dde167185e4924f97d560c8131bd9d
+size 557969145

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4df149674baccc6b8a44d91d6c61759ea1c40b882afaadf2a006deec964a28d0
+size 15459

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cab32727d494c2b0fc6dc98b7af308231f98d00a7e16d41acbb7fcea88aa97a1
+size 623

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.28712530148156656,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01,
+      "learning_rate": 4.968098002934984e-05,
+      "loss": 0.8326,
+      "step": 100
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 4.936196005869967e-05,
+      "loss": 0.8028,
+      "step": 200
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 4.9042940088049516e-05,
+      "loss": 0.7492,
+      "step": 300
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 4.8723920117399354e-05,
+      "loss": 0.7413,
+      "step": 400
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 4.840490014674919e-05,
+      "loss": 0.7788,
+      "step": 500
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 4.808588017609902e-05,
+      "loss": 0.6938,
+      "step": 600
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 4.776686020544887e-05,
+      "loss": 0.7157,
+      "step": 700
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 4.7447840234798705e-05,
+      "loss": 0.7606,
+      "step": 800
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 4.7128820264148536e-05,
+      "loss": 0.7222,
+      "step": 900
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 4.6809800293498373e-05,
+      "loss": 0.6986,
+      "step": 1000
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 4.649078032284822e-05,
+      "loss": 0.7202,
+      "step": 1100
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 4.617176035219805e-05,
+      "loss": 0.7456,
+      "step": 1200
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 4.585274038154789e-05,
+      "loss": 0.6753,
+      "step": 1300
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 4.5533720410897725e-05,
+      "loss": 0.6983,
+      "step": 1400
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 4.521470044024756e-05,
+      "loss": 0.7113,
+      "step": 1500
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 4.48956804695974e-05,
+      "loss": 0.7059,
+      "step": 1600
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 4.457666049894724e-05,
+      "loss": 0.691,
+      "step": 1700
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 4.4257640528297076e-05,
+      "loss": 0.752,
+      "step": 1800
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 4.393862055764691e-05,
+      "loss": 0.681,
+      "step": 1900
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 4.361960058699675e-05,
+      "loss": 0.7501,
+      "step": 2000
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 4.330058061634659e-05,
+      "loss": 0.6956,
+      "step": 2100
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 4.298156064569642e-05,
+      "loss": 0.6996,
+      "step": 2200
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 4.266254067504626e-05,
+      "loss": 0.6891,
+      "step": 2300
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 4.2343520704396096e-05,
+      "loss": 0.7419,
+      "step": 2400
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 4.202450073374593e-05,
+      "loss": 0.6979,
+      "step": 2500
+    }
+  ],
+  "max_steps": 15673,
+  "num_train_epochs": 2,
+  "total_flos": 9884552050114560.0,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fab8dd6db5a19553c3856081555c13cd9dabcada806f1bf66328d790e5db97f1
+size 3439

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51feb8f9eb40ac13ea3bcbdaa38ed1fee7ea527683f56fba10d5981cf701327b
 size 557969145

 version https://git-lfs.github.com/spec/v1
+oid sha256:3452a933eb17a294bf3db537f645a47165dde167185e4924f97d560c8131bd9d
 size 557969145

runs/Jan26_03-13-45_b3489f7155a5/1674703022.4698694/events.out.tfevents.1674703022.b3489f7155a5.24.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eee88cfabf9ceb3ac4b7d29ac684d5dc17fe5ec955340265f903d12f857445cb
+size 5588

runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf0c7498ed35ceaf14cc4e42fea62f4e4f959e2201619dc946ad5b831a1f1d8f
+size 8760

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb5c03e02cabfaae5aac527cba1260f324dabff8089669a909b39dcb2a587966
 size 3439

 version https://git-lfs.github.com/spec/v1
+oid sha256:fab8dd6db5a19553c3856081555c13cd9dabcada806f1bf66328d790e5db97f1
 size 3439