Yannis98 committed on Aug 8, 2024

Commit

a96a96b

verified ·

1 Parent(s): 35b6739

albert

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
README.md +55 -0
all_results.json +26 -0
checkpoint-1000/config.json +33 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/special_tokens_map.json +15 -0
checkpoint-1000/tokenizer.json +0 -0
checkpoint-1000/tokenizer_config.json +57 -0
checkpoint-1000/trainer_state.json +47 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-2000/config.json +33 -0
checkpoint-2000/model.safetensors +3 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/special_tokens_map.json +15 -0
checkpoint-2000/tokenizer.json +0 -0
checkpoint-2000/tokenizer_config.json +57 -0
checkpoint-2000/trainer_state.json +61 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-3000/config.json +33 -0
checkpoint-3000/model.safetensors +3 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/special_tokens_map.json +15 -0
checkpoint-3000/tokenizer.json +0 -0
checkpoint-3000/tokenizer_config.json +57 -0
checkpoint-3000/trainer_state.json +75 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-4000/config.json +33 -0
checkpoint-4000/model.safetensors +3 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/rng_state.pth +3 -0
checkpoint-4000/scheduler.pt +3 -0
checkpoint-4000/special_tokens_map.json +15 -0
checkpoint-4000/tokenizer.json +0 -0
checkpoint-4000/tokenizer_config.json +57 -0
checkpoint-4000/trainer_state.json +89 -0
checkpoint-4000/training_args.bin +3 -0
checkpoint-4124/config.json +33 -0
checkpoint-4124/model.safetensors +3 -0
checkpoint-4124/optimizer.pt +3 -0
checkpoint-4124/rng_state.pth +3 -0
checkpoint-4124/scheduler.pt +3 -0
checkpoint-4124/special_tokens_map.json +15 -0
checkpoint-4124/tokenizer.json +0 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 Base_Albert/eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 Base_Albert/eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
+eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,55 @@

+---
+license: apache-2.0
+base_model: albert/albert-base-v2
+tags:
+- generated_from_trainer
+datasets:
+- squad_v2
+model-index:
+- name: Base_Albert
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Base_Albert
+This model is a fine-tuned version of [albert/albert-base-v2](https://huggingface.co/albert/albert-base-v2) on the squad_v2 dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 64
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 2.0
+### Training results
+### Framework versions
+- Transformers 4.45.0.dev0
+- Pytorch 2.2.2+cu121
+- Datasets 2.19.2
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+    "epoch": 2.0,
+    "eval_HasAns_exact": 75.37112010796221,
+    "eval_HasAns_f1": 81.67986330801969,
+    "eval_HasAns_total": 5928,
+    "eval_NoAns_exact": 82.70815811606391,
+    "eval_NoAns_f1": 82.70815811606391,
+    "eval_NoAns_total": 5945,
+    "eval_best_exact": 79.04489177124569,
+    "eval_best_exact_thresh": 0.0,
+    "eval_best_f1": 82.1947468786271,
+    "eval_best_f1_thresh": 0.0,
+    "eval_exact": 79.04489177124569,
+    "eval_f1": 82.19474687862723,
+    "eval_runtime": 79.7911,
+    "eval_samples": 12171,
+    "eval_samples_per_second": 152.536,
+    "eval_steps_per_second": 19.075,
+    "eval_total": 11873,
+    "total_flos": 4371201741330432.0,
+    "train_loss": 0.9245093981116634,
+    "train_runtime": 4409.6844,
+    "train_samples": 131958,
+    "train_samples_per_second": 59.849,
+    "train_steps_per_second": 0.935
+}

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "architectures": [
+    "AlbertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0.dev0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b396361671289ea35df62be5851b96a2f201fb9359090142c33091fe703594e
+size 44381360

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bed26f962d92a3cd7b562d68e97637db084d21caf6307d7495eff9b015bbc00b
+size 88777619

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:960a69c76f4510e5d359f1f8ed0b1d8d5a6a5e2293a5f059697691e42aa1ff31
+size 14244

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81c66296607eae32dd03cf616c1ee454bed866dc0c297c973f719f647f45b77a
+size 1064

checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "[SEP]",
+  "unk_token": "<unk>"
+}

checkpoint-1000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "eos_token": "[SEP]",
+  "keep_accents": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "remove_space": true,
+  "sep_token": "[SEP]",
+  "tokenizer_class": "AlbertTokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.48496605237633367,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24248302618816683,
+      "grad_norm": 22.99319839477539,
+      "learning_rate": 2.929668552950687e-05,
+      "loss": 1.9432,
+      "step": 500
+    },
+    {
+      "epoch": 0.48496605237633367,
+      "grad_norm": 29.776500701904297,
+      "learning_rate": 2.5254648342764755e-05,
+      "loss": 1.0335,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4124,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1060022550528000.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+size 5240

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "architectures": [
+    "AlbertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0.dev0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

checkpoint-2000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6bcc75b4f409bc7511d2c105aeb79e9ac9152669577265b8c89f9d6db52741a
+size 44381360

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0b92639b4b0018204eb041b6eb71864a3da250b6dd42f82d7d785bf411c8fbf
+size 88777619

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:960a69c76f4510e5d359f1f8ed0b1d8d5a6a5e2293a5f059697691e42aa1ff31
+size 14244

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0db1ca8ff12dbe618a66e2b8dca8235d001ac2257fb93e96b7781b32c6fd5305
+size 1064

checkpoint-2000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "[SEP]",
+  "unk_token": "<unk>"
+}

checkpoint-2000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-2000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "eos_token": "[SEP]",
+  "keep_accents": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "remove_space": true,
+  "sep_token": "[SEP]",
+  "tokenizer_class": "AlbertTokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9699321047526673,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24248302618816683,
+      "grad_norm": 22.99319839477539,
+      "learning_rate": 2.929668552950687e-05,
+      "loss": 1.9432,
+      "step": 500
+    },
+    {
+      "epoch": 0.48496605237633367,
+      "grad_norm": 29.776500701904297,
+      "learning_rate": 2.5254648342764755e-05,
+      "loss": 1.0335,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7274490785645005,
+      "grad_norm": 17.826507568359375,
+      "learning_rate": 2.1212611156022636e-05,
+      "loss": 0.9314,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9699321047526673,
+      "grad_norm": 16.028316497802734,
+      "learning_rate": 1.7170573969280517e-05,
+      "loss": 0.875,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4124,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2120045101056000.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+size 5240

checkpoint-3000/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "architectures": [
+    "AlbertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0.dev0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

checkpoint-3000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84f4b6c26206d9596468fd953d25757798d0baa3a572cc57cebf9ee2d9efae22
+size 44381360

checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fcd6a270702b70468e31ce242284cc1e98251471318f2dd4b53908bd0ddbcca
+size 88777619

checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba0bbd22f1de48c7afcee33d3a889b6e69d5787af2f52b8b3e85ebdaa6e9705
+size 14244

checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c45a1cd2cf6182f86dab32b38f81700fabc02c33f71f34969929416febf2538e
+size 1064

checkpoint-3000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "[SEP]",
+  "unk_token": "<unk>"
+}

checkpoint-3000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-3000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "eos_token": "[SEP]",
+  "keep_accents": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "remove_space": true,
+  "sep_token": "[SEP]",
+  "tokenizer_class": "AlbertTokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,75 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.454898157129001,
+  "eval_steps": 500,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24248302618816683,
+      "grad_norm": 22.99319839477539,
+      "learning_rate": 2.929668552950687e-05,
+      "loss": 1.9432,
+      "step": 500
+    },
+    {
+      "epoch": 0.48496605237633367,
+      "grad_norm": 29.776500701904297,
+      "learning_rate": 2.5254648342764755e-05,
+      "loss": 1.0335,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7274490785645005,
+      "grad_norm": 17.826507568359375,
+      "learning_rate": 2.1212611156022636e-05,
+      "loss": 0.9314,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9699321047526673,
+      "grad_norm": 16.028316497802734,
+      "learning_rate": 1.7170573969280517e-05,
+      "loss": 0.875,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2124151309408342,
+      "grad_norm": 27.188560485839844,
+      "learning_rate": 1.31285367825384e-05,
+      "loss": 0.7128,
+      "step": 2500
+    },
+    {
+      "epoch": 1.454898157129001,
+      "grad_norm": 15.629920959472656,
+      "learning_rate": 9.086499595796281e-06,
+      "loss": 0.6671,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4124,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3179902023060480.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+size 5240

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "architectures": [
+    "AlbertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0.dev0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

checkpoint-4000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc3543f8064c55082b743de7567800b19a1f3e96d847a83a7ab2af846dc34fac
+size 44381360

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce87d63c7a09078195a45eda009c6042f28baa6698c32ce8f96eb9a91aa6339f
+size 88777619

checkpoint-4000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba0bbd22f1de48c7afcee33d3a889b6e69d5787af2f52b8b3e85ebdaa6e9705
+size 14244

checkpoint-4000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ba6269d6b3fd350746b0f383afc9648ce09cd2ff349a4514e17e2cae44591e6
+size 1064

checkpoint-4000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "[SEP]",
+  "unk_token": "<unk>"
+}

checkpoint-4000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-4000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "eos_token": "[SEP]",
+  "keep_accents": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "remove_space": true,
+  "sep_token": "[SEP]",
+  "tokenizer_class": "AlbertTokenizer",
+  "unk_token": "<unk>"
+}

checkpoint-4000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9398642095053347,
+  "eval_steps": 500,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24248302618816683,
+      "grad_norm": 22.99319839477539,
+      "learning_rate": 2.929668552950687e-05,
+      "loss": 1.9432,
+      "step": 500
+    },
+    {
+      "epoch": 0.48496605237633367,
+      "grad_norm": 29.776500701904297,
+      "learning_rate": 2.5254648342764755e-05,
+      "loss": 1.0335,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7274490785645005,
+      "grad_norm": 17.826507568359375,
+      "learning_rate": 2.1212611156022636e-05,
+      "loss": 0.9314,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9699321047526673,
+      "grad_norm": 16.028316497802734,
+      "learning_rate": 1.7170573969280517e-05,
+      "loss": 0.875,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2124151309408342,
+      "grad_norm": 27.188560485839844,
+      "learning_rate": 1.31285367825384e-05,
+      "loss": 0.7128,
+      "step": 2500
+    },
+    {
+      "epoch": 1.454898157129001,
+      "grad_norm": 15.629920959472656,
+      "learning_rate": 9.086499595796281e-06,
+      "loss": 0.6671,
+      "step": 3000
+    },
+    {
+      "epoch": 1.6973811833171677,
+      "grad_norm": 15.22022819519043,
+      "learning_rate": 5.044462409054164e-06,
+      "loss": 0.6666,
+      "step": 3500
+    },
+    {
+      "epoch": 1.9398642095053347,
+      "grad_norm": 21.28203582763672,
+      "learning_rate": 1.0024252223120454e-06,
+      "loss": 0.641,
+      "step": 4000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4124,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4239924573588480.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f78188d53f32e51fbfc484f90e61efc40bb61dc887520af92922483998ad43
+size 5240

checkpoint-4124/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "architectures": [
+    "AlbertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0.dev0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

checkpoint-4124/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:949037f5b37793e09ab2f2f7eeddd0592de75c2aa8f34f3ce8753b1c362c4aee
+size 44381360

checkpoint-4124/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8adf2c81f41b900f880bf12c51aaf821848b0e5b3d83bbc524e51244ff0b0aed
+size 88777619

checkpoint-4124/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba0bbd22f1de48c7afcee33d3a889b6e69d5787af2f52b8b3e85ebdaa6e9705
+size 14244

checkpoint-4124/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9caeddb926499794fec79c386599b77509588bd568b6405745c0bf9f348096b
+size 1064

checkpoint-4124/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "[SEP]",
+  "unk_token": "<unk>"
+}

checkpoint-4124/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff