Adapting `google-bert/bert-large-uncased` for `swag`.
- README.md +108 -0
- adapter_config.json +29 -0
- adapter_model.safetensors +3 -0
- runs/Aug25_23-56-59_b007ed356559/events.out.tfevents.1724630220.b007ed356559.2046.0 +3 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +55 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
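
To fetch everything this commit adds in one step, a minimal sketch using `huggingface_hub`; the repo id below is a placeholder, since the commit summary does not name the namespace:

```python
from huggingface_hub import snapshot_download

# Placeholder repo id; substitute the actual namespace/name of this repo.
local_dir = snapshot_download("your-username/bert-large-uncased-swag")
print(local_dir)  # contains README.md, adapter_config.json, adapter_model.safetensors, ...
```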
README.md
ADDED
@@ -0,0 +1,108 @@
---
base_model: google-bert/bert-large-uncased
library_name: peft
license: apache-2.0
metrics:
- accuracy
tags:
- trl
- sft
- generated_from_trainer
model-index:
- name: bert-large-uncased-swag
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# bert-large-uncased-swag

This model is a fine-tuned version of [google-bert/bert-large-uncased](https://huggingface.co/google-bert/bert-large-uncased) on the SWAG dataset.
It achieves the following results on the evaluation set:
- Loss: 0.4643
- Accuracy: 0.8295

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-05
- train_batch_size: 16
- eval_batch_size: 16
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 5

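For orientation, a minimal sketch of how the hyperparameters above map onto `transformers.TrainingArguments`; the `output_dir` and anything not listed above are placeholders, not values taken from this commit:

```python
from transformers import TrainingArguments

# Mirrors the hyperparameters listed above; output_dir is a hypothetical path.
args = TrainingArguments(
    output_dir="bert-large-uncased-swag",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=5,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)
```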
### Training results

| Training Loss | Epoch  | Step  | Validation Loss | Accuracy |
|:-------------:|:------:|:-----:|:---------------:|:--------:|
| 1.2132        | 0.1088 | 500   | 0.8717          | 0.6959   |
| 0.908         | 0.2175 | 1000  | 0.7149          | 0.7473   |
| 0.8353        | 0.3263 | 1500  | 0.6474          | 0.7575   |
| 0.8075        | 0.4351 | 2000  | 0.6142          | 0.7798   |
| 0.8011        | 0.5438 | 2500  | 0.5785          | 0.7867   |
| 0.7727        | 0.6526 | 3000  | 0.5643          | 0.7936   |
| 0.7647        | 0.7614 | 3500  | 0.5698          | 0.7956   |
| 0.7731        | 0.8701 | 4000  | 0.5453          | 0.8011   |
| 0.7489        | 0.9789 | 4500  | 0.5336          | 0.8052   |
| 0.7496        | 1.0877 | 5000  | 0.5431          | 0.8033   |
| 0.735         | 1.1964 | 5500  | 0.5231          | 0.8083   |
| 0.7194        | 1.3052 | 6000  | 0.5147          | 0.8096   |
| 0.7307        | 1.4140 | 6500  | 0.5102          | 0.8112   |
| 0.7355        | 1.5227 | 7000  | 0.5223          | 0.8133   |
| 0.7085        | 1.6315 | 7500  | 0.5054          | 0.8142   |
| 0.7206        | 1.7403 | 8000  | 0.5026          | 0.8157   |
| 0.7143        | 1.8490 | 8500  | 0.5126          | 0.8144   |
| 0.7045        | 1.9578 | 9000  | 0.5035          | 0.8162   |
| 0.6972        | 2.0666 | 9500  | 0.4948          | 0.8178   |
| 0.6885        | 2.1753 | 10000 | 0.4890          | 0.8202   |
| 0.7079        | 2.2841 | 10500 | 0.4910          | 0.8193   |
| 0.6874        | 2.3929 | 11000 | 0.4907          | 0.8222   |
| 0.6832        | 2.5016 | 11500 | 0.4875          | 0.8217   |
| 0.6807        | 2.6104 | 12000 | 0.4824          | 0.8224   |
| 0.6865        | 2.7192 | 12500 | 0.4877          | 0.8227   |
| 0.6863        | 2.8279 | 13000 | 0.4821          | 0.8232   |
| 0.6913        | 2.9367 | 13500 | 0.4914          | 0.8229   |
| 0.6996        | 3.0455 | 14000 | 0.4843          | 0.8241   |
| 0.687         | 3.1542 | 14500 | 0.4753          | 0.8250   |
| 0.6896        | 3.2630 | 15000 | 0.4762          | 0.8251   |
| 0.6745        | 3.3718 | 15500 | 0.4753          | 0.8242   |
| 0.6735        | 3.4805 | 16000 | 0.4713          | 0.8267   |
| 0.6764        | 3.5893 | 16500 | 0.4715          | 0.8259   |
| 0.6521        | 3.6981 | 17000 | 0.4669          | 0.8285   |
| 0.6686        | 3.8068 | 17500 | 0.4726          | 0.8269   |
| 0.6721        | 3.9156 | 18000 | 0.4703          | 0.8273   |
| 0.6682        | 4.0244 | 18500 | 0.4660          | 0.8274   |
| 0.6533        | 4.1331 | 19000 | 0.4690          | 0.8281   |
| 0.6547        | 4.2419 | 19500 | 0.4697          | 0.8282   |
| 0.6589        | 4.3507 | 20000 | 0.4640          | 0.8291   |
| 0.6518        | 4.4594 | 20500 | 0.4638          | 0.8294   |
| 0.6739        | 4.5682 | 21000 | 0.4669          | 0.8285   |
| 0.6763        | 4.6770 | 21500 | 0.4628          | 0.8304   |
| 0.6503        | 4.7857 | 22000 | 0.4640          | 0.8296   |
| 0.6659        | 4.8945 | 22500 | 0.4643          | 0.8295   |

### Framework versions

- PEFT 0.12.1.dev0
- Transformers 4.45.0.dev0
- Pytorch 2.3.1+cu121
- Datasets 2.21.0
- Tokenizers 0.19.1
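
For context, a minimal sketch of loading the adapter added in this commit onto the base checkpoint; the local path / repo id is a placeholder, and the multiple-choice head is assumed from the SWAG task named in the commit title:

```python
from transformers import AutoModelForMultipleChoice, AutoTokenizer
from peft import PeftModel

# "bert-large-uncased-swag" is a placeholder for wherever these files live.
base = AutoModelForMultipleChoice.from_pretrained("google-bert/bert-large-uncased")
model = PeftModel.from_pretrained(base, "bert-large-uncased-swag")
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-swag")
model.eval()
```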
adapter_config.json
ADDED
@@ -0,0 +1,29 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "google-bert/bert-large-uncased",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "query",
    "value"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false
}
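
For reference, the same adapter settings expressed as a `peft.LoraConfig`. Note the file records `task_type: "CAUSAL_LM"` even though the model card targets multiple choice; the sketch transcribes the file as-is:

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForMultipleChoice

config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query", "value"],  # BERT self-attention projections
    task_type="CAUSAL_LM",              # as recorded in adapter_config.json
)
base = AutoModelForMultipleChoice.from_pretrained("google-bert/bert-large-uncased")
model = get_peft_model(base, config)
model.print_trainable_parameters()  # LoRA leaves most weights frozen
```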
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1faef6ed1fe00917ee1e3fe9383c3306e9b9f90ebc854bb8c2c4230f16103d5a
size 3159288
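
The three lines above are a Git LFS pointer, not the weights themselves; the actual file is fetched separately. A small sketch (hypothetical local path) of checking a downloaded file against the pointer's oid and size:

```python
import hashlib
import os

path = "adapter_model.safetensors"  # hypothetical local copy
expected_oid = "1faef6ed1fe00917ee1e3fe9383c3306e9b9f90ebc854bb8c2c4230f16103d5a"
expected_size = 3159288

with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

assert digest == expected_oid, "sha256 does not match the LFS pointer"
assert os.path.getsize(path) == expected_size, "size does not match the LFS pointer"
```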
runs/Aug25_23-56-59_b007ed356559/events.out.tfevents.1724630220.b007ed356559.2046.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ca3d172de0128cd1c4b1a340677089597c35ec4aa29a79197cac7e08d0d2dd43
size 29770
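
This event file is a TensorBoard log from the training run. A sketch of reading scalars back, assuming `tensorboard` is installed and the run directory has been downloaded; the scalar tag names are assumptions, check `Tags()` for the real ones:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Aug25_23-56-59_b007ed356559")  # hypothetical local copy
acc.Reload()
print(acc.Tags()["scalars"])  # lists the scalar tags actually logged
# for event in acc.Scalars("eval/accuracy"):  # tag name is an assumption
#     print(event.step, event.value)
```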
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
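
A short sketch of loading the tokenizer from the base checkpoint and confirming the special-token ids recorded in `added_tokens_decoder` above; the SWAG-style sentence pair is invented for illustration:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-large-uncased")

# Matches added_tokens_decoder: [PAD]=0, [UNK]=100, [CLS]=101, [SEP]=102, [MASK]=103
print(tokenizer.convert_tokens_to_ids(["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]))

# SWAG pairs a context with a candidate ending; this pair is made up:
enc = tokenizer("She opens the oven.", "She pulls out a baking tray.")
print(enc["input_ids"])  # [CLS] ... [SEP] ... [SEP]
```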
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4b7e58b17ab5e76d84a661fa5c93ca0a43ac52bdcbce01c5669792108176056
size 5432
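
`training_args.bin` is a pickled `transformers.TrainingArguments` object. A minimal sketch of inspecting it; unpickling requires trusting the file, and newer PyTorch releases default `torch.load` to `weights_only=True`, so the flag must be passed explicitly:

```python
import torch

# Only load training_args.bin from sources you trust: it is a pickle.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```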
vocab.txt
ADDED
The diff for this file is too large to render.