sophonai/lora-test-distilbert-base-uncased

Browse files

Files changed (6)

README.md +118 -68
adapter_config.json +30 -30
adapter_model.safetensors +1 -1
special_tokens_map.json +7 -7
tokenizer_config.json +56 -56
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,15 +1,15 @@
----
-base_model: distilbert-base-uncased
-library_name: peft
-license: apache-2.0
-metrics:
-- accuracy
-tags:
-- generated_from_trainer
-model-index:
-- name: distilbert-base-uncased-lora-text-classification
-  results: []
----
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0845
-- Accuracy: {'accuracy': 0.884}
 ## Model description
@@ -44,68 +44,118 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 50
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Accuracy            |
-|:-------------:|:-----:|:----:|:---------------:|:-------------------:|
-| No log        | 1.0   | 125  | 0.3046          | {'accuracy': 0.89}  |
-| No log        | 2.0   | 250  | 0.3899          | {'accuracy': 0.853} |
-| No log        | 3.0   | 375  | 0.5440          | {'accuracy': 0.844} |
-| 0.2813        | 4.0   | 500  | 0.4597          | {'accuracy': 0.867} |
-| 0.2813        | 5.0   | 625  | 0.5280          | {'accuracy': 0.88}  |
-| 0.2813        | 6.0   | 750  | 0.5096          | {'accuracy': 0.877} |
-| 0.2813        | 7.0   | 875  | 0.6435          | {'accuracy': 0.881} |
-| 0.0793        | 8.0   | 1000 | 0.6281          | {'accuracy': 0.883} |
-| 0.0793        | 9.0   | 1125 | 0.7278          | {'accuracy': 0.868} |
-| 0.0793        | 10.0  | 1250 | 0.6827          | {'accuracy': 0.876} |
-| 0.0793        | 11.0  | 1375 | 0.8269          | {'accuracy': 0.86}  |
-| 0.0502        | 12.0  | 1500 | 0.7706          | {'accuracy': 0.866} |
-| 0.0502        | 13.0  | 1625 | 0.8552          | {'accuracy': 0.867} |
-| 0.0502        | 14.0  | 1750 | 1.0037          | {'accuracy': 0.872} |
-| 0.0502        | 15.0  | 1875 | 0.8915          | {'accuracy': 0.874} |
-| 0.0237        | 16.0  | 2000 | 0.7394          | {'accuracy': 0.882} |
-| 0.0237        | 17.0  | 2125 | 0.8335          | {'accuracy': 0.878} |
-| 0.0237        | 18.0  | 2250 | 1.0328          | {'accuracy': 0.875} |
-| 0.0237        | 19.0  | 2375 | 0.8622          | {'accuracy': 0.879} |
-| 0.0232        | 20.0  | 2500 | 0.8536          | {'accuracy': 0.872} |
-| 0.0232        | 21.0  | 2625 | 0.9345          | {'accuracy': 0.874} |
-| 0.0232        | 22.0  | 2750 | 0.9473          | {'accuracy': 0.876} |
-| 0.0232        | 23.0  | 2875 | 1.0162          | {'accuracy': 0.874} |
-| 0.0121        | 24.0  | 3000 | 0.9833          | {'accuracy': 0.868} |
-| 0.0121        | 25.0  | 3125 | 0.9889          | {'accuracy': 0.878} |
-| 0.0121        | 26.0  | 3250 | 0.9712          | {'accuracy': 0.874} |
-| 0.0121        | 27.0  | 3375 | 0.9586          | {'accuracy': 0.877} |
-| 0.0098        | 28.0  | 3500 | 0.9557          | {'accuracy': 0.875} |
-| 0.0098        | 29.0  | 3625 | 1.0105          | {'accuracy': 0.873} |
-| 0.0098        | 30.0  | 3750 | 0.9052          | {'accuracy': 0.869} |
-| 0.0098        | 31.0  | 3875 | 0.9571          | {'accuracy': 0.878} |
-| 0.0093        | 32.0  | 4000 | 1.0417          | {'accuracy': 0.871} |
-| 0.0093        | 33.0  | 4125 | 1.1209          | {'accuracy': 0.871} |
-| 0.0093        | 34.0  | 4250 | 1.1851          | {'accuracy': 0.869} |
-| 0.0093        | 35.0  | 4375 | 1.4009          | {'accuracy': 0.871} |
-| 0.0048        | 36.0  | 4500 | 1.1369          | {'accuracy': 0.879} |
-| 0.0048        | 37.0  | 4625 | 1.1375          | {'accuracy': 0.879} |
-| 0.0048        | 38.0  | 4750 | 1.1177          | {'accuracy': 0.877} |
-| 0.0048        | 39.0  | 4875 | 1.1163          | {'accuracy': 0.881} |
-| 0.0004        | 40.0  | 5000 | 1.1190          | {'accuracy': 0.874} |
-| 0.0004        | 41.0  | 5125 | 1.1133          | {'accuracy': 0.879} |
-| 0.0004        | 42.0  | 5250 | 1.1240          | {'accuracy': 0.878} |
-| 0.0004        | 43.0  | 5375 | 1.0499          | {'accuracy': 0.885} |
-| 0.0023        | 44.0  | 5500 | 1.0496          | {'accuracy': 0.884} |
-| 0.0023        | 45.0  | 5625 | 1.0538          | {'accuracy': 0.884} |
-| 0.0023        | 46.0  | 5750 | 1.0807          | {'accuracy': 0.879} |
-| 0.0023        | 47.0  | 5875 | 1.0642          | {'accuracy': 0.88}  |
-| 0.0002        | 48.0  | 6000 | 1.0655          | {'accuracy': 0.879} |
-| 0.0002        | 49.0  | 6125 | 1.0755          | {'accuracy': 0.881} |
-| 0.0002        | 50.0  | 6250 | 1.0845          | {'accuracy': 0.884} |
 ### Framework versions
 - PEFT 0.11.1
-- Transformers 4.42.3
 - Pytorch 2.3.1+cu121
 - Datasets 2.19.1
 - Tokenizers 0.19.1

+---
+base_model: distilbert-base-uncased
+library_name: peft
+license: apache-2.0
+metrics:
+- accuracy
+tags:
+- generated_from_trainer
+model-index:
+- name: distilbert-base-uncased-lora-text-classification
+  results: []
+---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.2966
+- Accuracy: {'accuracy': 0.886}
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 100
 ### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Accuracy            |
+|:-------------:|:-----:|:-----:|:---------------:|:-------------------:|
+| No log        | 1.0   | 125   | 0.2972          | {'accuracy': 0.873} |
+| No log        | 2.0   | 250   | 0.4349          | {'accuracy': 0.857} |
+| No log        | 3.0   | 375   | 0.4850          | {'accuracy': 0.861} |
+| 0.2757        | 4.0   | 500   | 0.4277          | {'accuracy': 0.865} |
+| 0.2757        | 5.0   | 625   | 0.4342          | {'accuracy': 0.881} |
+| 0.2757        | 6.0   | 750   | 0.4613          | {'accuracy': 0.88}  |
+| 0.2757        | 7.0   | 875   | 0.6101          | {'accuracy': 0.879} |
+| 0.1047        | 8.0   | 1000  | 0.6068          | {'accuracy': 0.877} |
+| 0.1047        | 9.0   | 1125  | 0.6253          | {'accuracy': 0.878} |
+| 0.1047        | 10.0  | 1250  | 0.6737          | {'accuracy': 0.89}  |
+| 0.1047        | 11.0  | 1375  | 0.8528          | {'accuracy': 0.867} |
+| 0.0462        | 12.0  | 1500  | 0.8829          | {'accuracy': 0.879} |
+| 0.0462        | 13.0  | 1625  | 0.8560          | {'accuracy': 0.881} |
+| 0.0462        | 14.0  | 1750  | 0.9111          | {'accuracy': 0.877} |
+| 0.0462        | 15.0  | 1875  | 0.9331          | {'accuracy': 0.883} |
+| 0.0329        | 16.0  | 2000  | 0.8129          | {'accuracy': 0.879} |
+| 0.0329        | 17.0  | 2125  | 0.8663          | {'accuracy': 0.882} |
+| 0.0329        | 18.0  | 2250  | 0.8163          | {'accuracy': 0.887} |
+| 0.0329        | 19.0  | 2375  | 0.7679          | {'accuracy': 0.891} |
+| 0.0188        | 20.0  | 2500  | 0.7408          | {'accuracy': 0.893} |
+| 0.0188        | 21.0  | 2625  | 0.8557          | {'accuracy': 0.889} |
+| 0.0188        | 22.0  | 2750  | 0.9201          | {'accuracy': 0.878} |
+| 0.0188        | 23.0  | 2875  | 0.8839          | {'accuracy': 0.893} |
+| 0.0078        | 24.0  | 3000  | 0.9388          | {'accuracy': 0.886} |
+| 0.0078        | 25.0  | 3125  | 0.9004          | {'accuracy': 0.877} |
+| 0.0078        | 26.0  | 3250  | 0.9489          | {'accuracy': 0.89}  |
+| 0.0078        | 27.0  | 3375  | 1.0055          | {'accuracy': 0.88}  |
+| 0.0241        | 28.0  | 3500  | 0.9758          | {'accuracy': 0.88}  |
+| 0.0241        | 29.0  | 3625  | 1.0809          | {'accuracy': 0.876} |
+| 0.0241        | 30.0  | 3750  | 1.0976          | {'accuracy': 0.858} |
+| 0.0241        | 31.0  | 3875  | 1.1300          | {'accuracy': 0.859} |
+| 0.0293        | 32.0  | 4000  | 1.1039          | {'accuracy': 0.869} |
+| 0.0293        | 33.0  | 4125  | 0.9788          | {'accuracy': 0.875} |
+| 0.0293        | 34.0  | 4250  | 1.0639          | {'accuracy': 0.873} |
+| 0.0293        | 35.0  | 4375  | 1.2398          | {'accuracy': 0.866} |
+| 0.0088        | 36.0  | 4500  | 1.1332          | {'accuracy': 0.874} |
+| 0.0088        | 37.0  | 4625  | 1.1145          | {'accuracy': 0.877} |
+| 0.0088        | 38.0  | 4750  | 1.1481          | {'accuracy': 0.867} |
+| 0.0088        | 39.0  | 4875  | 1.3712          | {'accuracy': 0.87}  |
+| 0.0054        | 40.0  | 5000  | 1.3314          | {'accuracy': 0.871} |
+| 0.0054        | 41.0  | 5125  | 1.2189          | {'accuracy': 0.879} |
+| 0.0054        | 42.0  | 5250  | 1.4673          | {'accuracy': 0.864} |
+| 0.0054        | 43.0  | 5375  | 1.2771          | {'accuracy': 0.885} |
+| 0.0097        | 44.0  | 5500  | 0.9926          | {'accuracy': 0.879} |
+| 0.0097        | 45.0  | 5625  | 1.0428          | {'accuracy': 0.881} |
+| 0.0097        | 46.0  | 5750  | 1.3764          | {'accuracy': 0.867} |
+| 0.0097        | 47.0  | 5875  | 1.2730          | {'accuracy': 0.88}  |
+| 0.0076        | 48.0  | 6000  | 1.3435          | {'accuracy': 0.895} |
+| 0.0076        | 49.0  | 6125  | 1.4281          | {'accuracy': 0.883} |
+| 0.0076        | 50.0  | 6250  | 1.4440          | {'accuracy': 0.874} |
+| 0.0076        | 51.0  | 6375  | 1.5093          | {'accuracy': 0.88}  |
+| 0.0113        | 52.0  | 6500  | 1.2309          | {'accuracy': 0.877} |
+| 0.0113        | 53.0  | 6625  | 1.1447          | {'accuracy': 0.88}  |
+| 0.0113        | 54.0  | 6750  | 1.1743          | {'accuracy': 0.877} |
+| 0.0113        | 55.0  | 6875  | 1.4742          | {'accuracy': 0.867} |
+| 0.0179        | 56.0  | 7000  | 1.2592          | {'accuracy': 0.882} |
+| 0.0179        | 57.0  | 7125  | 1.2337          | {'accuracy': 0.889} |
+| 0.0179        | 58.0  | 7250  | 1.1486          | {'accuracy': 0.894} |
+| 0.0179        | 59.0  | 7375  | 1.1452          | {'accuracy': 0.89}  |
+| 0.0059        | 60.0  | 7500  | 1.1572          | {'accuracy': 0.891} |
+| 0.0059        | 61.0  | 7625  | 1.1582          | {'accuracy': 0.891} |
+| 0.0059        | 62.0  | 7750  | 1.3938          | {'accuracy': 0.884} |
+| 0.0059        | 63.0  | 7875  | 1.2767          | {'accuracy': 0.89}  |
+| 0.0006        | 64.0  | 8000  | 1.2217          | {'accuracy': 0.89}  |
+| 0.0006        | 65.0  | 8125  | 1.2232          | {'accuracy': 0.89}  |
+| 0.0006        | 66.0  | 8250  | 1.2689          | {'accuracy': 0.894} |
+| 0.0006        | 67.0  | 8375  | 1.2529          | {'accuracy': 0.894} |
+| 0.0           | 68.0  | 8500  | 1.2292          | {'accuracy': 0.894} |
+| 0.0           | 69.0  | 8625  | 1.2053          | {'accuracy': 0.893} |
+| 0.0           | 70.0  | 8750  | 1.2587          | {'accuracy': 0.891} |
+| 0.0           | 71.0  | 8875  | 1.2803          | {'accuracy': 0.89}  |
+| 0.0005        | 72.0  | 9000  | 1.3449          | {'accuracy': 0.889} |
+| 0.0005        | 73.0  | 9125  | 1.3193          | {'accuracy': 0.891} |
+| 0.0005        | 74.0  | 9250  | 1.3032          | {'accuracy': 0.892} |
+| 0.0005        | 75.0  | 9375  | 1.3586          | {'accuracy': 0.895} |
+| 0.0006        | 76.0  | 9500  | 1.3457          | {'accuracy': 0.894} |
+| 0.0006        | 77.0  | 9625  | 1.3742          | {'accuracy': 0.892} |
+| 0.0006        | 78.0  | 9750  | 1.3986          | {'accuracy': 0.891} |
+| 0.0006        | 79.0  | 9875  | 1.5180          | {'accuracy': 0.884} |
+| 0.0022        | 80.0  | 10000 | 1.5658          | {'accuracy': 0.879} |
+| 0.0022        | 81.0  | 10125 | 1.5500          | {'accuracy': 0.879} |
+| 0.0022        | 82.0  | 10250 | 1.4174          | {'accuracy': 0.888} |
+| 0.0022        | 83.0  | 10375 | 1.3601          | {'accuracy': 0.89}  |
+| 0.0023        | 84.0  | 10500 | 1.4022          | {'accuracy': 0.887} |
+| 0.0023        | 85.0  | 10625 | 1.3639          | {'accuracy': 0.89}  |
+| 0.0023        | 86.0  | 10750 | 1.2567          | {'accuracy': 0.887} |
+| 0.0023        | 87.0  | 10875 | 1.3608          | {'accuracy': 0.89}  |
+| 0.0043        | 88.0  | 11000 | 1.3487          | {'accuracy': 0.888} |
+| 0.0043        | 89.0  | 11125 | 1.3392          | {'accuracy': 0.889} |
+| 0.0043        | 90.0  | 11250 | 1.3368          | {'accuracy': 0.888} |
+| 0.0043        | 91.0  | 11375 | 1.3246          | {'accuracy': 0.881} |
+| 0.0002        | 92.0  | 11500 | 1.3173          | {'accuracy': 0.881} |
+| 0.0002        | 93.0  | 11625 | 1.2988          | {'accuracy': 0.888} |
+| 0.0002        | 94.0  | 11750 | 1.3090          | {'accuracy': 0.882} |
+| 0.0002        | 95.0  | 11875 | 1.3269          | {'accuracy': 0.894} |
+| 0.0006        | 96.0  | 12000 | 1.2966          | {'accuracy': 0.885} |
+| 0.0006        | 97.0  | 12125 | 1.2965          | {'accuracy': 0.885} |
+| 0.0006        | 98.0  | 12250 | 1.2966          | {'accuracy': 0.886} |
+| 0.0006        | 99.0  | 12375 | 1.2965          | {'accuracy': 0.886} |
+| 0.0           | 100.0 | 12500 | 1.2966          | {'accuracy': 0.886} |
 ### Framework versions
 - PEFT 0.11.1
+- Transformers 4.43.1
 - Pytorch 2.3.1+cu121
 - Datasets 2.19.1
 - Tokenizers 0.19.1

adapter_config.json CHANGED Viewed

@@ -1,31 +1,31 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "distilbert-base-uncased",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": true,
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.01,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": [
-    "classifier",
-    "score"
-  ],
-  "peft_type": "LORA",
-  "r": 1024,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": [
-    "q_lin"
-  ],
-  "task_type": "SEQ_CLS",
-  "use_dora": false,
-  "use_rslora": false
 }

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "distilbert-base-uncased",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.01,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 1024,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_lin"
+  ],
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1be07c69b61d4b8e9c4dcf727a1fdc663f293ded1f0ec91730474dea43d84e21
 size 40119480

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0245711dde517faeaa4f6114aa8c7adb04a46af934e8b1861981499c0ddd143
 size 40119480

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,7 @@
-{
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json CHANGED Viewed

@@ -1,56 +1,56 @@
-{
-  "add_prefix_space": true,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "DistilBertTokenizer",
-  "unk_token": "[UNK]"
-}

+{
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48d608d87a4a289efccf3cc694efa37c422e7bdf9dc08320ccff5b567902df4c
-size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2edc551ea9bd01270ad14603c0b7d04fa67fc0f101312ab1672b9e2019b3d7e
+size 5240