SharonTudi committed on
Commit
0546022
·
verified ·
1 Parent(s): 7547faf

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: bert-base-cased
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # DIALOGUE_one
20
 
21
- This model is a fine-tuned version of [bert-base-cased](https://huggingface.co/bert-base-cased) on the None dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.1156
24
  - Precision: 0.9762
25
  - Recall: 0.9737
26
  - F1: 0.9736
@@ -55,59 +55,59 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
57
  |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
58
- | 1.1326 | 0.62 | 30 | 0.6327 | 0.9875 | 0.9868 | 0.9868 | 0.9868 |
59
- | 0.4421 | 1.25 | 60 | 0.1854 | 0.9637 | 0.9605 | 0.9604 | 0.9605 |
60
- | 0.1449 | 1.88 | 90 | 0.0766 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
61
- | 0.0179 | 2.5 | 120 | 0.0802 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
62
- | 0.0059 | 3.12 | 150 | 0.0361 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
63
- | 0.0032 | 3.75 | 180 | 0.0472 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
64
- | 0.0035 | 4.38 | 210 | 0.0995 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
65
- | 0.0018 | 5.0 | 240 | 0.0930 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
66
- | 0.0015 | 5.62 | 270 | 0.0957 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
67
- | 0.0013 | 6.25 | 300 | 0.0991 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
68
- | 0.0012 | 6.88 | 330 | 0.1028 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
69
- | 0.001 | 7.5 | 360 | 0.0992 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
70
- | 0.0009 | 8.12 | 390 | 0.1020 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
71
- | 0.0009 | 8.75 | 420 | 0.1037 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
72
- | 0.0008 | 9.38 | 450 | 0.1037 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
73
- | 0.0007 | 10.0 | 480 | 0.1035 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
74
- | 0.0007 | 10.62 | 510 | 0.1044 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
75
- | 0.0006 | 11.25 | 540 | 0.1063 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
76
- | 0.0006 | 11.88 | 570 | 0.1061 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
77
- | 0.0005 | 12.5 | 600 | 0.1071 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
78
- | 0.0005 | 13.12 | 630 | 0.1057 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
79
- | 0.0005 | 13.75 | 660 | 0.1064 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
80
- | 0.0005 | 14.38 | 690 | 0.1072 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
81
- | 0.0004 | 15.0 | 720 | 0.1063 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
82
- | 0.0004 | 15.62 | 750 | 0.1068 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
83
- | 0.0004 | 16.25 | 780 | 0.1090 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
84
- | 0.0004 | 16.88 | 810 | 0.1085 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
85
- | 0.0004 | 17.5 | 840 | 0.1095 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
86
- | 0.0004 | 18.12 | 870 | 0.1106 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
87
- | 0.0004 | 18.75 | 900 | 0.1110 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
88
- | 0.0004 | 19.38 | 930 | 0.1101 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
89
- | 0.0004 | 20.0 | 960 | 0.1110 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
90
- | 0.0003 | 20.62 | 990 | 0.1116 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
91
- | 0.0003 | 21.25 | 1020 | 0.1121 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
92
- | 0.0003 | 21.88 | 1050 | 0.1126 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
93
- | 0.0003 | 22.5 | 1080 | 0.1117 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
94
- | 0.0003 | 23.12 | 1110 | 0.1127 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
95
- | 0.0003 | 23.75 | 1140 | 0.1135 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
96
- | 0.0003 | 24.38 | 1170 | 0.1138 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
97
- | 0.0003 | 25.0 | 1200 | 0.1145 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
98
- | 0.0003 | 25.62 | 1230 | 0.1151 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
99
- | 0.0003 | 26.25 | 1260 | 0.1151 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
100
- | 0.0003 | 26.88 | 1290 | 0.1148 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
101
- | 0.0003 | 27.5 | 1320 | 0.1152 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
102
- | 0.0003 | 28.12 | 1350 | 0.1153 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
103
- | 0.0003 | 28.75 | 1380 | 0.1156 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
104
- | 0.0003 | 29.38 | 1410 | 0.1156 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
105
- | 0.0003 | 30.0 | 1440 | 0.1156 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
106
 
107
 
108
  ### Framework versions
109
 
110
- - Transformers 4.37.0
111
  - Pytorch 2.1.0+cu121
112
  - Datasets 2.16.1
113
- - Tokenizers 0.15.0
 
1
  ---
2
  license: apache-2.0
3
+ base_model: distilbert-base-cased
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
18
 
19
  # DIALOGUE_one
20
 
21
+ This model is a fine-tuned version of [distilbert-base-cased](https://huggingface.co/distilbert-base-cased) on an unspecified dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.1162
24
  - Precision: 0.9762
25
  - Recall: 0.9737
26
  - F1: 0.9736
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
57
  |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
58
+ | 1.1718 | 0.62 | 30 | 0.7204 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
59
+ | 0.5326 | 1.25 | 60 | 0.2354 | 0.9637 | 0.9605 | 0.9604 | 0.9605 |
60
+ | 0.2144 | 1.88 | 90 | 0.1127 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
61
+ | 0.0577 | 2.5 | 120 | 0.0236 | 1.0 | 1.0 | 1.0 | 1.0 |
62
+ | 0.0417 | 3.12 | 150 | 0.0598 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
63
+ | 0.0191 | 3.75 | 180 | 0.0617 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
64
+ | 0.0069 | 4.38 | 210 | 0.0785 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
65
+ | 0.0051 | 5.0 | 240 | 0.0715 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
66
+ | 0.0038 | 5.62 | 270 | 0.0879 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
67
+ | 0.0033 | 6.25 | 300 | 0.0812 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
68
+ | 0.0027 | 6.88 | 330 | 0.0864 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
69
+ | 0.0025 | 7.5 | 360 | 0.0867 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
70
+ | 0.002 | 8.12 | 390 | 0.0943 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
71
+ | 0.0019 | 8.75 | 420 | 0.0954 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
72
+ | 0.0017 | 9.38 | 450 | 0.1012 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
73
+ | 0.0015 | 10.0 | 480 | 0.0986 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
74
+ | 0.0013 | 10.62 | 510 | 0.1019 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
75
+ | 0.0013 | 11.25 | 540 | 0.1041 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
76
+ | 0.0012 | 11.88 | 570 | 0.1052 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
77
+ | 0.0011 | 12.5 | 600 | 0.1076 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
78
+ | 0.001 | 13.12 | 630 | 0.1086 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
79
+ | 0.001 | 13.75 | 660 | 0.1080 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
80
+ | 0.0009 | 14.38 | 690 | 0.1078 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
81
+ | 0.0009 | 15.0 | 720 | 0.1038 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
82
+ | 0.0008 | 15.62 | 750 | 0.1002 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
83
+ | 0.0007 | 16.25 | 780 | 0.1014 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
84
+ | 0.0007 | 16.88 | 810 | 0.1039 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
85
+ | 0.0007 | 17.5 | 840 | 0.1057 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
86
+ | 0.0007 | 18.12 | 870 | 0.1086 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
87
+ | 0.0007 | 18.75 | 900 | 0.1076 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
88
+ | 0.0006 | 19.38 | 930 | 0.1075 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
89
+ | 0.0006 | 20.0 | 960 | 0.1065 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
90
+ | 0.0006 | 20.62 | 990 | 0.1066 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
91
+ | 0.0006 | 21.25 | 1020 | 0.1084 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
92
+ | 0.0005 | 21.88 | 1050 | 0.1094 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
93
+ | 0.0005 | 22.5 | 1080 | 0.1097 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
94
+ | 0.0005 | 23.12 | 1110 | 0.1118 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
95
+ | 0.0005 | 23.75 | 1140 | 0.1135 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
96
+ | 0.0005 | 24.38 | 1170 | 0.1140 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
97
+ | 0.0005 | 25.0 | 1200 | 0.1139 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
98
+ | 0.0004 | 25.62 | 1230 | 0.1145 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
99
+ | 0.0004 | 26.25 | 1260 | 0.1148 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
100
+ | 0.0005 | 26.88 | 1290 | 0.1146 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
101
+ | 0.0005 | 27.5 | 1320 | 0.1153 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
102
+ | 0.0005 | 28.12 | 1350 | 0.1154 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
103
+ | 0.0005 | 28.75 | 1380 | 0.1157 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
104
+ | 0.0005 | 29.38 | 1410 | 0.1161 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
105
+ | 0.0004 | 30.0 | 1440 | 0.1162 | 0.9762 | 0.9737 | 0.9736 | 0.9737 |
106
 
107
 
108
  ### Framework versions
109
 
110
+ - Transformers 4.37.1
111
  - Pytorch 2.1.0+cu121
112
  - Datasets 2.16.1
113
+ - Tokenizers 0.15.1
config.json CHANGED
@@ -1,14 +1,13 @@
1
  {
2
- "_name_or_path": "bert-base-cased",
 
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
12
  "id2label": {
13
  "0": "Hospital-Inform",
14
  "1": "Hospital-Request",
@@ -16,24 +15,24 @@
16
  "3": "general-thank"
17
  },
18
  "initializer_range": 0.02,
19
- "intermediate_size": 3072,
20
  "label2id": {
21
  "LABEL_0": 0,
22
  "LABEL_1": 1,
23
  "LABEL_2": 2,
24
  "LABEL_3": 3
25
  },
26
- "layer_norm_eps": 1e-12,
27
  "max_position_embeddings": 512,
28
- "model_type": "bert",
29
- "num_attention_heads": 12,
30
- "num_hidden_layers": 12,
 
31
  "pad_token_id": 0,
32
- "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
 
 
 
 
34
  "torch_dtype": "float32",
35
- "transformers_version": "4.37.0",
36
- "type_vocab_size": 2,
37
- "use_cache": true,
38
  "vocab_size": 28996
39
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-cased",
3
+ "activation": "gelu",
4
  "architectures": [
5
+ "DistilBertForSequenceClassification"
6
  ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
 
 
11
  "id2label": {
12
  "0": "Hospital-Inform",
13
  "1": "Hospital-Request",
 
15
  "3": "general-thank"
16
  },
17
  "initializer_range": 0.02,
 
18
  "label2id": {
19
  "LABEL_0": 0,
20
  "LABEL_1": 1,
21
  "LABEL_2": 2,
22
  "LABEL_3": 3
23
  },
 
24
  "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "output_past": true,
29
  "pad_token_id": 0,
 
30
  "problem_type": "single_label_classification",
31
+ "qa_dropout": 0.1,
32
+ "seq_classif_dropout": 0.2,
33
+ "sinusoidal_pos_embds": false,
34
+ "tie_weights_": true,
35
  "torch_dtype": "float32",
36
+ "transformers_version": "4.37.1",
 
 
37
  "vocab_size": 28996
38
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8cca8de7ac313d38a1feea7a9dea0b219bad023616f89f8e1fd2efddfbbc1bd
3
- size 433276920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef203c014ca693c91fdb0106e4b721f1f1168df800e00c1b1fdf136992b42599
3
+ size 263150840
runs/Jan24_18-39-27_cbbbf18f120b/events.out.tfevents.1706121568.cbbbf18f120b.1412.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c07c5fff203482f69865ce11d7c7709d0344104ed9fa01be212ddae8ba28b312
3
+ size 34968
tokenizer_config.json CHANGED
@@ -52,6 +52,6 @@
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
- "tokenizer_class": "BertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
 
52
  "sep_token": "[SEP]",
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "DistilBertTokenizer",
56
  "unk_token": "[UNK]"
57
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca46491bff09500e83a4af31f4590391cd0f367c2d25c453ef459dde72e63b2b
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb54994d82787ddf5f907dccc1c9c0423195a230a1783094d0f87cc2174296ca
3
  size 4664