meyandrei committed
Commit cf731fd · verified · Parent: 14e418c

Upload 17 files

README.md CHANGED
@@ -1,3 +1,59 @@
- ---
- license: apache-2.0
- ---
+ ---
+ base_model: output_model_gpt2_20epoch
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ model-index:
+ - name: output_model_gpt2_50epoch
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # output_model_gpt2_50epoch
+
+ This model is a fine-tuned version of [output_model_gpt2_20epoch](https://huggingface.co/output_model_gpt2_20epoch) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 5.7376
+ - Accuracy: 0.5284
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.001
+ - train_batch_size: 3
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 18
+ - total_train_batch_size: 54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 30.0
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+
+ ### Framework versions
+
+ - Transformers 4.35.2
+ - Pytorch 2.1.0+cu121
+ - Datasets 2.16.1
+ - Tokenizers 0.15.1
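
Note that the effective batch size of 54 in the card is simply train_batch_size (3) × gradient_accumulation_steps (18). Below is a minimal, hedged sketch of loading this checkpoint with `transformers`; the Hub repo id `meyandrei/output_model_gpt2_50epoch` and the dialogue-style prompt are assumptions inferred from the uploaded tokenizer files, not something stated in this commit.

```python
# Minimal loading sketch (assumptions: the repo id and the prompt format are
# inferred from this commit's files, not confirmed by the author).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "meyandrei/output_model_gpt2_50epoch"  # hypothetical Hub path
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

# The added special tokens suggest a task-oriented-dialogue style prompt.
prompt = "<|context|> <|user|> i need a cheap hotel in the centre <|endofcontext|> <|belief|>"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))
```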
added_tokens.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "<|action|>": 50257,
+   "<|belief|>": 50263,
+   "<|context|>": 50260,
+   "<|endofaction|>": 50261,
+   "<|endofbelief|>": 50266,
+   "<|endofcontext|>": 50258,
+   "<|endofresponse|>": 50259,
+   "<|response|>": 50265,
+   "<|system|>": 50262,
+   "<|user|>": 50264
+ }
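
These ten ids sit directly above GPT-2's base vocabulary (0–50256), which is why `config.json` below reports `vocab_size: 50267`. A hedged sketch of how such tokens are typically registered on a base GPT-2 tokenizer (not necessarily the exact script used to produce this repo):

```python
# Hedged sketch: registering the dialogue markup tokens on base GPT-2.
# Adding them in this order reproduces the ids listed in added_tokens.json.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

special_tokens = [
    "<|action|>", "<|endofcontext|>", "<|endofresponse|>", "<|context|>",
    "<|endofaction|>", "<|system|>", "<|belief|>", "<|user|>",
    "<|response|>", "<|endofbelief|>",
]

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})
model.resize_token_embeddings(len(tokenizer))  # 50257 + 10 = 50267, matching config.json

print(tokenizer.convert_tokens_to_ids("<|belief|>"))  # 50263
```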
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "epoch": 29.86,
+   "eval_accuracy": 0.5283621816943802,
+   "eval_loss": 5.737628936767578,
+   "eval_runtime": 26.229,
+   "eval_samples": 379,
+   "eval_samples_per_second": 14.45,
+   "eval_steps_per_second": 1.83,
+   "perplexity": 310.327731295789,
+   "train_loss": 0.05997911328854768,
+   "train_runtime": 6173.8501,
+   "train_samples": 1246,
+   "train_samples_per_second": 6.055,
+   "train_steps_per_second": 0.112
+ }
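
The reported perplexity is simply the exponential of the evaluation loss; a quick check (not part of the repo):

```python
# exp(eval_loss) reproduces the reported perplexity.
import math

eval_loss = 5.737628936767578
print(math.exp(eval_loss))  # ≈ 310.33, matching "perplexity": 310.327731295789
```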
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "output_model_gpt2_20epoch",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "use_cache": true,
+   "vocab_size": 50267
+ }
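
Everything here is the standard GPT-2 (124M) architecture; the only notable change is `vocab_size: 50267`, i.e. the base vocabulary of 50257 plus the ten added dialogue tokens. A small hedged check:

```python
# Hedged check: the enlarged vocabulary is base GPT-2 plus the 10 added tokens.
from transformers import GPT2Config

base = GPT2Config.from_pretrained("gpt2")
assert base.vocab_size + 10 == 50267  # 50257 + 10
```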
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "epoch": 29.86,
+   "eval_accuracy": 0.5283621816943802,
+   "eval_loss": 5.737628936767578,
+   "eval_runtime": 26.229,
+   "eval_samples": 379,
+   "eval_samples_per_second": 14.45,
+   "eval_steps_per_second": 1.83,
+   "perplexity": 310.327731295789
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.35.2"
+ }
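
These generation defaults were copied from the model config (`_from_model_config: true`) and can be inspected directly; the repo id below is hypothetical:

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("meyandrei/output_model_gpt2_50epoch")  # hypothetical repo id
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 50256 50256
```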
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85cb23c6892d6e094618ab259722e0bb940424a58b46e836bcd8e922278056de
+ size 497804928
runs/Jan27_19-25-13_f9b22a5f3ad7/events.out.tfevents.1706383524.f9b22a5f3ad7.21156.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:466552d9c62137e87f1958404cbef69f199d6351f61596848df6e8eadecf55c5
+ size 5029
runs/Jan27_19-25-13_f9b22a5f3ad7/events.out.tfevents.1706389726.f9b22a5f3ad7.21156.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c73484fc593caefeda0b116bab62905caa7f5a80b454e258d7e21f4fcb4dc23b
+ size 411
special_tokens_map.json ADDED
@@ -0,0 +1,95 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<|action|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|endofcontext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|endofresponse|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|context|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|endofaction|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|belief|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|response|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|endofbelief|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,111 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50257": {
+       "content": "<|action|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50258": {
+       "content": "<|endofcontext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50259": {
+       "content": "<|endofresponse|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50260": {
+       "content": "<|context|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50261": {
+       "content": "<|endofaction|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50262": {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50263": {
+       "content": "<|belief|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50264": {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50265": {
+       "content": "<|response|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50266": {
+       "content": "<|endofbelief|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|action|>",
+     "<|endofcontext|>",
+     "<|endofresponse|>",
+     "<|context|>",
+     "<|endofaction|>",
+     "<|system|>",
+     "<|belief|>",
+     "<|user|>",
+     "<|response|>",
+     "<|endofbelief|>"
+   ],
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
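
The decoder table above must agree with `added_tokens.json`; a hedged round-trip check (the repo id is hypothetical):

```python
# Hedged check: each dialogue token encodes to the single id listed in added_tokens.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meyandrei/output_model_gpt2_50epoch")  # hypothetical repo id
for token in ["<|action|>", "<|belief|>", "<|response|>", "<|endofbelief|>"]:
    print(token, tokenizer.encode(token))  # expected: [50257], [50263], [50265], [50266]
```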
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 29.86,
+   "train_loss": 0.05997911328854768,
+   "train_runtime": 6173.8501,
+   "train_samples": 1246,
+   "train_samples_per_second": 6.055,
+   "train_steps_per_second": 0.112
+ }
trainer_state.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 29.85576923076923,
+   "eval_steps": 500,
+   "global_step": 690,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 21.63,
+       "learning_rate": 0.0002753623188405797,
+       "loss": 0.0731,
+       "step": 500
+     },
+     {
+       "epoch": 29.86,
+       "step": 690,
+       "total_flos": 1.9441172348928e+16,
+       "train_loss": 0.05997911328854768,
+       "train_runtime": 6173.8501,
+       "train_samples_per_second": 6.055,
+       "train_steps_per_second": 0.112
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 690,
+   "num_train_epochs": 30,
+   "save_steps": 500,
+   "total_flos": 1.9441172348928e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
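
The trainer state is internally consistent with the hyperparameters in the model card; a quick arithmetic check (not part of the repo):

```python
# 1246 samples / batch size 3 -> 416 micro-batches per epoch; // 18 accumulation
# steps -> 23 optimizer steps per epoch; * 30 epochs -> 690 max steps.
import math

batches_per_epoch = math.ceil(1246 / 3)           # 416
steps_per_epoch = batches_per_epoch // 18         # 23
max_steps = steps_per_epoch * 30                  # 690, matches "max_steps" and "global_step"
final_epoch = max_steps * 18 / batches_per_epoch  # ≈ 29.856, matches the reported "epoch"
print(max_steps, round(final_epoch, 2))
```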
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c59dcfafcb6d01c00cf4062619c1f2e8bb0a3090183759bfa6f40d2a267f8dc2
+ size 4600
vocab.json ADDED
The diff for this file is too large to render. See raw diff