palsp committed on
Commit
cb31432
·
verified ·
1 Parent(s): 471ede1

End of training

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 2.2244
20
 
21
  ## Model description
22
 
@@ -35,7 +35,7 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 0.0005
39
  - train_batch_size: 4
40
  - eval_batch_size: 4
41
  - seed: 42
@@ -43,23 +43,17 @@ The following hyperparameters were used during training:
43
  - total_train_batch_size: 32
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
- - num_epochs: 5
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:------:|:----:|:---------------:|
53
- | 2.352 | 0.4737 | 200 | 2.2613 |
54
- | 2.363 | 0.9473 | 400 | 2.2484 |
55
- | 2.3263 | 1.4210 | 600 | 2.2433 |
56
- | 2.3454 | 1.8946 | 800 | 2.2371 |
57
- | 2.3287 | 2.3683 | 1000 | 2.2321 |
58
- | 2.3003 | 2.8419 | 1200 | 2.2296 |
59
- | 2.2973 | 3.3156 | 1400 | 2.2266 |
60
- | 2.296 | 3.7892 | 1600 | 2.2257 |
61
- | 2.2924 | 4.2629 | 1800 | 2.2253 |
62
- | 2.2923 | 4.7365 | 2000 | 2.2244 |
63
 
64
 
65
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 2.2492
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 0.001
39
  - train_batch_size: 4
40
  - eval_batch_size: 4
41
  - seed: 42
 
43
  - total_train_batch_size: 32
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
+ - num_epochs: 2
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
50
 
51
  | Training Loss | Epoch | Step | Validation Loss |
52
  |:-------------:|:------:|:----:|:---------------:|
53
+ | 2.409 | 0.4737 | 200 | 2.2836 |
54
+ | 2.3443 | 0.9473 | 400 | 2.2642 |
55
+ | 2.3351 | 1.4210 | 600 | 2.2530 |
56
+ | 2.337 | 1.8946 | 800 | 2.2492 |
 
 
 
 
 
 
57
 
58
 
59
  ### Framework versions
adapter_config.json CHANGED
@@ -3,25 +3,26 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "gpt2",
5
  "bias": "none",
6
- "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 32,
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
- "r": 8,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "c_proj",
24
- "c_attn"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "gpt2",
5
  "bias": "none",
6
+ "fan_in_fan_out": true,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 16,
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 16,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "c_attn",
24
  "c_proj",
25
+ "c_fc"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca076c20175080fe270376f147690c9d184cc22ab11982f66f7b891dcfeaae67
3
- size 3253104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20f6768414da7ab0d5171b00719e81c61a5c014e6870d4a99e2927d4a25b993e
3
+ size 9449344
runs/Aug28_15-24-35_06851a5b4fad/events.out.tfevents.1724858676.06851a5b4fad.560.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32157e417e594816332fbbbdf0d80e76ec7858f7b6b807e55efafd0835c92541
3
+ size 5082
runs/Aug28_15-25-19_06851a5b4fad/events.out.tfevents.1724858720.06851a5b4fad.560.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974d956360ac1f476ed26d455b3a2aaa3acc0d49c745efb92a9b64e9db58f43f
3
+ size 5980
runs/Aug28_15-28-14_06851a5b4fad/events.out.tfevents.1724858895.06851a5b4fad.560.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:439c5d53d29da397c00728f1aaba55a4fb105502d6e47aeafdba0df0a24709f3
3
+ size 8203
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac808cdaefd151e141733b3dceead8564a92a5eebeaabd181fc0e6eec1269385
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a31ea2b6ef19ba1ea8ba84f26caf2695e3fa4bfa2e5154ed7e53e451d666bd2
3
  size 5112