Model save
Browse files
- README.md +18 -21
- all_results.json +6 -19
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- train_results.json +6 -6
- trainer_state.json +261 -65
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,15 +1,9 @@
|
|
1 |
---
|
2 |
base_model: dmis-lab/selfbiorag_7b
|
3 |
tags:
|
4 |
-
- alignment-handbook
|
5 |
- trl
|
6 |
- dpo
|
7 |
- generated_from_trainer
|
8 |
-
- trl
|
9 |
-
- dpo
|
10 |
-
- generated_from_trainer
|
11 |
-
datasets:
|
12 |
-
- HuggingFaceH4/ultrafeedback_binarized
|
13 |
model-index:
|
14 |
- name: selfbiorag-7b-dpo-full-wo-healthsearch_qa-ep3
|
15 |
results: []
|
@@ -20,17 +14,17 @@ should probably proofread and complete it, then remove this comment. -->
|
|
20 |
|
21 |
# selfbiorag-7b-dpo-full-wo-healthsearch_qa-ep3
|
22 |
|
23 |
-
This model is a fine-tuned version of [dmis-lab/selfbiorag_7b](https://huggingface.co/dmis-lab/selfbiorag_7b) on
|
24 |
It achieves the following results on the evaluation set:
|
25 |
-
-
|
26 |
-
-
|
27 |
-
-
|
28 |
-
-
|
29 |
-
-
|
30 |
-
-
|
31 |
-
-
|
32 |
-
-
|
33 |
-
-
|
34 |
|
35 |
## Model description
|
36 |
|
@@ -54,22 +48,25 @@ The following hyperparameters were used during training:
|
|
54 |
- eval_batch_size: 8
|
55 |
- seed: 42
|
56 |
- distributed_type: multi-GPU
|
57 |
-
- num_devices:
|
58 |
- gradient_accumulation_steps: 2
|
59 |
-
- total_train_batch_size:
|
60 |
-
- total_eval_batch_size:
|
61 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
62 |
- lr_scheduler_type: cosine
|
63 |
- lr_scheduler_warmup_ratio: 0.1
|
64 |
-
- num_epochs:
|
65 |
|
66 |
### Training results
|
67 |
|
|
|
|
|
|
|
68 |
|
69 |
|
70 |
### Framework versions
|
71 |
|
72 |
- Transformers 4.39.0.dev0
|
73 |
-
- Pytorch 2.2
|
74 |
- Datasets 2.14.6
|
75 |
- Tokenizers 0.15.2
|
|
|
1 |
---
|
2 |
base_model: dmis-lab/selfbiorag_7b
|
3 |
tags:
|
|
|
4 |
- trl
|
5 |
- dpo
|
6 |
- generated_from_trainer
|
|
|
|
|
|
|
|
|
|
|
7 |
model-index:
|
8 |
- name: selfbiorag-7b-dpo-full-wo-healthsearch_qa-ep3
|
9 |
results: []
|
|
|
14 |
|
15 |
# selfbiorag-7b-dpo-full-wo-healthsearch_qa-ep3
|
16 |
|
17 |
+
This model is a fine-tuned version of [dmis-lab/selfbiorag_7b](https://huggingface.co/dmis-lab/selfbiorag_7b) on an unknown dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
+
- Logits/chosen: -1.6968
|
20 |
+
- Logits/rejected: -1.6723
|
21 |
+
- Logps/chosen: -158.8701
|
22 |
+
- Logps/rejected: -170.2428
|
23 |
+
- Loss: 0.6691
|
24 |
+
- Rewards/accuracies: 0.6941
|
25 |
+
- Rewards/chosen: 0.0706
|
26 |
+
- Rewards/margins: 0.0503
|
27 |
+
- Rewards/rejected: 0.0202
|
28 |
|
29 |
## Model description
|
30 |
|
|
|
48 |
- eval_batch_size: 8
|
49 |
- seed: 42
|
50 |
- distributed_type: multi-GPU
|
51 |
+
- num_devices: 4
|
52 |
- gradient_accumulation_steps: 2
|
53 |
+
- total_train_batch_size: 64
|
54 |
+
- total_eval_batch_size: 32
|
55 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
56 |
- lr_scheduler_type: cosine
|
57 |
- lr_scheduler_warmup_ratio: 0.1
|
58 |
+
- num_epochs: 1
|
59 |
|
60 |
### Training results
|
61 |
|
62 |
+
| Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
|
63 |
+
|:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
|
64 |
+
| 0.6508 | 0.61 | 100 | -1.6968 | -1.6723 | -158.8701 | -170.2428 | 0.6691 | 0.6941 | 0.0706 | 0.0503 | 0.0202 |
|
65 |
|
66 |
|
67 |
### Framework versions
|
68 |
|
69 |
- Transformers 4.39.0.dev0
|
70 |
+
- Pytorch 2.1.2
|
71 |
- Datasets 2.14.6
|
72 |
- Tokenizers 0.15.2
|
all_results.json
CHANGED
@@ -1,21 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
6 |
-
"
|
7 |
-
"
|
8 |
-
"eval_rewards/accuracies": 1.0,
|
9 |
-
"eval_rewards/chosen": 0.3873787820339203,
|
10 |
-
"eval_rewards/margins": 0.40528222918510437,
|
11 |
-
"eval_rewards/rejected": -0.017903532832860947,
|
12 |
-
"eval_runtime": 67.3461,
|
13 |
-
"eval_samples": 3077,
|
14 |
-
"eval_samples_per_second": 45.689,
|
15 |
-
"eval_steps_per_second": 0.728,
|
16 |
-
"train_loss": 0.5981406688690185,
|
17 |
-
"train_runtime": 346.5059,
|
18 |
-
"train_samples": 1885,
|
19 |
-
"train_samples_per_second": 16.32,
|
20 |
-
"train_steps_per_second": 0.13
|
21 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.2519006322069866,
|
4 |
+
"train_runtime": 787.0698,
|
5 |
+
"train_samples": 10477,
|
6 |
+
"train_samples_per_second": 13.311,
|
7 |
+
"train_steps_per_second": 0.208
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4939116424
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e3bf12878dd219177ecbd7042879cc4e9c7ee0478427bc28d5cd74360bc1c66
|
3 |
size 4939116424
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4947390880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfe885f61c314c6938f605dc911c1de2fe4bbbb4426c99f25c33f776e6ac3dcc
|
3 |
size 4947390880
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3590619888
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4779ffacf1ced93b0fae7c156d31864dc9b11c079ff1599f2ad368083d507fef
|
3 |
size 3590619888
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.2519006322069866,
|
4 |
+
"train_runtime": 787.0698,
|
5 |
+
"train_samples": 10477,
|
6 |
+
"train_samples_per_second": 13.311,
|
7 |
+
"train_steps_per_second": 0.208
|
8 |
}
|
trainer_state.json
CHANGED
@@ -1,21 +1,21 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"grad_norm":
|
14 |
-
"learning_rate":
|
15 |
-
"logits/chosen": -1.
|
16 |
-
"logits/rejected": -1.
|
17 |
-
"logps/chosen": -
|
18 |
-
"logps/rejected": -
|
19 |
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
@@ -24,79 +24,275 @@
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
-
"epoch": 0.
|
28 |
-
"grad_norm":
|
29 |
-
"learning_rate":
|
30 |
-
"logits/chosen": -1.
|
31 |
-
"logits/rejected": -1.
|
32 |
-
"logps/chosen": -
|
33 |
-
"logps/rejected": -
|
34 |
-
"loss": 0.
|
35 |
-
"rewards/accuracies": 0.
|
36 |
-
"rewards/chosen":
|
37 |
-
"rewards/margins": 0.
|
38 |
-
"rewards/rejected": -0.
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
-
"epoch":
|
43 |
-
"grad_norm":
|
44 |
-
"learning_rate":
|
45 |
-
"logits/chosen": -1.
|
46 |
-
"logits/rejected": -1.
|
47 |
-
"logps/chosen": -
|
48 |
-
"logps/rejected": -
|
49 |
-
"loss": 0.
|
50 |
-
"rewards/accuracies": 0.
|
51 |
-
"rewards/chosen": 0.
|
52 |
-
"rewards/margins": 0.
|
53 |
-
"rewards/rejected":
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
-
"epoch":
|
58 |
-
"grad_norm":
|
59 |
-
"learning_rate":
|
60 |
-
"logits/chosen": -1.
|
61 |
-
"logits/rejected": -1.
|
62 |
-
"logps/chosen": -
|
63 |
-
"logps/rejected": -
|
64 |
-
"loss": 0.
|
65 |
-
"rewards/accuracies":
|
66 |
-
"rewards/chosen": 0.
|
67 |
-
"rewards/margins": 0.
|
68 |
-
"rewards/rejected":
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
-
"epoch":
|
73 |
-
"grad_norm":
|
74 |
-
"learning_rate":
|
75 |
-
"logits/chosen": -1.
|
76 |
-
"logits/rejected": -1.
|
77 |
-
"logps/chosen": -
|
78 |
-
"logps/rejected": -
|
79 |
-
"loss": 0.
|
80 |
-
"rewards/accuracies":
|
81 |
-
"rewards/chosen": 0.
|
82 |
-
"rewards/margins": 0.
|
83 |
-
"rewards/rejected":
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
-
"epoch": 3
|
88 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
"total_flos": 0.0,
|
90 |
-
"train_loss": 0.
|
91 |
-
"train_runtime":
|
92 |
-
"train_samples_per_second":
|
93 |
-
"train_steps_per_second": 0.
|
94 |
}
|
95 |
],
|
96 |
"logging_steps": 10,
|
97 |
-
"max_steps":
|
98 |
"num_input_tokens_seen": 0,
|
99 |
-
"num_train_epochs":
|
100 |
"save_steps": 100,
|
101 |
"total_flos": 0.0,
|
102 |
"train_batch_size": 8,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 164,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.01,
|
13 |
+
"grad_norm": 6.5958876428735564,
|
14 |
+
"learning_rate": 2.941176470588235e-08,
|
15 |
+
"logits/chosen": -1.6130714416503906,
|
16 |
+
"logits/rejected": -1.7848026752471924,
|
17 |
+
"logps/chosen": -143.55209350585938,
|
18 |
+
"logps/rejected": -137.43441772460938,
|
19 |
"loss": 0.6931,
|
20 |
"rewards/accuracies": 0.0,
|
21 |
"rewards/chosen": 0.0,
|
|
|
24 |
"step": 1
|
25 |
},
|
26 |
{
|
27 |
+
"epoch": 0.06,
|
28 |
+
"grad_norm": 5.967532383605112,
|
29 |
+
"learning_rate": 2.941176470588235e-07,
|
30 |
+
"logits/chosen": -1.8283494710922241,
|
31 |
+
"logits/rejected": -1.7852643728256226,
|
32 |
+
"logps/chosen": -158.81536865234375,
|
33 |
+
"logps/rejected": -151.6327362060547,
|
34 |
+
"loss": 0.693,
|
35 |
+
"rewards/accuracies": 0.4722222089767456,
|
36 |
+
"rewards/chosen": 8.654648991068825e-05,
|
37 |
+
"rewards/margins": 0.0005829257424920797,
|
38 |
+
"rewards/rejected": -0.0004963793326169252,
|
39 |
"step": 10
|
40 |
},
|
41 |
{
|
42 |
+
"epoch": 0.12,
|
43 |
+
"grad_norm": 5.606818404653461,
|
44 |
+
"learning_rate": 4.994863481875841e-07,
|
45 |
+
"logits/chosen": -1.8151414394378662,
|
46 |
+
"logits/rejected": -1.7734615802764893,
|
47 |
+
"logps/chosen": -151.97584533691406,
|
48 |
+
"logps/rejected": -164.20437622070312,
|
49 |
+
"loss": 0.6923,
|
50 |
+
"rewards/accuracies": 0.581250011920929,
|
51 |
+
"rewards/chosen": 0.003920617047697306,
|
52 |
+
"rewards/margins": 0.0024364024866372347,
|
53 |
+
"rewards/rejected": 0.001484214561060071,
|
54 |
"step": 20
|
55 |
},
|
56 |
{
|
57 |
+
"epoch": 0.18,
|
58 |
+
"grad_norm": 6.452038531330129,
|
59 |
+
"learning_rate": 4.904133592102591e-07,
|
60 |
+
"logits/chosen": -1.8305763006210327,
|
61 |
+
"logits/rejected": -1.7172702550888062,
|
62 |
+
"logps/chosen": -154.3677520751953,
|
63 |
+
"logps/rejected": -148.50753784179688,
|
64 |
+
"loss": 0.6882,
|
65 |
+
"rewards/accuracies": 0.6937500238418579,
|
66 |
+
"rewards/chosen": 0.016027290374040604,
|
67 |
+
"rewards/margins": 0.00950100552290678,
|
68 |
+
"rewards/rejected": 0.006526285316795111,
|
69 |
"step": 30
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 0.24,
|
73 |
+
"grad_norm": 6.2953570308846825,
|
74 |
+
"learning_rate": 4.704015606870022e-07,
|
75 |
+
"logits/chosen": -1.7697455883026123,
|
76 |
+
"logits/rejected": -1.7966588735580444,
|
77 |
+
"logps/chosen": -143.58848571777344,
|
78 |
+
"logps/rejected": -166.49522399902344,
|
79 |
+
"loss": 0.6829,
|
80 |
+
"rewards/accuracies": 0.675000011920929,
|
81 |
+
"rewards/chosen": 0.03490619733929634,
|
82 |
+
"rewards/margins": 0.02003355883061886,
|
83 |
+
"rewards/rejected": 0.014872634783387184,
|
84 |
"step": 40
|
85 |
},
|
86 |
{
|
87 |
+
"epoch": 0.3,
|
88 |
+
"grad_norm": 6.274119591898531,
|
89 |
+
"learning_rate": 4.4036148959228356e-07,
|
90 |
+
"logits/chosen": -1.7394487857818604,
|
91 |
+
"logits/rejected": -1.804693579673767,
|
92 |
+
"logps/chosen": -159.61492919921875,
|
93 |
+
"logps/rejected": -136.1581268310547,
|
94 |
+
"loss": 0.6763,
|
95 |
+
"rewards/accuracies": 0.8062499761581421,
|
96 |
+
"rewards/chosen": 0.06023404002189636,
|
97 |
+
"rewards/margins": 0.042321957647800446,
|
98 |
+
"rewards/rejected": 0.017912080511450768,
|
99 |
+
"step": 50
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 0.37,
|
103 |
+
"grad_norm": 6.180992532830828,
|
104 |
+
"learning_rate": 4.016599693735638e-07,
|
105 |
+
"logits/chosen": -1.6605278253555298,
|
106 |
+
"logits/rejected": -1.724905252456665,
|
107 |
+
"logps/chosen": -146.7899932861328,
|
108 |
+
"logps/rejected": -148.02505493164062,
|
109 |
+
"loss": 0.6733,
|
110 |
+
"rewards/accuracies": 0.762499988079071,
|
111 |
+
"rewards/chosen": 0.0686994269490242,
|
112 |
+
"rewards/margins": 0.04312276840209961,
|
113 |
+
"rewards/rejected": 0.02557666040956974,
|
114 |
+
"step": 60
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.43,
|
118 |
+
"grad_norm": 5.590599679916071,
|
119 |
+
"learning_rate": 3.5605791947475926e-07,
|
120 |
+
"logits/chosen": -1.7533237934112549,
|
121 |
+
"logits/rejected": -1.702845811843872,
|
122 |
+
"logps/chosen": -146.6136474609375,
|
123 |
+
"logps/rejected": -140.97921752929688,
|
124 |
+
"loss": 0.6631,
|
125 |
+
"rewards/accuracies": 0.7250000238418579,
|
126 |
+
"rewards/chosen": 0.07920090854167938,
|
127 |
+
"rewards/margins": 0.053236376494169235,
|
128 |
+
"rewards/rejected": 0.0259645227342844,
|
129 |
+
"step": 70
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"epoch": 0.49,
|
133 |
+
"grad_norm": 5.096416269116106,
|
134 |
+
"learning_rate": 3.056302334890786e-07,
|
135 |
+
"logits/chosen": -1.616193413734436,
|
136 |
+
"logits/rejected": -1.6094154119491577,
|
137 |
+
"logps/chosen": -142.79188537597656,
|
138 |
+
"logps/rejected": -140.85447692871094,
|
139 |
+
"loss": 0.6609,
|
140 |
+
"rewards/accuracies": 0.78125,
|
141 |
+
"rewards/chosen": 0.09861920028924942,
|
142 |
+
"rewards/margins": 0.0706188827753067,
|
143 |
+
"rewards/rejected": 0.028000324964523315,
|
144 |
+
"step": 80
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"epoch": 0.55,
|
148 |
+
"grad_norm": 5.517912420297569,
|
149 |
+
"learning_rate": 2.526713714858433e-07,
|
150 |
+
"logits/chosen": -1.608278512954712,
|
151 |
+
"logits/rejected": -1.5585658550262451,
|
152 |
+
"logps/chosen": -132.39981079101562,
|
153 |
+
"logps/rejected": -143.10488891601562,
|
154 |
+
"loss": 0.6557,
|
155 |
+
"rewards/accuracies": 0.7562500238418579,
|
156 |
+
"rewards/chosen": 0.1142318844795227,
|
157 |
+
"rewards/margins": 0.07896542549133301,
|
158 |
+
"rewards/rejected": 0.0352664515376091,
|
159 |
+
"step": 90
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"epoch": 0.61,
|
163 |
+
"grad_norm": 5.179137970855667,
|
164 |
+
"learning_rate": 1.9959096206109175e-07,
|
165 |
+
"logits/chosen": -1.5899827480316162,
|
166 |
+
"logits/rejected": -1.5742290019989014,
|
167 |
+
"logps/chosen": -136.0356903076172,
|
168 |
+
"logps/rejected": -162.7815704345703,
|
169 |
+
"loss": 0.6508,
|
170 |
+
"rewards/accuracies": 0.7562500238418579,
|
171 |
+
"rewards/chosen": 0.11762702465057373,
|
172 |
+
"rewards/margins": 0.08622404932975769,
|
173 |
+
"rewards/rejected": 0.03140297532081604,
|
174 |
+
"step": 100
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 0.61,
|
178 |
+
"eval_logits/chosen": -1.6967989206314087,
|
179 |
+
"eval_logits/rejected": -1.6722551584243774,
|
180 |
+
"eval_logps/chosen": -158.87005615234375,
|
181 |
+
"eval_logps/rejected": -170.24278259277344,
|
182 |
+
"eval_loss": 0.6690559983253479,
|
183 |
+
"eval_rewards/accuracies": 0.6940954923629761,
|
184 |
+
"eval_rewards/chosen": 0.07056128978729248,
|
185 |
+
"eval_rewards/margins": 0.050339534878730774,
|
186 |
+
"eval_rewards/rejected": 0.020221758633852005,
|
187 |
+
"eval_runtime": 1977.6877,
|
188 |
+
"eval_samples_per_second": 9.659,
|
189 |
+
"eval_steps_per_second": 0.302,
|
190 |
+
"step": 100
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"epoch": 0.67,
|
194 |
+
"grad_norm": 5.410829812028072,
|
195 |
+
"learning_rate": 1.4880416421940154e-07,
|
196 |
+
"logits/chosen": -1.6502714157104492,
|
197 |
+
"logits/rejected": -1.6523603200912476,
|
198 |
+
"logps/chosen": -134.38687133789062,
|
199 |
+
"logps/rejected": -157.00936889648438,
|
200 |
+
"loss": 0.6512,
|
201 |
+
"rewards/accuracies": 0.8062499761581421,
|
202 |
+
"rewards/chosen": 0.11995081603527069,
|
203 |
+
"rewards/margins": 0.09394902735948563,
|
204 |
+
"rewards/rejected": 0.026001790538430214,
|
205 |
+
"step": 110
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.73,
|
209 |
+
"grad_norm": 5.845780336717107,
|
210 |
+
"learning_rate": 1.0262177762208507e-07,
|
211 |
+
"logits/chosen": -1.565212607383728,
|
212 |
+
"logits/rejected": -1.6423566341400146,
|
213 |
+
"logps/chosen": -143.96304321289062,
|
214 |
+
"logps/rejected": -149.28546142578125,
|
215 |
+
"loss": 0.6496,
|
216 |
+
"rewards/accuracies": 0.7875000238418579,
|
217 |
+
"rewards/chosen": 0.13488885760307312,
|
218 |
+
"rewards/margins": 0.10831846296787262,
|
219 |
+
"rewards/rejected": 0.026570383459329605,
|
220 |
+
"step": 120
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"epoch": 0.79,
|
224 |
+
"grad_norm": 5.76403048084688,
|
225 |
+
"learning_rate": 6.31451011862412e-08,
|
226 |
+
"logits/chosen": -1.6332323551177979,
|
227 |
+
"logits/rejected": -1.6044152975082397,
|
228 |
+
"logps/chosen": -137.62985229492188,
|
229 |
+
"logps/rejected": -159.90980529785156,
|
230 |
+
"loss": 0.6439,
|
231 |
+
"rewards/accuracies": 0.737500011920929,
|
232 |
+
"rewards/chosen": 0.13013367354869843,
|
233 |
+
"rewards/margins": 0.10071909427642822,
|
234 |
+
"rewards/rejected": 0.02941458486020565,
|
235 |
+
"step": 130
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"epoch": 0.85,
|
239 |
+
"grad_norm": 5.119446644831888,
|
240 |
+
"learning_rate": 3.217032396915265e-08,
|
241 |
+
"logits/chosen": -1.569746971130371,
|
242 |
+
"logits/rejected": -1.6146259307861328,
|
243 |
+
"logps/chosen": -130.83258056640625,
|
244 |
+
"logps/rejected": -160.59701538085938,
|
245 |
+
"loss": 0.6439,
|
246 |
+
"rewards/accuracies": 0.78125,
|
247 |
+
"rewards/chosen": 0.1322535276412964,
|
248 |
+
"rewards/margins": 0.10249896347522736,
|
249 |
+
"rewards/rejected": 0.029754554852843285,
|
250 |
+
"step": 140
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"epoch": 0.91,
|
254 |
+
"grad_norm": 5.590191167835734,
|
255 |
+
"learning_rate": 1.1106798553464802e-08,
|
256 |
+
"logits/chosen": -1.6109774112701416,
|
257 |
+
"logits/rejected": -1.607143759727478,
|
258 |
+
"logps/chosen": -145.5422821044922,
|
259 |
+
"logps/rejected": -155.8082733154297,
|
260 |
+
"loss": 0.6426,
|
261 |
+
"rewards/accuracies": 0.8062499761581421,
|
262 |
+
"rewards/chosen": 0.14719954133033752,
|
263 |
+
"rewards/margins": 0.11081697046756744,
|
264 |
+
"rewards/rejected": 0.03638254478573799,
|
265 |
+
"step": 150
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"epoch": 0.98,
|
269 |
+
"grad_norm": 5.417981503927173,
|
270 |
+
"learning_rate": 9.129154946982687e-10,
|
271 |
+
"logits/chosen": -1.5755327939987183,
|
272 |
+
"logits/rejected": -1.6533405780792236,
|
273 |
+
"logps/chosen": -144.75936889648438,
|
274 |
+
"logps/rejected": -150.3732452392578,
|
275 |
+
"loss": 0.6439,
|
276 |
+
"rewards/accuracies": 0.8062499761581421,
|
277 |
+
"rewards/chosen": 0.1261114478111267,
|
278 |
+
"rewards/margins": 0.10229575634002686,
|
279 |
+
"rewards/rejected": 0.023815687745809555,
|
280 |
+
"step": 160
|
281 |
+
},
|
282 |
+
{
|
283 |
+
"epoch": 1.0,
|
284 |
+
"step": 164,
|
285 |
"total_flos": 0.0,
|
286 |
+
"train_loss": 0.2519006322069866,
|
287 |
+
"train_runtime": 787.0698,
|
288 |
+
"train_samples_per_second": 13.311,
|
289 |
+
"train_steps_per_second": 0.208
|
290 |
}
|
291 |
],
|
292 |
"logging_steps": 10,
|
293 |
+
"max_steps": 164,
|
294 |
"num_input_tokens_seen": 0,
|
295 |
+
"num_train_epochs": 1,
|
296 |
"save_steps": 100,
|
297 |
"total_flos": 0.0,
|
298 |
"train_batch_size": 8,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28f15ca60ba3b07f2027c55b34635f9cf9250401baa46a13461b5c1fbbfdd3f5
|
3 |
size 6264
|