beamaia committed
Commit 806c688 · verified · 1 Parent(s): d3c03c3

Training in progress, step 100, checkpoint

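This commit uploads the intermediate checkpoint at global step 100: the LoRA adapter weights, optimizer state, trainer state, and serialized training arguments listed below. As a point of reference, such a directory can be handed back to the `transformers` Trainer to resume the run; the sketch below is a minimal outline that assumes a `trainer` object (the `train/kl` and reward metrics in `trainer_state.json` suggest a TRL KTO-style trainer) has been rebuilt exactly as in the original training script, which is not part of this commit.

```python
# Minimal sketch, assuming `trainer` was reconstructed the same way as in the
# original run (model, datasets, and hyperparameters are not in this commit).
# Trainer.train() reloads the adapter weights, optimizer.pt, and
# trainer_state.json from the checkpoint directory before continuing.
trainer.train(resume_from_checkpoint="checkpoint-100")
```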
checkpoint-100/adapter_config.json CHANGED
@@ -20,9 +20,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "k_proj",
     "v_proj",
+    "q_proj",
     "o_proj"
   ],
   "task_type": "CAUSAL_LM",
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f30e5cab4159743259ea25988060ddd0796aa045be1b0d895d433c3835a71464
+oid sha256:625e1afaabf27f3c4b4d662331681428569385509fbeaa4f2b10ade303f513d2
 size 872450448
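`adapter_model.safetensors` is tracked with Git LFS, so the diff only rewrites the pointer file: the `oid` digest changes while the size stays at 872450448 bytes. A small sketch for checking a downloaded copy against the committed pointer (the path and expected digest are taken from the new pointer above):

```python
import hashlib


def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its sha256 hex digest (what the LFS oid encodes)."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()


expected = "625e1afaabf27f3c4b4d662331681428569385509fbeaa4f2b10ade303f513d2"
actual = sha256_of("checkpoint-100/adapter_model.safetensors")
print("pointer matches file:", actual == expected)
```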
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17e4f2ce1e411941009ba9612f26c008414cb32e1d3562295057d2e3d6edaea8
+oid sha256:6b72dc8562e0b97c6f460f11d8faeb9872f3f13223f56b9ab73829f958e960e3
 size 1745047034
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
   "best_metric": 0.47333332896232605,
-  "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.15-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
+  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.15-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
   "global_step": 100,
@@ -12,39 +12,39 @@
       "epoch": 0.14,
       "grad_norm": 0.0,
       "learning_rate": 0.0001785714285714286,
-      "loss": 0.4625,
+      "loss": 0.4711,
       "step": 20,
-      "train/kl": 0.0,
-      "train/logps/chosen": -1776.8545590753424,
-      "train/logps/rejected": -1942.4822198275863,
-      "train/rewards/chosen": -150.52799925085617,
-      "train/rewards/margins": 14.066132394258773,
-      "train/rewards/rejected": -164.59413164511494
+      "train/kl": 5.843189239501953,
+      "train/logps/chosen": -1172.703515625,
+      "train/logps/rejected": -1156.49736328125,
+      "train/rewards/chosen": -88.494970703125,
+      "train/rewards/margins": -1.6718872070312614,
+      "train/rewards/rejected": -86.82308349609374
     },
     {
       "epoch": 0.27,
       "grad_norm": 0.0,
       "learning_rate": 0.00015000000000000001,
-      "loss": 0.5125,
+      "loss": 0.4437,
       "step": 40,
       "train/kl": 0.0,
-      "train/logps/chosen": -2509.815929878049,
-      "train/logps/rejected": -2293.327123397436,
-      "train/rewards/chosen": -221.2838700457317,
-      "train/rewards/margins": -20.546890879065046,
-      "train/rewards/rejected": -200.73697916666666
+      "train/logps/chosen": -2424.414392605634,
+      "train/logps/rejected": -2336.605688202247,
+      "train/rewards/chosen": -215.21177651848592,
+      "train/rewards/margins": -9.581421883654457,
+      "train/rewards/rejected": -205.63035463483146
     },
     {
       "epoch": 0.34,
       "eval/kl": 0.0,
-      "eval/logps/chosen": -2216.0310299295775,
-      "eval/logps/rejected": -2032.4711234177216,
-      "eval/rewards/chosen": -193.23674075704224,
-      "eval/rewards/margins": -16.426811959573882,
-      "eval/rewards/rejected": -176.80992879746836,
+      "eval/logps/chosen": -2342.893926056338,
+      "eval/logps/rejected": -2118.181566455696,
+      "eval/rewards/chosen": -205.92391065140845,
+      "eval/rewards/margins": -20.54295238400337,
+      "eval/rewards/rejected": -185.38095826740508,
       "eval_loss": 0.47333332896232605,
-      "eval_runtime": 140.9169,
-      "eval_samples_per_second": 2.129,
+      "eval_runtime": 140.8471,
+      "eval_samples_per_second": 2.13,
       "eval_steps_per_second": 0.532,
       "step": 50
     },
@@ -52,52 +52,52 @@
       "epoch": 0.41,
       "grad_norm": 0.0,
       "learning_rate": 0.00012142857142857143,
-      "loss": 0.5344,
+      "loss": 0.4594,
       "step": 60,
       "train/kl": 0.0,
-      "train/logps/chosen": -2424.8951023391814,
-      "train/logps/rejected": -2159.9033137583892,
-      "train/rewards/chosen": -213.98085709064327,
-      "train/rewards/margins": -25.586968143495625,
-      "train/rewards/rejected": -188.39388894714764
+      "train/logps/chosen": -2388.497661564626,
+      "train/logps/rejected": -2377.544617052023,
+      "train/rewards/chosen": -211.77136479591837,
+      "train/rewards/margins": -3.213425850831669,
+      "train/rewards/rejected": -208.5579389450867
     },
     {
       "epoch": 0.55,
       "grad_norm": 0.0,
       "learning_rate": 9.285714285714286e-05,
-      "loss": 0.4469,
+      "loss": 0.4656,
       "step": 80,
       "train/kl": 0.0,
-      "train/logps/chosen": -2407.7829982517483,
-      "train/logps/rejected": -2151.3718220338983,
-      "train/rewards/chosen": -211.3436680506993,
-      "train/rewards/margins": -24.664863319060885,
-      "train/rewards/rejected": -186.67880473163842
+      "train/logps/chosen": -2349.194211409396,
+      "train/logps/rejected": -2324.7878289473683,
+      "train/rewards/chosen": -207.3123295931208,
+      "train/rewards/margins": -3.7029089059863054,
+      "train/rewards/rejected": -203.6094206871345
     },
     {
       "epoch": 0.68,
       "grad_norm": 0.0,
       "learning_rate": 6.428571428571429e-05,
-      "loss": 0.4281,
+      "loss": 0.4469,
       "step": 100,
       "train/kl": 0.0,
-      "train/logps/chosen": -2460.182253649635,
-      "train/logps/rejected": -2252.468920765027,
-      "train/rewards/chosen": -215.44699475364965,
-      "train/rewards/margins": -18.727454965398294,
-      "train/rewards/rejected": -196.71953978825135
+      "train/logps/chosen": -2737.1844405594406,
+      "train/logps/rejected": -2257.276836158192,
+      "train/rewards/chosen": -243.65840799825176,
+      "train/rewards/margins": -45.40216241209356,
+      "train/rewards/rejected": -198.2562455861582
     },
     {
       "epoch": 0.68,
       "eval/kl": 0.0,
-      "eval/logps/chosen": -2217.5072623239435,
-      "eval/logps/rejected": -2035.4477848101267,
-      "eval/rewards/chosen": -193.3844080105634,
-      "eval/rewards/margins": -16.276813073854527,
-      "eval/rewards/rejected": -177.10759493670886,
+      "eval/logps/chosen": -2343.730193661972,
+      "eval/logps/rejected": -2118.9036787974683,
+      "eval/rewards/chosen": -206.00756492077466,
+      "eval/rewards/margins": -20.55440283612276,
+      "eval/rewards/rejected": -185.4531620846519,
       "eval_loss": 0.47333332896232605,
-      "eval_runtime": 140.8307,
-      "eval_samples_per_second": 2.13,
+      "eval_runtime": 140.7532,
+      "eval_samples_per_second": 2.131,
       "eval_steps_per_second": 0.533,
       "step": 100
     }
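In the reward logs above, each `rewards/margins` value matches the corresponding chosen reward minus rejected reward (as TRL's preference trainers typically log it), and the numbers in the new `trainer_state.json` are consistent with that. A short, self-contained check against the committed file:

```python
import json

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

print(f"step {state['global_step']}, epoch {state['epoch']:.2f}, "
      f"best eval loss {state['best_metric']:.4f}")

# Sanity-check: each logged margin should equal chosen minus rejected rewards,
# up to floating-point noise.
for entry in state["log_history"]:
    for prefix in ("train", "eval"):
        chosen = entry.get(f"{prefix}/rewards/chosen")
        rejected = entry.get(f"{prefix}/rewards/rejected")
        margin = entry.get(f"{prefix}/rewards/margins")
        if None not in (chosen, rejected, margin):
            assert abs((chosen - rejected) - margin) < 1e-6, entry
```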
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8af4fa13b4cd94fa3928354a40920f92728810d4e9e99adc8fe770546867309
+oid sha256:4967bdcccc06e58769a9d818abb4a59d6b51f317661e09036a6f536bee3b86b2
 size 5688