sedrickkeh committed on
Commit
fd86056
·
verified ·
1 Parent(s): fe81af6

Training in progress, epoch 0

Browse files
config.json CHANGED
@@ -30,7 +30,7 @@
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
- "transformers_version": "4.45.2",
34
  "use_cache": false,
35
  "vocab_size": 128256
36
  }
 
30
  "rope_theta": 500000.0,
31
  "tie_word_embeddings": false,
32
  "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.46.1",
34
  "use_cache": false,
35
  "vocab_size": 128256
36
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988b9ff4ccd49b507baa3e95d5682b1f3b3b20556c19347a9c5ff7829c5e0fc8
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066b3a7a5cf27a05c66469227322ea54d585bd7dd867095f6609c2314a9aed6c
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eb01805cd0e0f3d1e2f79859de903ca844b38444c1da423c4e656c2fc617958
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae10e4075b7357ae976c42cb7a4c966165b2b685a6bc0b7c2b71d8f642ed7c9
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:370298f2442557c24519569c5e25f4271e2c5754428d58d47349955ce4f6aabd
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c1823d209ec9ea7c7004f85d603ae41fbe9d65e557568f49837598e1543756
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9213b31e873a85ff740cb0060c79ff1195f2a9eb92e79b9e96bcbe1678cdd9ff
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3609e213adee70875805ab34294ced85b87f699c57bfaa35e2fd2ae7582553b
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,43 +1,43 @@
1
- {"current_steps": 10, "total_steps": 1266, "loss": 0.8891, "learning_rate": 5e-06, "epoch": 0.02365464222353637, "percentage": 0.79, "elapsed_time": "0:04:56", "remaining_time": "10:20:25"}
2
- {"current_steps": 20, "total_steps": 1266, "loss": 0.7949, "learning_rate": 5e-06, "epoch": 0.04730928444707274, "percentage": 1.58, "elapsed_time": "0:09:49", "remaining_time": "10:11:35"}
3
- {"current_steps": 30, "total_steps": 1266, "loss": 0.7599, "learning_rate": 5e-06, "epoch": 0.0709639266706091, "percentage": 2.37, "elapsed_time": "0:14:42", "remaining_time": "10:05:51"}
4
- {"current_steps": 40, "total_steps": 1266, "loss": 0.7391, "learning_rate": 5e-06, "epoch": 0.09461856889414548, "percentage": 3.16, "elapsed_time": "0:19:35", "remaining_time": "10:00:28"}
5
- {"current_steps": 50, "total_steps": 1266, "loss": 0.7272, "learning_rate": 5e-06, "epoch": 0.11827321111768184, "percentage": 3.95, "elapsed_time": "0:24:28", "remaining_time": "9:55:21"}
6
- {"current_steps": 60, "total_steps": 1266, "loss": 0.7159, "learning_rate": 5e-06, "epoch": 0.1419278533412182, "percentage": 4.74, "elapsed_time": "0:29:21", "remaining_time": "9:50:14"}
7
- {"current_steps": 70, "total_steps": 1266, "loss": 0.7118, "learning_rate": 5e-06, "epoch": 0.16558249556475457, "percentage": 5.53, "elapsed_time": "0:34:14", "remaining_time": "9:44:54"}
8
- {"current_steps": 80, "total_steps": 1266, "loss": 0.7019, "learning_rate": 5e-06, "epoch": 0.18923713778829096, "percentage": 6.32, "elapsed_time": "0:39:07", "remaining_time": "9:40:00"}
9
- {"current_steps": 90, "total_steps": 1266, "loss": 0.6872, "learning_rate": 5e-06, "epoch": 0.21289178001182732, "percentage": 7.11, "elapsed_time": "0:44:00", "remaining_time": "9:35:06"}
10
- {"current_steps": 100, "total_steps": 1266, "loss": 0.6853, "learning_rate": 5e-06, "epoch": 0.23654642223536368, "percentage": 7.9, "elapsed_time": "0:48:54", "remaining_time": "9:30:11"}
11
- {"current_steps": 110, "total_steps": 1266, "loss": 0.6909, "learning_rate": 5e-06, "epoch": 0.26020106445890007, "percentage": 8.69, "elapsed_time": "0:53:47", "remaining_time": "9:25:19"}
12
- {"current_steps": 120, "total_steps": 1266, "loss": 0.6782, "learning_rate": 5e-06, "epoch": 0.2838557066824364, "percentage": 9.48, "elapsed_time": "0:58:40", "remaining_time": "9:20:24"}
13
- {"current_steps": 130, "total_steps": 1266, "loss": 0.6744, "learning_rate": 5e-06, "epoch": 0.3075103489059728, "percentage": 10.27, "elapsed_time": "1:03:34", "remaining_time": "9:15:31"}
14
- {"current_steps": 140, "total_steps": 1266, "loss": 0.6828, "learning_rate": 5e-06, "epoch": 0.33116499112950915, "percentage": 11.06, "elapsed_time": "1:08:27", "remaining_time": "9:10:38"}
15
- {"current_steps": 150, "total_steps": 1266, "loss": 0.6661, "learning_rate": 5e-06, "epoch": 0.35481963335304556, "percentage": 11.85, "elapsed_time": "1:13:20", "remaining_time": "9:05:40"}
16
- {"current_steps": 160, "total_steps": 1266, "loss": 0.6701, "learning_rate": 5e-06, "epoch": 0.3784742755765819, "percentage": 12.64, "elapsed_time": "1:18:14", "remaining_time": "9:00:47"}
17
- {"current_steps": 170, "total_steps": 1266, "loss": 0.6677, "learning_rate": 5e-06, "epoch": 0.4021289178001183, "percentage": 13.43, "elapsed_time": "1:23:07", "remaining_time": "8:55:54"}
18
- {"current_steps": 180, "total_steps": 1266, "loss": 0.6742, "learning_rate": 5e-06, "epoch": 0.42578356002365464, "percentage": 14.22, "elapsed_time": "1:28:00", "remaining_time": "8:51:01"}
19
- {"current_steps": 190, "total_steps": 1266, "loss": 0.6711, "learning_rate": 5e-06, "epoch": 0.449438202247191, "percentage": 15.01, "elapsed_time": "1:32:54", "remaining_time": "8:46:08"}
20
- {"current_steps": 200, "total_steps": 1266, "loss": 0.6689, "learning_rate": 5e-06, "epoch": 0.47309284447072736, "percentage": 15.8, "elapsed_time": "1:37:47", "remaining_time": "8:41:15"}
21
- {"current_steps": 210, "total_steps": 1266, "loss": 0.6707, "learning_rate": 5e-06, "epoch": 0.4967474866942638, "percentage": 16.59, "elapsed_time": "1:42:40", "remaining_time": "8:36:20"}
22
- {"current_steps": 220, "total_steps": 1266, "loss": 0.6699, "learning_rate": 5e-06, "epoch": 0.5204021289178001, "percentage": 17.38, "elapsed_time": "1:47:33", "remaining_time": "8:31:25"}
23
- {"current_steps": 230, "total_steps": 1266, "loss": 0.6615, "learning_rate": 5e-06, "epoch": 0.5440567711413364, "percentage": 18.17, "elapsed_time": "1:52:27", "remaining_time": "8:26:31"}
24
- {"current_steps": 240, "total_steps": 1266, "loss": 0.6657, "learning_rate": 5e-06, "epoch": 0.5677114133648729, "percentage": 18.96, "elapsed_time": "1:57:19", "remaining_time": "8:21:34"}
25
- {"current_steps": 250, "total_steps": 1266, "loss": 0.6633, "learning_rate": 5e-06, "epoch": 0.5913660555884093, "percentage": 19.75, "elapsed_time": "2:02:12", "remaining_time": "8:16:39"}
26
- {"current_steps": 260, "total_steps": 1266, "loss": 0.6531, "learning_rate": 5e-06, "epoch": 0.6150206978119456, "percentage": 20.54, "elapsed_time": "2:07:05", "remaining_time": "8:11:46"}
27
- {"current_steps": 270, "total_steps": 1266, "loss": 0.6652, "learning_rate": 5e-06, "epoch": 0.638675340035482, "percentage": 21.33, "elapsed_time": "2:11:59", "remaining_time": "8:06:52"}
28
- {"current_steps": 280, "total_steps": 1266, "loss": 0.6676, "learning_rate": 5e-06, "epoch": 0.6623299822590183, "percentage": 22.12, "elapsed_time": "2:16:52", "remaining_time": "8:01:59"}
29
- {"current_steps": 290, "total_steps": 1266, "loss": 0.6601, "learning_rate": 5e-06, "epoch": 0.6859846244825547, "percentage": 22.91, "elapsed_time": "2:21:45", "remaining_time": "7:57:05"}
30
- {"current_steps": 300, "total_steps": 1266, "loss": 0.6554, "learning_rate": 5e-06, "epoch": 0.7096392667060911, "percentage": 23.7, "elapsed_time": "2:26:37", "remaining_time": "7:52:08"}
31
- {"current_steps": 310, "total_steps": 1266, "loss": 0.6617, "learning_rate": 5e-06, "epoch": 0.7332939089296274, "percentage": 24.49, "elapsed_time": "2:31:30", "remaining_time": "7:47:12"}
32
- {"current_steps": 320, "total_steps": 1266, "loss": 0.6563, "learning_rate": 5e-06, "epoch": 0.7569485511531638, "percentage": 25.28, "elapsed_time": "2:36:22", "remaining_time": "7:42:16"}
33
- {"current_steps": 330, "total_steps": 1266, "loss": 0.654, "learning_rate": 5e-06, "epoch": 0.7806031933767001, "percentage": 26.07, "elapsed_time": "2:41:14", "remaining_time": "7:37:20"}
34
- {"current_steps": 340, "total_steps": 1266, "loss": 0.6543, "learning_rate": 5e-06, "epoch": 0.8042578356002366, "percentage": 26.86, "elapsed_time": "2:46:07", "remaining_time": "7:32:26"}
35
- {"current_steps": 350, "total_steps": 1266, "loss": 0.6621, "learning_rate": 5e-06, "epoch": 0.8279124778237729, "percentage": 27.65, "elapsed_time": "2:51:00", "remaining_time": "7:27:33"}
36
- {"current_steps": 360, "total_steps": 1266, "loss": 0.6546, "learning_rate": 5e-06, "epoch": 0.8515671200473093, "percentage": 28.44, "elapsed_time": "2:55:53", "remaining_time": "7:22:39"}
37
- {"current_steps": 370, "total_steps": 1266, "loss": 0.6576, "learning_rate": 5e-06, "epoch": 0.8752217622708457, "percentage": 29.23, "elapsed_time": "3:00:46", "remaining_time": "7:17:45"}
38
- {"current_steps": 380, "total_steps": 1266, "loss": 0.6489, "learning_rate": 5e-06, "epoch": 0.898876404494382, "percentage": 30.02, "elapsed_time": "3:05:39", "remaining_time": "7:12:52"}
39
- {"current_steps": 390, "total_steps": 1266, "loss": 0.6548, "learning_rate": 5e-06, "epoch": 0.9225310467179184, "percentage": 30.81, "elapsed_time": "3:10:32", "remaining_time": "7:07:58"}
40
- {"current_steps": 400, "total_steps": 1266, "loss": 0.6473, "learning_rate": 5e-06, "epoch": 0.9461856889414547, "percentage": 31.6, "elapsed_time": "3:15:25", "remaining_time": "7:03:06"}
41
- {"current_steps": 410, "total_steps": 1266, "loss": 0.6504, "learning_rate": 5e-06, "epoch": 0.9698403311649911, "percentage": 32.39, "elapsed_time": "3:20:19", "remaining_time": "6:58:13"}
42
- {"current_steps": 420, "total_steps": 1266, "loss": 0.6489, "learning_rate": 5e-06, "epoch": 0.9934949733885275, "percentage": 33.18, "elapsed_time": "3:25:12", "remaining_time": "6:53:21"}
43
- {"current_steps": 422, "total_steps": 1266, "eval_loss": 0.6508141756057739, "epoch": 0.9982259018332348, "percentage": 33.33, "elapsed_time": "3:30:19", "remaining_time": "7:00:38"}
 
1
+ {"current_steps": 10, "total_steps": 1266, "loss": 0.8897, "lr": 5e-06, "epoch": 0.023661638568470866, "percentage": 0.79, "elapsed_time": "0:05:26", "remaining_time": "11:24:07"}
2
+ {"current_steps": 20, "total_steps": 1266, "loss": 0.7877, "lr": 5e-06, "epoch": 0.04732327713694173, "percentage": 1.58, "elapsed_time": "0:10:48", "remaining_time": "11:13:38"}
3
+ {"current_steps": 30, "total_steps": 1266, "loss": 0.7533, "lr": 5e-06, "epoch": 0.0709849157054126, "percentage": 2.37, "elapsed_time": "0:16:10", "remaining_time": "11:06:19"}
4
+ {"current_steps": 40, "total_steps": 1266, "loss": 0.7354, "lr": 5e-06, "epoch": 0.09464655427388347, "percentage": 3.16, "elapsed_time": "0:21:30", "remaining_time": "10:59:22"}
5
+ {"current_steps": 50, "total_steps": 1266, "loss": 0.726, "lr": 5e-06, "epoch": 0.11830819284235433, "percentage": 3.95, "elapsed_time": "0:26:52", "remaining_time": "10:53:27"}
6
+ {"current_steps": 60, "total_steps": 1266, "loss": 0.7152, "lr": 5e-06, "epoch": 0.1419698314108252, "percentage": 4.74, "elapsed_time": "0:32:13", "remaining_time": "10:47:45"}
7
+ {"current_steps": 70, "total_steps": 1266, "loss": 0.7101, "lr": 5e-06, "epoch": 0.16563146997929606, "percentage": 5.53, "elapsed_time": "0:37:35", "remaining_time": "10:42:15"}
8
+ {"current_steps": 80, "total_steps": 1266, "loss": 0.7007, "lr": 5e-06, "epoch": 0.18929310854776693, "percentage": 6.32, "elapsed_time": "0:42:55", "remaining_time": "10:36:19"}
9
+ {"current_steps": 90, "total_steps": 1266, "loss": 0.6856, "lr": 5e-06, "epoch": 0.2129547471162378, "percentage": 7.11, "elapsed_time": "0:48:17", "remaining_time": "10:30:55"}
10
+ {"current_steps": 100, "total_steps": 1266, "loss": 0.685, "lr": 5e-06, "epoch": 0.23661638568470866, "percentage": 7.9, "elapsed_time": "0:53:35", "remaining_time": "10:24:58"}
11
+ {"current_steps": 110, "total_steps": 1266, "loss": 0.6898, "lr": 5e-06, "epoch": 0.26027802425317953, "percentage": 8.69, "elapsed_time": "0:58:57", "remaining_time": "10:19:32"}
12
+ {"current_steps": 120, "total_steps": 1266, "loss": 0.6772, "lr": 5e-06, "epoch": 0.2839396628216504, "percentage": 9.48, "elapsed_time": "1:04:19", "remaining_time": "10:14:16"}
13
+ {"current_steps": 130, "total_steps": 1266, "loss": 0.6733, "lr": 5e-06, "epoch": 0.30760130139012126, "percentage": 10.27, "elapsed_time": "1:09:41", "remaining_time": "10:08:56"}
14
+ {"current_steps": 140, "total_steps": 1266, "loss": 0.6819, "lr": 5e-06, "epoch": 0.33126293995859213, "percentage": 11.06, "elapsed_time": "1:15:03", "remaining_time": "10:03:43"}
15
+ {"current_steps": 150, "total_steps": 1266, "loss": 0.6648, "lr": 5e-06, "epoch": 0.354924578527063, "percentage": 11.85, "elapsed_time": "1:20:26", "remaining_time": "9:58:26"}
16
+ {"current_steps": 160, "total_steps": 1266, "loss": 0.6691, "lr": 5e-06, "epoch": 0.37858621709553386, "percentage": 12.64, "elapsed_time": "1:25:46", "remaining_time": "9:52:58"}
17
+ {"current_steps": 170, "total_steps": 1266, "loss": 0.667, "lr": 5e-06, "epoch": 0.4022478556640047, "percentage": 13.43, "elapsed_time": "1:31:05", "remaining_time": "9:47:19"}
18
+ {"current_steps": 180, "total_steps": 1266, "loss": 0.6739, "lr": 5e-06, "epoch": 0.4259094942324756, "percentage": 14.22, "elapsed_time": "1:36:24", "remaining_time": "9:41:38"}
19
+ {"current_steps": 190, "total_steps": 1266, "loss": 0.6698, "lr": 5e-06, "epoch": 0.44957113280094646, "percentage": 15.01, "elapsed_time": "1:41:45", "remaining_time": "9:36:15"}
20
+ {"current_steps": 200, "total_steps": 1266, "loss": 0.6688, "lr": 5e-06, "epoch": 0.4732327713694173, "percentage": 15.8, "elapsed_time": "1:47:07", "remaining_time": "9:30:59"}
21
+ {"current_steps": 210, "total_steps": 1266, "loss": 0.6697, "lr": 5e-06, "epoch": 0.4968944099378882, "percentage": 16.59, "elapsed_time": "1:52:30", "remaining_time": "9:25:43"}
22
+ {"current_steps": 220, "total_steps": 1266, "loss": 0.6681, "lr": 5e-06, "epoch": 0.5205560485063591, "percentage": 17.38, "elapsed_time": "1:57:53", "remaining_time": "9:20:29"}
23
+ {"current_steps": 230, "total_steps": 1266, "loss": 0.6603, "lr": 5e-06, "epoch": 0.54421768707483, "percentage": 18.17, "elapsed_time": "2:03:14", "remaining_time": "9:15:05"}
24
+ {"current_steps": 240, "total_steps": 1266, "loss": 0.6645, "lr": 5e-06, "epoch": 0.5678793256433008, "percentage": 18.96, "elapsed_time": "2:08:35", "remaining_time": "9:09:45"}
25
+ {"current_steps": 250, "total_steps": 1266, "loss": 0.6616, "lr": 5e-06, "epoch": 0.5915409642117717, "percentage": 19.75, "elapsed_time": "2:13:56", "remaining_time": "9:04:20"}
26
+ {"current_steps": 260, "total_steps": 1266, "loss": 0.652, "lr": 5e-06, "epoch": 0.6152026027802425, "percentage": 20.54, "elapsed_time": "2:19:19", "remaining_time": "8:59:03"}
27
+ {"current_steps": 270, "total_steps": 1266, "loss": 0.6641, "lr": 5e-06, "epoch": 0.6388642413487134, "percentage": 21.33, "elapsed_time": "2:24:40", "remaining_time": "8:53:41"}
28
+ {"current_steps": 280, "total_steps": 1266, "loss": 0.6652, "lr": 5e-06, "epoch": 0.6625258799171843, "percentage": 22.12, "elapsed_time": "2:30:01", "remaining_time": "8:48:18"}
29
+ {"current_steps": 290, "total_steps": 1266, "loss": 0.6597, "lr": 5e-06, "epoch": 0.6861875184856552, "percentage": 22.91, "elapsed_time": "2:35:23", "remaining_time": "8:42:57"}
30
+ {"current_steps": 300, "total_steps": 1266, "loss": 0.6535, "lr": 5e-06, "epoch": 0.709849157054126, "percentage": 23.7, "elapsed_time": "2:40:43", "remaining_time": "8:37:31"}
31
+ {"current_steps": 310, "total_steps": 1266, "loss": 0.6619, "lr": 5e-06, "epoch": 0.7335107956225969, "percentage": 24.49, "elapsed_time": "2:46:04", "remaining_time": "8:32:09"}
32
+ {"current_steps": 320, "total_steps": 1266, "loss": 0.6556, "lr": 5e-06, "epoch": 0.7571724341910677, "percentage": 25.28, "elapsed_time": "2:51:25", "remaining_time": "8:26:47"}
33
+ {"current_steps": 330, "total_steps": 1266, "loss": 0.6541, "lr": 5e-06, "epoch": 0.7808340727595386, "percentage": 26.07, "elapsed_time": "2:56:48", "remaining_time": "8:21:28"}
34
+ {"current_steps": 340, "total_steps": 1266, "loss": 0.6538, "lr": 5e-06, "epoch": 0.8044957113280095, "percentage": 26.86, "elapsed_time": "3:02:10", "remaining_time": "8:16:09"}
35
+ {"current_steps": 350, "total_steps": 1266, "loss": 0.6602, "lr": 5e-06, "epoch": 0.8281573498964804, "percentage": 27.65, "elapsed_time": "3:07:31", "remaining_time": "8:10:47"}
36
+ {"current_steps": 360, "total_steps": 1266, "loss": 0.6538, "lr": 5e-06, "epoch": 0.8518189884649512, "percentage": 28.44, "elapsed_time": "3:12:54", "remaining_time": "8:05:28"}
37
+ {"current_steps": 370, "total_steps": 1266, "loss": 0.6567, "lr": 5e-06, "epoch": 0.8754806270334221, "percentage": 29.23, "elapsed_time": "3:18:15", "remaining_time": "8:00:07"}
38
+ {"current_steps": 380, "total_steps": 1266, "loss": 0.6478, "lr": 5e-06, "epoch": 0.8991422656018929, "percentage": 30.02, "elapsed_time": "3:23:35", "remaining_time": "7:54:42"}
39
+ {"current_steps": 390, "total_steps": 1266, "loss": 0.6553, "lr": 5e-06, "epoch": 0.9228039041703638, "percentage": 30.81, "elapsed_time": "3:28:58", "remaining_time": "7:49:22"}
40
+ {"current_steps": 400, "total_steps": 1266, "loss": 0.6466, "lr": 5e-06, "epoch": 0.9464655427388347, "percentage": 31.6, "elapsed_time": "3:34:17", "remaining_time": "7:43:57"}
41
+ {"current_steps": 410, "total_steps": 1266, "loss": 0.6491, "lr": 5e-06, "epoch": 0.9701271813073056, "percentage": 32.39, "elapsed_time": "3:39:39", "remaining_time": "7:38:35"}
42
+ {"current_steps": 420, "total_steps": 1266, "loss": 0.648, "lr": 5e-06, "epoch": 0.9937888198757764, "percentage": 33.18, "elapsed_time": "3:44:59", "remaining_time": "7:33:11"}
43
+ {"current_steps": 422, "total_steps": 1266, "eval_loss": 0.6503860950469971, "epoch": 0.9985211475894705, "percentage": 33.33, "elapsed_time": "3:51:15", "remaining_time": "7:42:30"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c775a50bf6b8748280f7efe05f5faa861aca1f50449331e3aefcb5ed8e0c28af
3
  size 7160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40af703c5546f27a9d1135055abc9eaf5ab61d0f4764a6276dc4a132285c4a06
3
  size 7160