sedrickkeh
committed on
Training in progress, epoch 0
Browse files
- config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +43 -43
- training_args.bin +1 -1
config.json
CHANGED
@@ -30,7 +30,7 @@
|
|
30 |
"rope_theta": 500000.0,
|
31 |
"tie_word_embeddings": false,
|
32 |
"torch_dtype": "bfloat16",
|
33 |
-
"transformers_version": "4.
|
34 |
"use_cache": false,
|
35 |
"vocab_size": 128256
|
36 |
}
|
|
|
30 |
"rope_theta": 500000.0,
|
31 |
"tie_word_embeddings": false,
|
32 |
"torch_dtype": "bfloat16",
|
33 |
+
"transformers_version": "4.46.1",
|
34 |
"use_cache": false,
|
35 |
"vocab_size": 128256
|
36 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:066b3a7a5cf27a05c66469227322ea54d585bd7dd867095f6609c2314a9aed6c
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ae10e4075b7357ae976c42cb7a4c966165b2b685a6bc0b7c2b71d8f642ed7c9
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08c1823d209ec9ea7c7004f85d603ae41fbe9d65e557568f49837598e1543756
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3609e213adee70875805ab34294ced85b87f699c57bfaa35e2fd2ae7582553b
|
3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
@@ -1,43 +1,43 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 1266, "loss": 0.
|
2 |
-
{"current_steps": 20, "total_steps": 1266, "loss": 0.
|
3 |
-
{"current_steps": 30, "total_steps": 1266, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 1266, "loss": 0.
|
5 |
-
{"current_steps": 50, "total_steps": 1266, "loss": 0.
|
6 |
-
{"current_steps": 60, "total_steps": 1266, "loss": 0.
|
7 |
-
{"current_steps": 70, "total_steps": 1266, "loss": 0.
|
8 |
-
{"current_steps": 80, "total_steps": 1266, "loss": 0.
|
9 |
-
{"current_steps": 90, "total_steps": 1266, "loss": 0.
|
10 |
-
{"current_steps": 100, "total_steps": 1266, "loss": 0.
|
11 |
-
{"current_steps": 110, "total_steps": 1266, "loss": 0.
|
12 |
-
{"current_steps": 120, "total_steps": 1266, "loss": 0.
|
13 |
-
{"current_steps": 130, "total_steps": 1266, "loss": 0.
|
14 |
-
{"current_steps": 140, "total_steps": 1266, "loss": 0.
|
15 |
-
{"current_steps": 150, "total_steps": 1266, "loss": 0.
|
16 |
-
{"current_steps": 160, "total_steps": 1266, "loss": 0.
|
17 |
-
{"current_steps": 170, "total_steps": 1266, "loss": 0.
|
18 |
-
{"current_steps": 180, "total_steps": 1266, "loss": 0.
|
19 |
-
{"current_steps": 190, "total_steps": 1266, "loss": 0.
|
20 |
-
{"current_steps": 200, "total_steps": 1266, "loss": 0.
|
21 |
-
{"current_steps": 210, "total_steps": 1266, "loss": 0.
|
22 |
-
{"current_steps": 220, "total_steps": 1266, "loss": 0.
|
23 |
-
{"current_steps": 230, "total_steps": 1266, "loss": 0.
|
24 |
-
{"current_steps": 240, "total_steps": 1266, "loss": 0.
|
25 |
-
{"current_steps": 250, "total_steps": 1266, "loss": 0.
|
26 |
-
{"current_steps": 260, "total_steps": 1266, "loss": 0.
|
27 |
-
{"current_steps": 270, "total_steps": 1266, "loss": 0.
|
28 |
-
{"current_steps": 280, "total_steps": 1266, "loss": 0.
|
29 |
-
{"current_steps": 290, "total_steps": 1266, "loss": 0.
|
30 |
-
{"current_steps": 300, "total_steps": 1266, "loss": 0.
|
31 |
-
{"current_steps": 310, "total_steps": 1266, "loss": 0.
|
32 |
-
{"current_steps": 320, "total_steps": 1266, "loss": 0.
|
33 |
-
{"current_steps": 330, "total_steps": 1266, "loss": 0.
|
34 |
-
{"current_steps": 340, "total_steps": 1266, "loss": 0.
|
35 |
-
{"current_steps": 350, "total_steps": 1266, "loss": 0.
|
36 |
-
{"current_steps": 360, "total_steps": 1266, "loss": 0.
|
37 |
-
{"current_steps": 370, "total_steps": 1266, "loss": 0.
|
38 |
-
{"current_steps": 380, "total_steps": 1266, "loss": 0.
|
39 |
-
{"current_steps": 390, "total_steps": 1266, "loss": 0.
|
40 |
-
{"current_steps": 400, "total_steps": 1266, "loss": 0.
|
41 |
-
{"current_steps": 410, "total_steps": 1266, "loss": 0.
|
42 |
-
{"current_steps": 420, "total_steps": 1266, "loss": 0.
|
43 |
-
{"current_steps": 422, "total_steps": 1266, "eval_loss": 0.
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 1266, "loss": 0.8897, "lr": 5e-06, "epoch": 0.023661638568470866, "percentage": 0.79, "elapsed_time": "0:05:26", "remaining_time": "11:24:07"}
|
2 |
+
{"current_steps": 20, "total_steps": 1266, "loss": 0.7877, "lr": 5e-06, "epoch": 0.04732327713694173, "percentage": 1.58, "elapsed_time": "0:10:48", "remaining_time": "11:13:38"}
|
3 |
+
{"current_steps": 30, "total_steps": 1266, "loss": 0.7533, "lr": 5e-06, "epoch": 0.0709849157054126, "percentage": 2.37, "elapsed_time": "0:16:10", "remaining_time": "11:06:19"}
|
4 |
+
{"current_steps": 40, "total_steps": 1266, "loss": 0.7354, "lr": 5e-06, "epoch": 0.09464655427388347, "percentage": 3.16, "elapsed_time": "0:21:30", "remaining_time": "10:59:22"}
|
5 |
+
{"current_steps": 50, "total_steps": 1266, "loss": 0.726, "lr": 5e-06, "epoch": 0.11830819284235433, "percentage": 3.95, "elapsed_time": "0:26:52", "remaining_time": "10:53:27"}
|
6 |
+
{"current_steps": 60, "total_steps": 1266, "loss": 0.7152, "lr": 5e-06, "epoch": 0.1419698314108252, "percentage": 4.74, "elapsed_time": "0:32:13", "remaining_time": "10:47:45"}
|
7 |
+
{"current_steps": 70, "total_steps": 1266, "loss": 0.7101, "lr": 5e-06, "epoch": 0.16563146997929606, "percentage": 5.53, "elapsed_time": "0:37:35", "remaining_time": "10:42:15"}
|
8 |
+
{"current_steps": 80, "total_steps": 1266, "loss": 0.7007, "lr": 5e-06, "epoch": 0.18929310854776693, "percentage": 6.32, "elapsed_time": "0:42:55", "remaining_time": "10:36:19"}
|
9 |
+
{"current_steps": 90, "total_steps": 1266, "loss": 0.6856, "lr": 5e-06, "epoch": 0.2129547471162378, "percentage": 7.11, "elapsed_time": "0:48:17", "remaining_time": "10:30:55"}
|
10 |
+
{"current_steps": 100, "total_steps": 1266, "loss": 0.685, "lr": 5e-06, "epoch": 0.23661638568470866, "percentage": 7.9, "elapsed_time": "0:53:35", "remaining_time": "10:24:58"}
|
11 |
+
{"current_steps": 110, "total_steps": 1266, "loss": 0.6898, "lr": 5e-06, "epoch": 0.26027802425317953, "percentage": 8.69, "elapsed_time": "0:58:57", "remaining_time": "10:19:32"}
|
12 |
+
{"current_steps": 120, "total_steps": 1266, "loss": 0.6772, "lr": 5e-06, "epoch": 0.2839396628216504, "percentage": 9.48, "elapsed_time": "1:04:19", "remaining_time": "10:14:16"}
|
13 |
+
{"current_steps": 130, "total_steps": 1266, "loss": 0.6733, "lr": 5e-06, "epoch": 0.30760130139012126, "percentage": 10.27, "elapsed_time": "1:09:41", "remaining_time": "10:08:56"}
|
14 |
+
{"current_steps": 140, "total_steps": 1266, "loss": 0.6819, "lr": 5e-06, "epoch": 0.33126293995859213, "percentage": 11.06, "elapsed_time": "1:15:03", "remaining_time": "10:03:43"}
|
15 |
+
{"current_steps": 150, "total_steps": 1266, "loss": 0.6648, "lr": 5e-06, "epoch": 0.354924578527063, "percentage": 11.85, "elapsed_time": "1:20:26", "remaining_time": "9:58:26"}
|
16 |
+
{"current_steps": 160, "total_steps": 1266, "loss": 0.6691, "lr": 5e-06, "epoch": 0.37858621709553386, "percentage": 12.64, "elapsed_time": "1:25:46", "remaining_time": "9:52:58"}
|
17 |
+
{"current_steps": 170, "total_steps": 1266, "loss": 0.667, "lr": 5e-06, "epoch": 0.4022478556640047, "percentage": 13.43, "elapsed_time": "1:31:05", "remaining_time": "9:47:19"}
|
18 |
+
{"current_steps": 180, "total_steps": 1266, "loss": 0.6739, "lr": 5e-06, "epoch": 0.4259094942324756, "percentage": 14.22, "elapsed_time": "1:36:24", "remaining_time": "9:41:38"}
|
19 |
+
{"current_steps": 190, "total_steps": 1266, "loss": 0.6698, "lr": 5e-06, "epoch": 0.44957113280094646, "percentage": 15.01, "elapsed_time": "1:41:45", "remaining_time": "9:36:15"}
|
20 |
+
{"current_steps": 200, "total_steps": 1266, "loss": 0.6688, "lr": 5e-06, "epoch": 0.4732327713694173, "percentage": 15.8, "elapsed_time": "1:47:07", "remaining_time": "9:30:59"}
|
21 |
+
{"current_steps": 210, "total_steps": 1266, "loss": 0.6697, "lr": 5e-06, "epoch": 0.4968944099378882, "percentage": 16.59, "elapsed_time": "1:52:30", "remaining_time": "9:25:43"}
|
22 |
+
{"current_steps": 220, "total_steps": 1266, "loss": 0.6681, "lr": 5e-06, "epoch": 0.5205560485063591, "percentage": 17.38, "elapsed_time": "1:57:53", "remaining_time": "9:20:29"}
|
23 |
+
{"current_steps": 230, "total_steps": 1266, "loss": 0.6603, "lr": 5e-06, "epoch": 0.54421768707483, "percentage": 18.17, "elapsed_time": "2:03:14", "remaining_time": "9:15:05"}
|
24 |
+
{"current_steps": 240, "total_steps": 1266, "loss": 0.6645, "lr": 5e-06, "epoch": 0.5678793256433008, "percentage": 18.96, "elapsed_time": "2:08:35", "remaining_time": "9:09:45"}
|
25 |
+
{"current_steps": 250, "total_steps": 1266, "loss": 0.6616, "lr": 5e-06, "epoch": 0.5915409642117717, "percentage": 19.75, "elapsed_time": "2:13:56", "remaining_time": "9:04:20"}
|
26 |
+
{"current_steps": 260, "total_steps": 1266, "loss": 0.652, "lr": 5e-06, "epoch": 0.6152026027802425, "percentage": 20.54, "elapsed_time": "2:19:19", "remaining_time": "8:59:03"}
|
27 |
+
{"current_steps": 270, "total_steps": 1266, "loss": 0.6641, "lr": 5e-06, "epoch": 0.6388642413487134, "percentage": 21.33, "elapsed_time": "2:24:40", "remaining_time": "8:53:41"}
|
28 |
+
{"current_steps": 280, "total_steps": 1266, "loss": 0.6652, "lr": 5e-06, "epoch": 0.6625258799171843, "percentage": 22.12, "elapsed_time": "2:30:01", "remaining_time": "8:48:18"}
|
29 |
+
{"current_steps": 290, "total_steps": 1266, "loss": 0.6597, "lr": 5e-06, "epoch": 0.6861875184856552, "percentage": 22.91, "elapsed_time": "2:35:23", "remaining_time": "8:42:57"}
|
30 |
+
{"current_steps": 300, "total_steps": 1266, "loss": 0.6535, "lr": 5e-06, "epoch": 0.709849157054126, "percentage": 23.7, "elapsed_time": "2:40:43", "remaining_time": "8:37:31"}
|
31 |
+
{"current_steps": 310, "total_steps": 1266, "loss": 0.6619, "lr": 5e-06, "epoch": 0.7335107956225969, "percentage": 24.49, "elapsed_time": "2:46:04", "remaining_time": "8:32:09"}
|
32 |
+
{"current_steps": 320, "total_steps": 1266, "loss": 0.6556, "lr": 5e-06, "epoch": 0.7571724341910677, "percentage": 25.28, "elapsed_time": "2:51:25", "remaining_time": "8:26:47"}
|
33 |
+
{"current_steps": 330, "total_steps": 1266, "loss": 0.6541, "lr": 5e-06, "epoch": 0.7808340727595386, "percentage": 26.07, "elapsed_time": "2:56:48", "remaining_time": "8:21:28"}
|
34 |
+
{"current_steps": 340, "total_steps": 1266, "loss": 0.6538, "lr": 5e-06, "epoch": 0.8044957113280095, "percentage": 26.86, "elapsed_time": "3:02:10", "remaining_time": "8:16:09"}
|
35 |
+
{"current_steps": 350, "total_steps": 1266, "loss": 0.6602, "lr": 5e-06, "epoch": 0.8281573498964804, "percentage": 27.65, "elapsed_time": "3:07:31", "remaining_time": "8:10:47"}
|
36 |
+
{"current_steps": 360, "total_steps": 1266, "loss": 0.6538, "lr": 5e-06, "epoch": 0.8518189884649512, "percentage": 28.44, "elapsed_time": "3:12:54", "remaining_time": "8:05:28"}
|
37 |
+
{"current_steps": 370, "total_steps": 1266, "loss": 0.6567, "lr": 5e-06, "epoch": 0.8754806270334221, "percentage": 29.23, "elapsed_time": "3:18:15", "remaining_time": "8:00:07"}
|
38 |
+
{"current_steps": 380, "total_steps": 1266, "loss": 0.6478, "lr": 5e-06, "epoch": 0.8991422656018929, "percentage": 30.02, "elapsed_time": "3:23:35", "remaining_time": "7:54:42"}
|
39 |
+
{"current_steps": 390, "total_steps": 1266, "loss": 0.6553, "lr": 5e-06, "epoch": 0.9228039041703638, "percentage": 30.81, "elapsed_time": "3:28:58", "remaining_time": "7:49:22"}
|
40 |
+
{"current_steps": 400, "total_steps": 1266, "loss": 0.6466, "lr": 5e-06, "epoch": 0.9464655427388347, "percentage": 31.6, "elapsed_time": "3:34:17", "remaining_time": "7:43:57"}
|
41 |
+
{"current_steps": 410, "total_steps": 1266, "loss": 0.6491, "lr": 5e-06, "epoch": 0.9701271813073056, "percentage": 32.39, "elapsed_time": "3:39:39", "remaining_time": "7:38:35"}
|
42 |
+
{"current_steps": 420, "total_steps": 1266, "loss": 0.648, "lr": 5e-06, "epoch": 0.9937888198757764, "percentage": 33.18, "elapsed_time": "3:44:59", "remaining_time": "7:33:11"}
|
43 |
+
{"current_steps": 422, "total_steps": 1266, "eval_loss": 0.6503860950469971, "epoch": 0.9985211475894705, "percentage": 33.33, "elapsed_time": "3:51:15", "remaining_time": "7:42:30"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7160
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40af703c5546f27a9d1135055abc9eaf5ab61d0f4764a6276dc4a132285c4a06
|
3 |
size 7160
|