sedrickkeh
commited on
Training in progress, epoch 1
Browse files
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79e9598e4243c1154b7e75be9d37e5b16f080cb6b2564220d3b9be0f36d69e1f
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:850681d155ce0045135e89f391499a09d79eeae6b52f0c13bcbf004d901e6481
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ecb0869cc5a882945e90424fc8225a7be467a48e11471d5c7b23014a37b904a
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5db88af80c3df732f02175f3a7a428d0c3ec2270a35248e11a800aa0c800e08f
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -43,3 +43,48 @@
|
|
43 |
{"current_steps": 430, "total_steps": 1314, "loss": 0.6355, "learning_rate": 4.1197132152696215e-06, "epoch": 0.98005698005698, "percentage": 32.72, "elapsed_time": "6:16:23", "remaining_time": "12:53:48"}
|
44 |
{"current_steps": 438, "total_steps": 1314, "eval_loss": 0.6260784268379211, "epoch": 0.9982905982905983, "percentage": 33.33, "elapsed_time": "6:31:13", "remaining_time": "13:02:26"}
|
45 |
{"current_steps": 440, "total_steps": 1314, "loss": 0.6486, "learning_rate": 4.074349032852293e-06, "epoch": 1.0034188034188034, "percentage": 33.49, "elapsed_time": "6:33:38", "remaining_time": "13:01:54"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
{"current_steps": 430, "total_steps": 1314, "loss": 0.6355, "learning_rate": 4.1197132152696215e-06, "epoch": 0.98005698005698, "percentage": 32.72, "elapsed_time": "6:16:23", "remaining_time": "12:53:48"}
|
44 |
{"current_steps": 438, "total_steps": 1314, "eval_loss": 0.6260784268379211, "epoch": 0.9982905982905983, "percentage": 33.33, "elapsed_time": "6:31:13", "remaining_time": "13:02:26"}
|
45 |
{"current_steps": 440, "total_steps": 1314, "loss": 0.6486, "learning_rate": 4.074349032852293e-06, "epoch": 1.0034188034188034, "percentage": 33.49, "elapsed_time": "6:33:38", "remaining_time": "13:01:54"}
|
46 |
+
{"current_steps": 450, "total_steps": 1314, "loss": 0.6053, "learning_rate": 4.028145680145101e-06, "epoch": 1.0262108262108263, "percentage": 34.25, "elapsed_time": "6:42:24", "remaining_time": "12:52:37"}
|
47 |
+
{"current_steps": 460, "total_steps": 1314, "loss": 0.5989, "learning_rate": 3.981132433781259e-06, "epoch": 1.049002849002849, "percentage": 35.01, "elapsed_time": "6:51:11", "remaining_time": "12:43:22"}
|
48 |
+
{"current_steps": 470, "total_steps": 1314, "loss": 0.5999, "learning_rate": 3.9333390835809745e-06, "epoch": 1.0717948717948718, "percentage": 35.77, "elapsed_time": "6:59:57", "remaining_time": "12:34:08"}
|
49 |
+
{"current_steps": 480, "total_steps": 1314, "loss": 0.6073, "learning_rate": 3.8847959136752025e-06, "epoch": 1.0945868945868946, "percentage": 36.53, "elapsed_time": "7:08:42", "remaining_time": "12:24:53"}
|
50 |
+
{"current_steps": 490, "total_steps": 1314, "loss": 0.6123, "learning_rate": 3.835533683316186e-06, "epoch": 1.1173789173789175, "percentage": 37.29, "elapsed_time": "7:17:28", "remaining_time": "12:15:40"}
|
51 |
+
{"current_steps": 500, "total_steps": 1314, "loss": 0.6022, "learning_rate": 3.7855836073869388e-06, "epoch": 1.1401709401709401, "percentage": 38.05, "elapsed_time": "7:26:13", "remaining_time": "12:06:28"}
|
52 |
+
{"current_steps": 510, "total_steps": 1314, "loss": 0.6, "learning_rate": 3.734977336622016e-06, "epoch": 1.162962962962963, "percentage": 38.81, "elapsed_time": "7:34:58", "remaining_time": "11:57:15"}
|
53 |
+
{"current_steps": 520, "total_steps": 1314, "loss": 0.6057, "learning_rate": 3.683746937552109e-06, "epoch": 1.1857549857549858, "percentage": 39.57, "elapsed_time": "7:43:43", "remaining_time": "11:48:03"}
|
54 |
+
{"current_steps": 530, "total_steps": 1314, "loss": 0.5986, "learning_rate": 3.631924872185169e-06, "epoch": 1.2085470085470085, "percentage": 40.33, "elapsed_time": "7:52:28", "remaining_time": "11:38:53"}
|
55 |
+
{"current_steps": 540, "total_steps": 1314, "loss": 0.6088, "learning_rate": 3.579543977436948e-06, "epoch": 1.2313390313390313, "percentage": 41.1, "elapsed_time": "8:01:13", "remaining_time": "11:29:45"}
|
56 |
+
{"current_steps": 550, "total_steps": 1314, "loss": 0.6052, "learning_rate": 3.5266374443239652e-06, "epoch": 1.2541310541310542, "percentage": 41.86, "elapsed_time": "8:09:59", "remaining_time": "11:20:38"}
|
57 |
+
{"current_steps": 560, "total_steps": 1314, "loss": 0.6068, "learning_rate": 3.473238796932114e-06, "epoch": 1.2769230769230768, "percentage": 42.62, "elapsed_time": "8:18:43", "remaining_time": "11:11:30"}
|
58 |
+
{"current_steps": 570, "total_steps": 1314, "loss": 0.6007, "learning_rate": 3.419381871174207e-06, "epoch": 1.2997150997150997, "percentage": 43.38, "elapsed_time": "8:27:29", "remaining_time": "11:02:25"}
|
59 |
+
{"current_steps": 580, "total_steps": 1314, "loss": 0.5991, "learning_rate": 3.365100793349943e-06, "epoch": 1.3225071225071225, "percentage": 44.14, "elapsed_time": "8:36:15", "remaining_time": "10:53:19"}
|
60 |
+
{"current_steps": 590, "total_steps": 1314, "loss": 0.5991, "learning_rate": 3.310429958521862e-06, "epoch": 1.3452991452991454, "percentage": 44.9, "elapsed_time": "8:45:00", "remaining_time": "10:44:15"}
|
61 |
+
{"current_steps": 600, "total_steps": 1314, "loss": 0.6045, "learning_rate": 3.255404008721009e-06, "epoch": 1.368091168091168, "percentage": 45.66, "elapsed_time": "8:53:47", "remaining_time": "10:35:12"}
|
62 |
+
{"current_steps": 610, "total_steps": 1314, "loss": 0.6046, "learning_rate": 3.2000578109961007e-06, "epoch": 1.390883190883191, "percentage": 46.42, "elapsed_time": "9:02:34", "remaining_time": "10:26:11"}
|
63 |
+
{"current_steps": 620, "total_steps": 1314, "loss": 0.6072, "learning_rate": 3.1444264353201075e-06, "epoch": 1.4136752136752135, "percentage": 47.18, "elapsed_time": "9:11:20", "remaining_time": "10:17:08"}
|
64 |
+
{"current_steps": 630, "total_steps": 1314, "loss": 0.605, "learning_rate": 3.0885451323682624e-06, "epoch": 1.4364672364672364, "percentage": 47.95, "elapsed_time": "9:20:06", "remaining_time": "10:08:07"}
|
65 |
+
{"current_steps": 640, "total_steps": 1314, "loss": 0.5998, "learning_rate": 3.0324493111815605e-06, "epoch": 1.4592592592592593, "percentage": 48.71, "elapsed_time": "9:28:52", "remaining_time": "9:59:05"}
|
66 |
+
{"current_steps": 650, "total_steps": 1314, "loss": 0.5976, "learning_rate": 2.976174516729914e-06, "epoch": 1.4820512820512821, "percentage": 49.47, "elapsed_time": "9:37:38", "remaining_time": "9:50:04"}
|
67 |
+
{"current_steps": 660, "total_steps": 1314, "loss": 0.5975, "learning_rate": 2.919756407389174e-06, "epoch": 1.504843304843305, "percentage": 50.23, "elapsed_time": "9:46:22", "remaining_time": "9:41:03"}
|
68 |
+
{"current_steps": 670, "total_steps": 1314, "loss": 0.605, "learning_rate": 2.8632307323462955e-06, "epoch": 1.5276353276353276, "percentage": 50.99, "elapsed_time": "9:55:08", "remaining_time": "9:32:02"}
|
69 |
+
{"current_steps": 680, "total_steps": 1314, "loss": 0.5936, "learning_rate": 2.8066333089469583e-06, "epoch": 1.5504273504273505, "percentage": 51.75, "elapsed_time": "10:03:54", "remaining_time": "9:23:03"}
|
70 |
+
{"current_steps": 690, "total_steps": 1314, "loss": 0.5975, "learning_rate": 2.7500000000000004e-06, "epoch": 1.573219373219373, "percentage": 52.51, "elapsed_time": "10:12:39", "remaining_time": "9:14:03"}
|
71 |
+
{"current_steps": 700, "total_steps": 1314, "loss": 0.6131, "learning_rate": 2.693366691053043e-06, "epoch": 1.596011396011396, "percentage": 53.27, "elapsed_time": "10:21:25", "remaining_time": "9:05:04"}
|
72 |
+
{"current_steps": 710, "total_steps": 1314, "loss": 0.6001, "learning_rate": 2.6367692676537048e-06, "epoch": 1.6188034188034188, "percentage": 54.03, "elapsed_time": "10:30:11", "remaining_time": "8:56:06"}
|
73 |
+
{"current_steps": 720, "total_steps": 1314, "loss": 0.6046, "learning_rate": 2.5802435926108264e-06, "epoch": 1.6415954415954417, "percentage": 54.79, "elapsed_time": "10:38:57", "remaining_time": "8:47:08"}
|
74 |
+
{"current_steps": 730, "total_steps": 1314, "loss": 0.5976, "learning_rate": 2.523825483270087e-06, "epoch": 1.6643874643874645, "percentage": 55.56, "elapsed_time": "10:47:43", "remaining_time": "8:38:10"}
|
75 |
+
{"current_steps": 740, "total_steps": 1314, "loss": 0.5986, "learning_rate": 2.4675506888184407e-06, "epoch": 1.6871794871794872, "percentage": 56.32, "elapsed_time": "10:56:28", "remaining_time": "8:29:13"}
|
76 |
+
{"current_steps": 750, "total_steps": 1314, "loss": 0.5959, "learning_rate": 2.4114548676317383e-06, "epoch": 1.7099715099715098, "percentage": 57.08, "elapsed_time": "11:05:14", "remaining_time": "8:20:16"}
|
77 |
+
{"current_steps": 760, "total_steps": 1314, "loss": 0.6001, "learning_rate": 2.355573564679893e-06, "epoch": 1.7327635327635327, "percentage": 57.84, "elapsed_time": "11:13:59", "remaining_time": "8:11:18"}
|
78 |
+
{"current_steps": 770, "total_steps": 1314, "loss": 0.5966, "learning_rate": 2.2999421890039004e-06, "epoch": 1.7555555555555555, "percentage": 58.6, "elapsed_time": "11:22:44", "remaining_time": "8:02:21"}
|
79 |
+
{"current_steps": 780, "total_steps": 1314, "loss": 0.6036, "learning_rate": 2.244595991278992e-06, "epoch": 1.7783475783475784, "percentage": 59.36, "elapsed_time": "11:31:30", "remaining_time": "7:53:24"}
|
80 |
+
{"current_steps": 790, "total_steps": 1314, "loss": 0.6011, "learning_rate": 2.189570041478139e-06, "epoch": 1.8011396011396013, "percentage": 60.12, "elapsed_time": "11:40:16", "remaining_time": "7:44:29"}
|
81 |
+
{"current_steps": 800, "total_steps": 1314, "loss": 0.588, "learning_rate": 2.134899206650058e-06, "epoch": 1.823931623931624, "percentage": 60.88, "elapsed_time": "11:49:02", "remaining_time": "7:35:33"}
|
82 |
+
{"current_steps": 810, "total_steps": 1314, "loss": 0.5958, "learning_rate": 2.0806181288257937e-06, "epoch": 1.8467236467236468, "percentage": 61.64, "elapsed_time": "11:57:48", "remaining_time": "7:26:38"}
|
83 |
+
{"current_steps": 820, "total_steps": 1314, "loss": 0.5936, "learning_rate": 2.026761203067887e-06, "epoch": 1.8695156695156694, "percentage": 62.4, "elapsed_time": "12:06:34", "remaining_time": "7:17:42"}
|
84 |
+
{"current_steps": 830, "total_steps": 1314, "loss": 0.5973, "learning_rate": 1.973362555676035e-06, "epoch": 1.8923076923076922, "percentage": 63.17, "elapsed_time": "12:15:19", "remaining_time": "7:08:47"}
|
85 |
+
{"current_steps": 840, "total_steps": 1314, "loss": 0.602, "learning_rate": 1.920456022563053e-06, "epoch": 1.915099715099715, "percentage": 63.93, "elapsed_time": "12:24:03", "remaining_time": "6:59:51"}
|
86 |
+
{"current_steps": 850, "total_steps": 1314, "loss": 0.5999, "learning_rate": 1.8680751278148315e-06, "epoch": 1.937891737891738, "percentage": 64.69, "elapsed_time": "12:32:49", "remaining_time": "6:50:57"}
|
87 |
+
{"current_steps": 860, "total_steps": 1314, "loss": 0.6011, "learning_rate": 1.8162530624478918e-06, "epoch": 1.9606837606837608, "percentage": 65.45, "elapsed_time": "12:41:35", "remaining_time": "6:42:02"}
|
88 |
+
{"current_steps": 870, "total_steps": 1314, "loss": 0.5987, "learning_rate": 1.7650226633779838e-06, "epoch": 1.9834757834757835, "percentage": 66.21, "elapsed_time": "12:50:20", "remaining_time": "6:33:08"}
|
89 |
+
{"current_steps": 877, "total_steps": 1314, "eval_loss": 0.6201021671295166, "epoch": 1.9994301994301993, "percentage": 66.74, "elapsed_time": "13:03:48", "remaining_time": "6:30:33"}
|
90 |
+
{"current_steps": 880, "total_steps": 1314, "loss": 0.6215, "learning_rate": 1.7144163926130621e-06, "epoch": 2.006837606837607, "percentage": 66.97, "elapsed_time": "13:07:32", "remaining_time": "6:28:23"}
|