TroyDoesAI
committed on
Some very light instruct tuning for context obedience, but mostly pretraining data
- The model is showing signs of really understanding what the user is saying deeply; let's see if it continues to improve as I grok this pretraining data to convergence over the next 2 weeks lol!
- config.json +1 -1
- model-00001-of-00005.safetensors +1 -1
- model-00002-of-00005.safetensors +1 -1
- model-00003-of-00005.safetensors +1 -1
- model-00004-of-00005.safetensors +1 -1
- model-00005-of-00005.safetensors +1 -1
- traininglog.txt +109 -0
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": ".\\BlackSheep
+  "_name_or_path": ".\\BlackSheep",
   "architectures": [
     "MixtralForCausalLM"
   ],
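The only substantive change here is closing off the "_name_or_path" value; the architecture is still MixtralForCausalLM, so the checkpoint loads through the standard transformers auto classes. A minimal loading sketch, assuming the repo id TroyDoesAI/BlackSheep (the actual repo id is not shown in this commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "TroyDoesAI/BlackSheep"  # assumption: substitute the real repo id or local path

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",   # spread the five shards (~22 GB total) across available devices
    torch_dtype="auto",  # keep whatever dtype the shards were saved in
)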
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9feb3cb1f21af9f9f99a7a9cdbc743038df76a41a44e576f5f6f33b602b27484
 size 4991365712
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7d10dd440267b2650ca251d69b08f08078e229d678ec5ff8e73c4b1f594fd725
 size 4995716272
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f3e7aa064caf67c5c6955c3577ca756605dd1f3cb82f77bebbf183e65ac6d5c8
 size 4957942672
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:865cd498e87b6758323f9f1fc7fcde07563eeaa09f6f5acd0bcf8ba3326d15b1
 size 4995704152
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bfe1e19cfe278adf34eff4c60d891f1954bcd23bb68a85a794ade83e11fb0cf7
 size 2197808096
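Each shard above is tracked as a Git LFS pointer: three plain-text lines recording the spec version, the sha256 of the actual payload, and its size in bytes; only the hash changes in this commit. A sketch for checking a downloaded shard against its pointer, using the first shard's new hash from above:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so a ~5 GB shard never has to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

expected = "9feb3cb1f21af9f9f99a7a9cdbc743038df76a41a44e576f5f6f33b602b27484"
actual = sha256_of("model-00001-of-00005.safetensors")
print("shard OK" if actual == expected else f"hash mismatch: {actual}")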
traininglog.txt ADDED
@@ -0,0 +1,109 @@
+Step: 4 {'loss': 0.6225, 'grad_norm': 0.5366921424865723, 'learning_rate': 3.9999553115423906e-05, 'epoch': 0.000268802752540186}
+Step: 9 {'loss': 0.5018, 'grad_norm': 0.2231733202934265, 'learning_rate': 3.99991062308478e-05, 'epoch': 0.000537605505080372}
+Step: 14 {'loss': 0.6071, 'grad_norm': 0.6181017756462097, 'learning_rate': 3.999865934627171e-05, 'epoch': 0.000806408257620558}
+Step: 19 {'loss': 0.3221, 'grad_norm': 0.2883134186267853, 'learning_rate': 3.9998212461695615e-05, 'epoch': 0.001075211010160744}
+Step: 24 {'loss': 0.6598, 'grad_norm': 0.6283852458000183, 'learning_rate': 3.999776557711951e-05, 'epoch': 0.00134401376270093}
+Step: 29 {'loss': 0.3794, 'grad_norm': 0.7882901430130005, 'learning_rate': 3.9997318692543414e-05, 'epoch': 0.001612816515241116}
+Step: 34 {'loss': 0.4322, 'grad_norm': 0.29714861512184143, 'learning_rate': 3.999687180796732e-05, 'epoch': 0.001881619267781302}
+Step: 39 {'loss': 0.6519, 'grad_norm': 0.8262774348258972, 'learning_rate': 3.999642492339122e-05, 'epoch': 0.002150422020321488}
+Step: 44 {'loss': 0.5566, 'grad_norm': 0.18315744400024414, 'learning_rate': 3.999597803881512e-05, 'epoch': 0.002419224772861674}
+Step: 49 {'loss': 0.5166, 'grad_norm': 0.2871042788028717, 'learning_rate': 3.9995531154239026e-05, 'epoch': 0.00268802752540186}
+Step: 54 {'loss': 0.4397, 'grad_norm': 0.3087766468524933, 'learning_rate': 3.999508426966292e-05, 'epoch': 0.0029568302779420462}
+Step: 59 {'loss': 0.5999, 'grad_norm': 0.3319031596183777, 'learning_rate': 3.9994637385086825e-05, 'epoch': 0.003225633030482232}
+Step: 64 {'loss': 0.4279, 'grad_norm': 0.152162566781044, 'learning_rate': 3.999419050051073e-05, 'epoch': 0.0034944357830224183}
+Step: 69 {'loss': 0.4612, 'grad_norm': 0.4317966103553772, 'learning_rate': 3.999374361593463e-05, 'epoch': 0.003763238535562604}
+Step: 74 {'loss': 0.702, 'grad_norm': 0.4360438287258148, 'learning_rate': 3.9993296731358534e-05, 'epoch': 0.00403204128810279}
+Step: 79 {'loss': 0.3772, 'grad_norm': 0.2938617467880249, 'learning_rate': 3.999284984678243e-05, 'epoch': 0.004300844040642976}
+Step: 84 {'loss': 0.5677, 'grad_norm': 0.41090044379234314, 'learning_rate': 3.9992402962206334e-05, 'epoch': 0.0045696467931831625}
+Step: 89 {'loss': 0.2434, 'grad_norm': 0.2661038041114807, 'learning_rate': 3.9991956077630236e-05, 'epoch': 0.004838449545723348}
+Step: 94 {'loss': 0.162, 'grad_norm': 0.31489041447639465, 'learning_rate': 3.999150919305414e-05, 'epoch': 0.005107252298263534}
+Step: 99 {'loss': 0.323, 'grad_norm': 0.21471913158893585, 'learning_rate': 3.999106230847804e-05, 'epoch': 0.00537605505080372}
+Step: 104 {'loss': 0.9207, 'grad_norm': 0.856233537197113, 'learning_rate': 3.9990615423901945e-05, 'epoch': 0.005644857803343907}
+Step: 109 {'loss': 0.3481, 'grad_norm': 0.29812806844711304, 'learning_rate': 3.999016853932584e-05, 'epoch': 0.0059136605558840925}
+Step: 114 {'loss': 0.6372, 'grad_norm': 0.4753149449825287, 'learning_rate': 3.9989721654749745e-05, 'epoch': 0.006182463308424278}
+Step: 119 {'loss': 0.3034, 'grad_norm': 0.2815486490726471, 'learning_rate': 3.9989274770173654e-05, 'epoch': 0.006451266060964464}
+Step: 124 {'loss': 0.5537, 'grad_norm': 0.601784884929657, 'learning_rate': 3.998882788559755e-05, 'epoch': 0.00672006881350465}
+Step: 129 {'loss': 0.3721, 'grad_norm': 0.2789490520954132, 'learning_rate': 3.9988381001021454e-05, 'epoch': 0.006988871566044837}
+Step: 134 {'loss': 0.5, 'grad_norm': 0.1338292360305786, 'learning_rate': 3.9987934116445356e-05, 'epoch': 0.0072576743185850225}
+Step: 139 {'loss': 0.3479, 'grad_norm': 0.3812941312789917, 'learning_rate': 3.998748723186926e-05, 'epoch': 0.007526477071125208}
+Step: 144 {'loss': 0.4069, 'grad_norm': 0.11460933834314346, 'learning_rate': 3.998704034729316e-05, 'epoch': 0.007795279823665394}
+Step: 149 {'loss': 0.2768, 'grad_norm': 0.22373315691947937, 'learning_rate': 3.9986593462717065e-05, 'epoch': 0.00806408257620558}
+Step: 154 {'loss': 0.3149, 'grad_norm': 0.2564243972301483, 'learning_rate': 3.998614657814096e-05, 'epoch': 0.008332885328745767}
+Step: 159 {'loss': 0.5705, 'grad_norm': 0.29972872138023376, 'learning_rate': 3.9985699693564865e-05, 'epoch': 0.008601688081285952}
+Step: 164 {'loss': 0.7367, 'grad_norm': 0.9685312509536743, 'learning_rate': 3.998525280898877e-05, 'epoch': 0.008870490833826138}
+Step: 169 {'loss': 0.5446, 'grad_norm': 0.258772075176239, 'learning_rate': 3.998480592441267e-05, 'epoch': 0.009139293586366325}
+Step: 174 {'loss': 0.3431, 'grad_norm': 0.7926813364028931, 'learning_rate': 3.9984359039836574e-05, 'epoch': 0.00940809633890651}
+Step: 179 {'loss': 0.5838, 'grad_norm': 0.7501709461212158, 'learning_rate': 3.9983912155260476e-05, 'epoch': 0.009676899091446697}
+Step: 184 {'loss': 0.2948, 'grad_norm': 0.19987852871418, 'learning_rate': 3.998346527068437e-05, 'epoch': 0.009945701843986882}
+Step: 189 {'loss': 0.3696, 'grad_norm': 0.10254683345556259, 'learning_rate': 3.9983018386108276e-05, 'epoch': 0.010214504596527068}
+Step: 194 {'loss': 0.7386, 'grad_norm': 0.42202311754226685, 'learning_rate': 3.998257150153218e-05, 'epoch': 0.010483307349067255}
+Step: 199 {'loss': 0.4241, 'grad_norm': 0.2576633393764496, 'learning_rate': 3.998212461695608e-05, 'epoch': 0.01075211010160744}
+Step: 204 {'loss': 0.5949, 'grad_norm': 0.437381386756897, 'learning_rate': 3.9981677732379985e-05, 'epoch': 0.011020912854147627}
+Step: 209 {'loss': 0.3349, 'grad_norm': 0.20532897114753723, 'learning_rate': 1.9981278894742503e-05, 'epoch': 0.022578217395978928}
+Step: 214 {'loss': 0.4569, 'grad_norm': 0.1352977603673935, 'learning_rate': 1.9980833154141132e-05, 'epoch': 0.023115794000645094}
+Step: 219 {'loss': 0.5782, 'grad_norm': 0.12661641836166382, 'learning_rate': 1.9980387413539764e-05, 'epoch': 0.023653370605311256}
+Step: 224 {'loss': 0.479, 'grad_norm': 0.22078381478786469, 'learning_rate': 1.9979941672938397e-05, 'epoch': 0.02419094720997742}
+Step: 229 {'loss': 0.4898, 'grad_norm': 0.1355533003807068, 'learning_rate': 1.9979495932337026e-05, 'epoch': 0.024728523814643587}
+Step: 234 {'loss': 0.7018, 'grad_norm': 0.16286955773830414, 'learning_rate': 1.997905019173566e-05, 'epoch': 0.025266100419309753}
+Step: 239 {'loss': 0.4878, 'grad_norm': 0.2710496187210083, 'learning_rate': 1.9978604451134288e-05, 'epoch': 0.025803677023975916}
+Step: 244 {'loss': 0.4079, 'grad_norm': 0.16539081931114197, 'learning_rate': 1.997815871053292e-05, 'epoch': 0.02634125362864208}
+Step: 249 {'loss': 0.5864, 'grad_norm': 0.14566001296043396, 'learning_rate': 1.997771296993155e-05, 'epoch': 0.026878830233308247}
+Step: 254 {'loss': 0.4005, 'grad_norm': 0.12252970039844513, 'learning_rate': 1.997726722933018e-05, 'epoch': 0.027416406837974413}
+Step: 259 {'loss': 0.5404, 'grad_norm': 0.24383878707885742, 'learning_rate': 1.997682148872881e-05, 'epoch': 0.027953983442640575}
+Step: 264 {'loss': 0.3172, 'grad_norm': 0.2093472182750702, 'learning_rate': 1.9976375748127443e-05, 'epoch': 0.02849156004730674}
+Step: 269 {'loss': 0.3715, 'grad_norm': 0.19576534628868103, 'learning_rate': 1.9975930007526076e-05, 'epoch': 0.029029136651972907}
+Step: 274 {'loss': 0.303, 'grad_norm': 0.22658678889274597, 'learning_rate': 1.9975484266924705e-05, 'epoch': 0.029566713256639073}
+Step: 279 {'loss': 0.4399, 'grad_norm': 0.17113405466079712, 'learning_rate': 1.9975038526323334e-05, 'epoch': 0.030104289861305235}
+Step: 284 {'loss': 0.6118, 'grad_norm': 0.3955773711204529, 'learning_rate': 1.9974592785721967e-05, 'epoch': 0.0306418664659714}
+Step: 289 {'loss': 0.537, 'grad_norm': 0.18520204722881317, 'learning_rate': 1.99741470451206e-05, 'epoch': 0.031179443070637566}
+Step: 294 {'loss': 0.5581, 'grad_norm': 0.31099817156791687, 'learning_rate': 1.9973701304519228e-05, 'epoch': 0.03171701967530373}
+Step: 299 {'loss': 0.5116, 'grad_norm': 0.14776164293289185, 'learning_rate': 1.997325556391786e-05, 'epoch': 0.0322545962799699}
+Step: 304 {'loss': 0.63, 'grad_norm': 0.23881855607032776, 'learning_rate': 1.9972809823316493e-05, 'epoch': 0.03279217288463606}
+Step: 309 {'loss': 0.3402, 'grad_norm': 0.17691396176815033, 'learning_rate': 1.9972364082715122e-05, 'epoch': 0.03332974948930222}
+Step: 314 {'loss': 0.5772, 'grad_norm': 0.14926782250404358, 'learning_rate': 1.997191834211375e-05, 'epoch': 0.03386732609396839}
+Step: 319 {'loss': 0.4408, 'grad_norm': 0.16565901041030884, 'learning_rate': 1.9971472601512384e-05, 'epoch': 0.034404902698634554}
+Step: 324 {'loss': 0.5823, 'grad_norm': 0.21418456733226776, 'learning_rate': 1.9971026860911013e-05, 'epoch': 0.03494247930330072}
+Step: 329 {'loss': 0.48, 'grad_norm': 0.14608466625213623, 'learning_rate': 1.9970581120309646e-05, 'epoch': 0.035480055907966886}
+Step: 334 {'loss': 0.4705, 'grad_norm': 0.26669827103614807, 'learning_rate': 1.9970135379708278e-05, 'epoch': 0.03601763251263305}
+Step: 339 {'loss': 0.4378, 'grad_norm': 0.33481690287590027, 'learning_rate': 1.9969689639106907e-05, 'epoch': 0.03655520911729922}
+Step: 344 {'loss': 0.6453, 'grad_norm': 0.2946885824203491, 'learning_rate': 1.996924389850554e-05, 'epoch': 0.03709278572196538}
+Step: 349 {'loss': 0.5199, 'grad_norm': 0.4816878139972687, 'learning_rate': 1.996879815790417e-05, 'epoch': 0.03763036232663154}
+Step: 354 {'loss': 0.5932, 'grad_norm': 0.11819624155759811, 'learning_rate': 1.99683524173028e-05, 'epoch': 0.03816793893129771}
+Step: 359 {'loss': 0.6298, 'grad_norm': 0.460887610912323, 'learning_rate': 1.996790667670143e-05, 'epoch': 0.03870551553596387}
+Step: 364 {'loss': 0.6517, 'grad_norm': 0.31503042578697205, 'learning_rate': 1.9967460936100063e-05, 'epoch': 0.03924309214063004}
+Step: 369 {'loss': 0.5211, 'grad_norm': 0.1864064633846283, 'learning_rate': 1.9967015195498692e-05, 'epoch': 0.039780668745296205}
+Step: 374 {'loss': 0.5993, 'grad_norm': 0.16560548543930054, 'learning_rate': 1.9966569454897325e-05, 'epoch': 0.04031824534996237}
+Step: 379 {'loss': 0.6103, 'grad_norm': 0.2273532599210739, 'learning_rate': 1.9966123714295957e-05, 'epoch': 0.040855821954628536}
+Step: 384 {'loss': 0.4817, 'grad_norm': 0.140832781791687, 'learning_rate': 1.9965677973694586e-05, 'epoch': 0.0413933985592947}
+Step: 389 {'loss': 0.301, 'grad_norm': 0.10496355593204498, 'learning_rate': 1.9965232233093215e-05, 'epoch': 0.04193097516396086}
+Step: 394 {'loss': 0.5082, 'grad_norm': 0.29614022374153137, 'learning_rate': 1.9964786492491848e-05, 'epoch': 0.04246855176862703}
+Step: 399 {'loss': 0.4294, 'grad_norm': 0.5071314573287964, 'learning_rate': 1.996434075189048e-05, 'epoch': 0.04300612837329319}
+Step: 404 {'loss': 0.4803, 'grad_norm': 0.2569158375263214, 'learning_rate': 1.996389501128911e-05, 'epoch': 0.04354370497795936}
+Step: 409 {'loss': 0.5818, 'grad_norm': 0.17603649199008942, 'learning_rate': 1.9963449270687742e-05, 'epoch': 0.044081281582625524}
+Step: 414 {'loss': 0.4718, 'grad_norm': 0.22577953338623047, 'learning_rate': 1.9963003530086375e-05, 'epoch': 0.044618858187291686}
+Step: 419 {'loss': 0.4718, 'grad_norm': 0.16803650557994843, 'learning_rate': 1.9962557789485004e-05, 'epoch': 0.045156434791957856}
+Step: 424 {'loss': 0.3851, 'grad_norm': 0.2118372768163681, 'learning_rate': 1.9962112048883633e-05, 'epoch': 0.04569401139662402}
+Interrupted by user
+Step: 425 {'train_runtime': 6098.8217, 'train_samples_per_second': 73.198, 'train_steps_per_second': 36.601, 'train_loss': 0.4727456678107311, 'epoch': 0.04580152671755725}
+01:11:05-200655 INFO LoRA training run is completed and saved.
+01:11:05-307213 INFO Training complete, saving
+01:11:05-399211 INFO Training interrupted.
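Each log line above is "Step: N" followed by the metrics dict the Hugging Face Trainer emits, with a few free-form status lines mixed in (the leading "+" is diff markup, not part of the file). A parsing sketch, assuming the log is saved locally as traininglog.txt; ast.literal_eval handles the single-quoted dicts that json.loads would reject:

import ast

records = []
with open("traininglog.txt") as f:
    for line in f:
        line = line.strip()
        if not line.startswith("Step:"):
            continue  # skip "Interrupted by user" and the INFO lines
        head, _, payload = line.partition(" {")
        metrics = ast.literal_eval("{" + payload)  # rebuild and eval the dict literal
        records.append({"step": int(head.split()[1]), **metrics})

losses = [r["loss"] for r in records if "loss" in r]
print(f"{len(records)} records, mean logged loss {sum(losses) / len(losses):.4f}")

Parsed this way, the records also make the restart easy to spot: the learning rate decays linearly through the 4e-5 range up to step 204, then continues from the 2e-5 range at step 209 while the epoch counter roughly doubles.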