diff --git a/Hunyuan-A52B-Instruct-FP8/model-00001-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00001-of-00080.safetensors index 128f55efda9e512b75bb63e0ff39e83fbcf8ac97..0440c036ee9c5910de9975c82f6a576dfd336a73 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00001-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00001-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f128ac795246b3112495e297e598e472711636189ad4fb0e259e58df0ac9dcc +oid sha256:521c6c7aabefd888aeb2937ed6eecb1d9d7dd30b866e0e6eb7beeed2dbaef264 size 4904971448 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00002-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00002-of-00080.safetensors index 58e18e296731946dd81012055474f2aedab6c9e3..05878453a837a0b968221f8111bd64e691611845 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00002-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00002-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:093e6582fdffce94bd452e994b69d26a1d11d8ffed4b3aca43746af519a6ef91 +oid sha256:c45b39e3fd58782d342c076a607eb493799bc22a503766ef645588f822aa51dc size 4885340536 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00003-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00003-of-00080.safetensors index dfedf72e6c859196839a71047cb460ba4d7295c7..57182d16198c38dd4a30db8e0a88768031393c9f 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00003-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00003-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dfd96542c9649179889fe586b4f24859096413f9ccd67d7cbb599efe8a3aef9e +oid sha256:5050ea793ed99055b7ab92c6ea3a0c47e123d14f858db034f84a8a66b9cd3b1e size 4893533216 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00004-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00004-of-00080.safetensors index 4f7db89f83c8e9e4a60321236be4723fc01a5fe4..39919b4c907a1c283d0c93b8496022f8ffb01fde 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00004-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00004-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:662baedcb8b428ab758495a5a90be2ef9fbcb7ac98d04bd78e179dba1fd28a5a +oid sha256:0119bcf5d96466178f6ed5cb08af964b8acfe8268cc7f4a86584b376674f1a27 size 4920129704 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00005-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00005-of-00080.safetensors index 6c54eb857e3083d86a54ba1849bbb07cd3ad3f33..6e986fee7bae966323e9e374f13e7ed66f6aca8a 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00005-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00005-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91c08a4ae7c4e8d44a60f2e253e334e84561f550d12fc86b5805844d2c4cbb21 +oid sha256:c9dc50b08030c0bcb750505d63ee7fc74003d07c297f95d8a4bad047b8148809 size 4885340512 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00006-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00006-of-00080.safetensors index 8ca823f317d7def7b02e29072a543dcaccd1a2f4..b3df079a51d1596cf30184b840136f037d97e523 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00006-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00006-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d15150b9454a9546c0ed92a754f3a8726af82c795b76926a77fb3eaf8871e480 +oid sha256:73be942dcde5dab69298870ee0f3c32f44d057f82315ebf1f8a1f22a02fb75a2 size 4893533192 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00007-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00007-of-00080.safetensors index 76a72711ced1fcbd605d9da1aecd58cafc9ba6da..e8c8db7c722876cf8d898c97b85b79603bfcb120 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00007-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00007-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1a9ca7c6be84ea60b9dafb665bfda35cfc9d065b6fc583ddd2b3c6b4c080951 +oid sha256:08532bee0ab217deca4c72e97e2f69cb2e030db30949619f4d1836db602cac1e size 4885340544 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00008-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00008-of-00080.safetensors index f61a55b0ff7aee889608abe069e24253813c1020..cc36227a997030ef1633c1bd5454ec1eacf9a34f 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00008-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00008-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21766deed6e70e5b1f6b58de4eaa4d70d94829334e90fc3834efefb79511a8f0 +oid sha256:6c994ce4b912108eaef3f236d5684f55ef70b8501c1449ca5b1e89f864c67cfb size 4893533208 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00009-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00009-of-00080.safetensors index 52c65fdd47e7d51325005c2d5b337886b9d01789..c170adec1e062e3de89b4b0cb03602048123503d 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00009-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00009-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06614372feab433764aed7cf660650e71dc519cef60f64f9bdbfe6c9b5c200b3 +oid sha256:ad0fbbc4e3f5d5c2beb01c28f010d50d0fdef0f1da31963ed6f8d043138b05a4 size 4961115848 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00010-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00010-of-00080.safetensors index 6b9a064e0949d19e7bd626d7a75d372af832cb9b..c8b674ae772c175eba832ce76f5a826279218ce0 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00010-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00010-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4457ef3b320059f215e981fd515d642eec32a8763ec4a7c89df53eecd9133f66 +oid sha256:129373a286a7753372a69432b582f751897bc3e8ae4ddb3aa7a6e96445299e01 size 4961500304 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00011-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00011-of-00080.safetensors index c433a387b53b4319c47f93ed79eefe44f10782ab..16fe21606be9d35321007db8d2cad01195aab7e8 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00011-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00011-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:63480f418f198fb93302bea5570a8a2abf629329dbeb6f36d77dd0d120007c9c +oid sha256:8c8a23805f98451641cb02b21065a46839d2d2f732bd735ef4f330bcd21bfceb size 4893533184 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00012-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00012-of-00080.safetensors index 1980bb811009084d36a73f1a242a5d2f18eec60e..95595b52e554a64a7fa2d85f26f3b7ce43cb47df 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00012-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00012-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5456b390570889577077b4311478baecfa94c15dc3fbdea48a334986c4477083 +oid sha256:fdc3b719e758b640ea7318623212b517fa5395b674a0113f08cb9e01b10b0380 size 4885340544 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00013-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00013-of-00080.safetensors index 75061d2c75d6c4cd492e7d84f336671777e70be1..510d1cdcda8d358d48cae956141b4e62cbefeea4 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00013-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00013-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07531b6c78253224421ad5a2984efb1e5ed5dc8afa2aa9b6ef5f0fb4774edc9b +oid sha256:813a495ed8115ca9e8b8151d1b3ab8ac32292250f26aa5e40ea7e539de857ebe size 4893533624 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00014-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00014-of-00080.safetensors index 3a88e4d932e79171681ca598052ade8dc91a39d2..6289f18bb0723042aab072ec87d72a5fb2b40712 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00014-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00014-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77818a52a915dff79d019a1956a3dbb4516885b2eec2a910b7b102d2f1c9452e +oid sha256:99db7c25f88a46043bffb7500c8379671596d8f5e9f3c4892a82918978d33c7d size 4884930896 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00015-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00015-of-00080.safetensors index b89770020ec1d027fc8c4527331b8cd8e3498bdb..308402d6415a446947d5ea7247ef21e723ee5622 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00015-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00015-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1a6b82f1305f38ad5c8cc7966600342bd342634d5f4122d0d1a945e320ce111 +oid sha256:5c653a924e70b1e5963985ba1ea73d4bff7bb450833c71b1eacf7f6712bd6747 size 4920539536 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00016-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00016-of-00080.safetensors index 933ca77999e497b671d92371927543c0e23d283f..0fe5a4dd4863c3ebc705e4abfad73c9cfd62a3b3 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00016-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00016-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:07956574fddeb21b905d274e7091f6b5cd69ae3407b3d197d1ffd5669eb6fad6 +oid sha256:0524647adf429e312bb6c58d6e7d6c41e95e3dd79dc1881805c1057cee87dbab size 4893533328 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00017-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00017-of-00080.safetensors index 378d0bc48b955a72d760c64dbb9845ec07e67304..f7722bf46558b4038064a181174983c6be3dc09e 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00017-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00017-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e2c344f82131cea595a8c94005a7e79af667b393709ed42a87ad6aabc7d6d4b +oid sha256:ef95353e285c4edcf2203da8cbc796a0c923cfcd28f7dec96f954188c92b12b3 size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00018-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00018-of-00080.safetensors index 3ec9afb88e57e7a92e27e86ae24897f786886e14..f382e2f8a28601ef1bd8c6d89d76be0038597745 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00018-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00018-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61ea4a8577feab0de9dc6e990b7d325273effc5d76b024135a685960959bf270 +oid sha256:de5ed4ac92b109df43bd21f6378eb5bd59321f3c1adac462ade563f3e2a02ad8 size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00019-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00019-of-00080.safetensors index 10d063cd05c4bfacf9daeca8cba652a90b0cfab6..31a4a5792f2c3975513d1d26213a6eca3205a2d5 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00019-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00019-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f187904c6f482d154ed4ca028af8885ed78744156245afdb42fa6424d51910e0 +oid sha256:39616a511b70623c9560e47af03093910e59106ef5140008967b33a72b1068e1 size 4885340680 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00020-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00020-of-00080.safetensors index c5b6945aa38a899d2428fcf854da5abde2131b04..713a76fd7ddc573efeee9c2f86775a56f51ae1e6 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00020-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00020-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a932c96c17c37463622f88e239a52d76d3e7cd8606d9f843d7921b3866756c22 +oid sha256:6e818dd5788a99b30250130b84df5dacf6f76968c767c16d239d3d9657db1fce size 4920129824 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00021-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00021-of-00080.safetensors index a3853ee46ce2fa753a18b31d2ed1fecea9ca65d7..59fdc2daf61f50500f65912630221dd6f36eebab 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00021-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00021-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f03c80dd2a592ab88f03c7460475071636c7fd47a94b76f77161d70581d50010 +oid sha256:39ca5671a15e7115a4942be29b5e7667e7cc49d699af27a707c1588866b0955d size 4893533320 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00022-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00022-of-00080.safetensors index aa10e8750bc260a9319f40154c75666d4796821e..a287a183e7f01fcb121d71b2f877931d5c43295c 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00022-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00022-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f3b6139c541f318b2f5e5a8a0d95382d1ab2d700cbe868c2045314c13f6b298 +oid sha256:8dc0573a9ca15b77d8cb12e814e124c019cc730f55234f1618e07dbe76d04503 size 4885340664 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00023-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00023-of-00080.safetensors index 2e9ea9355f7b1c90e80d9eb5e0f24c5274fe732f..ecb7dbe7a15a297e809b58305ba092cf6ef05b4e 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00023-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00023-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:406152b551648efff8e08e9ebfc1b98c3dd3c88fb882399379f8f1e751986dad +oid sha256:064a247e467d0dd62afa4d7eb83e2afc34b9035a220a004ba133e1f1b94be9ef size 4893533360 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00024-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00024-of-00080.safetensors index 1ab2dd57673c9127fc6fd028fada7624ba1842fc..342a9d0c20c42e7a9f8702a7dd3d2115fa8a2512 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00024-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00024-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab9fdb81550275baa4bdecbb04f6b635d69786d58b5b9e15fd11c8d0e45a9d1d +oid sha256:43b297381f0d2da5b6c310e85e040f0e71d5fbbba15e483583c1431b09489a7d size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00025-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00025-of-00080.safetensors index ab07bc6d406fb70646e6d6dd2e96b25def02664d..8e83d892905783b94ff4c755b2c9a2cc18b45370 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00025-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00025-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aeef0752daaa9dce0ab3a48483a14a8f4077e6403e49e578fbcbba3574113825 +oid sha256:93a7ec7dc233be3a4036ba66fea5e16171482bec58cb3d8c26af5277434e796b size 4920129824 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00026-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00026-of-00080.safetensors index eb128749755b9a2ff2fd66f8e57c0d09ddf8d7ee..07626feba10c535766706add9674233dc7544ec9 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00026-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00026-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef2f23bdeba79cc2644f080e2fdd3c4edfe8450fc5f91e573aa89da8a1b88f00 +oid sha256:ffa16013f5598d8ba7a3ba266b1297564d23946b7de6153b35c69bae4ff38aa8 size 4893533320 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00027-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00027-of-00080.safetensors index fb2134a4d30e1de8d4e966d92ad0d0ce815c9013..8d54be27c6223aa8806f8818877973ac16c0b18c 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00027-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00027-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2a55ac4e84aa2fa6d20740ecb3b4287f911d71373cdcf6c7a2403de62b4546f6 +oid sha256:e4e3798f3c6582ad814bfd75857142b0f9fab7ccfb384edce3ffad27af4380af size 4885340664 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00028-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00028-of-00080.safetensors index 34ada87deb7908d682b5c6e52a9b5a8a86f4c41e..5b286b840d030538438260bdec0816c848099733 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00028-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00028-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abe0d28e539c38dd463f4b24638d0171da36426bd9d4be1d761e004552fe636f +oid sha256:f36cdd485b20a1dc8ca35c08f7599991dafc7a10d2f940812686b9923976336a size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00029-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00029-of-00080.safetensors index 1e744483d7ca7d3bbfd7fff9d4d29534279ba967..02cd73740b45bddd01071dc3c74eb3ef5ffa7941 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00029-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00029-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0d2394809ac89de4c827f0bf04f71835a90c75e233ee440fe01e32f02691b059 +oid sha256:de881689df08c8523033478f1f986a7bb36a4a2cd05f4381bc2716716deacc65 size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00030-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00030-of-00080.safetensors index e2c7aaf0f7491cc556a2fddf7b8d21fa44cebe31..2ccdcd830d71e1c19e5e00948c82c8837dcfc842 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00030-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00030-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8add053886e678c67de09fc05502b38d528932642fd9daeef82b2caad301910a +oid sha256:0403b94b04886b25e7319322e5d1af7bcaaa569adc42e17e9e92e9fd9f8fdbb0 size 4920129832 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00031-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00031-of-00080.safetensors index 9eebf2679f9073074d38f6bcac0cc9c4021c5ddf..8e95f330f8cfa375a90d404d3f2672ab3c2bd26c 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00031-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00031-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a845c4b784fb0bedb0f83ae5eeadf76b52e26d35077420cde2ff0973968302f +oid sha256:a898f9606c9ec43f0350289de27508879ff80a4f2fc5a3646f9ba17171036f87 size 4893533328 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00032-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00032-of-00080.safetensors index f9479b887a15d11aee08c377105b1513e610f6c6..0259282929fd316d58c7838faaf4eed28fac51af 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00032-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00032-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd8bc1a97d29ca079e66f017c2fc24e12e76084ad3dcea7aacd61d37acb66e0a +oid sha256:2a7073e84e21fe08e1bfbb4ec23eaedc8a4f60a86e9634850b2f588e7d36104b size 4885340648 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00033-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00033-of-00080.safetensors index be0fbfd9d55eca7aaa2335e8098838937db72cf1..be7e4c23accd124cc8c1366dfc166b5d6120ca87 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00033-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00033-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:963d68433c399ccdbd5afe9bfe4a1b64bb81ebd330ceded6af6e086af88e6851 +oid sha256:c1e26619607cfb9ecf1e06451a5982a9993970d1dbd609174f973df8da72b579 size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00034-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00034-of-00080.safetensors index 5fcea7cd5f0dd6b8c5bdb104cc7fa45b8f8532c9..b96330b9d562b0fbc30251a79a6b948c42d59f29 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00034-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00034-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3a32ae571b4172f8b514a512acce823e15e346cf2c814e81bb299ef515645c71 +oid sha256:0e32759f62718534c688bda81ff0fc7b5299b768d18e0d20206a406c620c7675 size 4885340680 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00035-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00035-of-00080.safetensors index a0a4381fb8b784e70c2fbe57b3f388ee79349c46..76774038348c5b76b6c0adef8769000db14dc33b 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00035-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00035-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7fbd6a03dba49f253d0b15c1619740fcb02a31f072cfe0412addb74619da3ad1 +oid sha256:aca0713595056cb9cb392f663044428e49e0fc08030f096272bfe118ede676dd size 4893123544 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00036-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00036-of-00080.safetensors index 19edd440db995db59b9246b8ea5b09b97e621261..9bf85eced44ede03faba6124049788a6a537e677 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00036-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00036-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b5fc1715bf36b8eca1f4ee9e886af71c679093273bf2c632213eb3e8ccdcc851 +oid sha256:e615f52c0ef4e0dbaa6eae2c2f6a6a5efa8a6f838ec55cefbfd32030b5469f78 size 4920539560 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00037-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00037-of-00080.safetensors index 5a51a7a8c9bf73c70adbc3e302f588f2e8ea37a6..0695dc35f4be64371018f0a82cba92221672ce4e 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00037-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00037-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:231987dba4383a2c4905cb2959622ba32ee95af222a105f24690b9ad3a3c1387 +oid sha256:29f17d561a1fe91d57e6f55efef958f396a4ece55483da4a56273b3dd711e565 size 4885340648 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00038-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00038-of-00080.safetensors index bb4e7675e59de60409ab9020067e8ff8afe94b11..a95f26f5c6576ad02ea2b18fb10d7bafde32c47f 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00038-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00038-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e261ba033602b0c39b312d3c5a2ea3b5464884ff3111d0fbfce8698a04681ec +oid sha256:ceebb116b86be704243309cbb7a46732ebe3d3bf2906c57f125dd40eee0a3036 size 4893533360 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00039-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00039-of-00080.safetensors index b959a87dddb0f6b9f1cf93a479652dc7769e722c..88985d2fb142a80f8051585b0bd3a6ffa5bc3dc2 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00039-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00039-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3cbb5f39aa05ef2946bd31d955a512c32fda9362488f0076ca365788c528dec +oid sha256:9ebcaf4caf60115c0620aebe46c734e8d5c898dee88ac3165a0123624c12ad28 size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00040-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00040-of-00080.safetensors index 46cfc61bd3fa12ad6848f16033bac24b5f008d17..ac613e1b9805a9a2225bda35a9eb9f25ded7b02a 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00040-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00040-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c664f93eede6dbb90676f2f698044ffe073e2779a674789c11765730ac050fb4 +oid sha256:5a4207621c18e01bd8f9095bf99bf91c95854f2ee1a80e48bef255523c7545db size 4893123552 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00041-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00041-of-00080.safetensors index 8bbe43e7d4397e49e33ec80d8dd37e4e91fa7961..e7cc75cb7e11a8556270976cefbf34815b4620d2 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00041-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00041-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec672869960a25839d3ea1eebb5c461207a77bab7ced795e45d4bedd4eb223a3 +oid sha256:4af57b9adb295cd534724c62caec0faaebb4b989aaf5fbe2d9a59317929e499d size 4920539536 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00042-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00042-of-00080.safetensors index 5022f9d9b4a09633df3098935b24fc2cb51e6896..b3f6f26cdb657ec8b02c5579073e2b3ad5a19b82 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00042-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00042-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f3d6321192323b2561f41f4545e2ed3c72601eb46197fed605c6e22d8211c7a +oid sha256:8a2073a8eb463623997a5270daf62afcc5211c9c11e03f78d71561ebcc3efaef size 4885340648 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00043-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00043-of-00080.safetensors index 66601de67c098c7725a6fee1ef74d5816d057b7e..71bcf1508f45ad52b60f04f5f92e860c9a58121d 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00043-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00043-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5f9d9bcd38da37d15bd168b4dc7a1558127705f214d5c4e44bff68a626fa62c +oid sha256:a863947df273b5401820a4e12a459d81f84d8e6b90ad8d75a07193752ce6deac size 4893533344 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00044-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00044-of-00080.safetensors index 91d3f1cbfd840ebcf6535f68d8c1c91d6c6e807c..69195b20ce349d3be92fe212f1ad40cbd7a352bb 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00044-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00044-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:336270175a59084544198106b89684d232e98aa70b45315fbf2eb6dd04bf75a7 +oid sha256:ccfee0ba313831c35767333f7e014e5a16cee05b6b6b890704d217aaf2b9639f size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00045-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00045-of-00080.safetensors index 464003e8d249dfaeddffa61643187a981c52c351..6c80194d2f7689dc1b4ba4a0c5cc2236343b9999 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00045-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00045-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f29b7e6fbfbd04d366a299d48f135f9a9f8f69e4f9eef2be7e09cfa36d6bf134 +oid sha256:23b1f9b1a9aa89ec458467f659a2b498de272d3735e549998efc48975d2bcacb size 4893533360 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00046-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00046-of-00080.safetensors index e4cfca3dc13a654c79981536cbab5b21b351bb2d..da529921182e60623b4389b53edea3fbb7f3376e 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00046-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00046-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:743303bde1bad2b3e915ec7c88e78cca52028fc8a6705a4a1f6c361c166e6516 +oid sha256:f13b68ce5c5867a04a3b920d79ba63cf63c866de7b3455ab8568a6f0223a17dc size 4920129824 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00047-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00047-of-00080.safetensors index c8cb277e137346c31151842efd0b05efc6b31050..900240b887cb5cd3be2338f89cf5291930d3ba05 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00047-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00047-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e916464c1bea93850eeebc7d0f3bc444194fd529e63e38a89ec2586bff83bb40 +oid sha256:2c00e3dcea7480ed15018a177ba3abb222980503270057b5d341efab6691564b size 4885340640 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00048-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00048-of-00080.safetensors index 45b9c0ebbcc459ed8534883337cef21d0a39d00a..418c55a22ed75a39877782af0521385c03d2a724 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00048-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00048-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:31255b0b50c0f3f26abef3af775263f68c16a26f7f7e0c9c17b4a6fc230acb56 +oid sha256:f7045561a2d595056123fc18064da04abaaaef0fa0fa4af78cc8613d0da24be7 size 4893533344 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00049-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00049-of-00080.safetensors index c471e65a514dbf74f1eb2228489b9dfb28442e82..95fbacd1e73d7487fcdbba161c68e67e2e07aed4 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00049-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00049-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f49aa9a33d6711395b5491d899188be0dafcfb964469400dbc9a54850212335 +oid sha256:d1d570d112b8e9735d6c3285c96082931550256be8b012edc18d6ddeec393210 size 4885340680 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00050-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00050-of-00080.safetensors index d033d7aeb117ca33d9960326a39430475492c47d..911f358f114077044cfec460a4ea6de4f5b0b63e 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00050-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00050-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e076243c631957920f2f173ee9449c102155ab6d6151d3296c05c59427ecd364 +oid sha256:7be6d634773ac395af16a0ba7df59475518c2cebc0660c04ea9604471e6c2da0 size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00051-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00051-of-00080.safetensors index 8ecde42209326dbd9335e9c87a0cce12d6484a9f..22e6804522794c4946fd967b3e32597020cf7f6d 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00051-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00051-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aff3b7244ec1ba66da17b08f6de06e9bc4a8ee4ecb255011ed34654bd24d9d7e +oid sha256:96c0ff0f22ff8110a251d1badf17fd76453c863e8d5b9538e186437210b3183c size 4920129824 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00052-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00052-of-00080.safetensors index 58e861f947ba11892d4caf058de419d4b315a596..95ec54c74e9a19c2e1de70b24eb6256f766d5664 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00052-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00052-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3a78243232350b99b227c60e33f05343fc3af67bb379bcc253ea519faaad6e8 +oid sha256:665450b3404282fb12fffe2eb505549d9c0be8861a4812edb2f01bf76d816afa size 4885340640 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00053-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00053-of-00080.safetensors index 933bdc04c518fcdd190c3e4b40804deb89caa305..a46d6eb9db81757b0ece5e84d83eb17e96dcb8f4 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00053-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00053-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a9394a9f24c05914294613c0f9c37fea1c7509951aced40c9a44a68cd8850b6d +oid sha256:980c418c095cd2dc2e48c00423f9ce90a20a608cf3f782e7e06a9b9fbf674459 size 4893533344 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00054-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00054-of-00080.safetensors index f3aa0128ed1a97eae8cef8b498440854498e2790..35ee197399448418365b795c77947ebc39d4a365 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00054-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00054-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e5607618bb43a691f456b714b13df9718ed0fa6d76e6fd5a369a423d136323f +oid sha256:0182115eaa282f133c75750c526ebda9a505343fe52eeb54e99b331e781ddd01 size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00055-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00055-of-00080.safetensors index 9825913590fc08fd11057e720b13d7fcd7da8925..31018649edbdd3a586e816b389c44282d2afe7d4 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00055-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00055-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:46ed39b8611afa19480e63f7d89a264ef3843cee1072690bce1ebca07d8da923 +oid sha256:a2d94e0f298c77f0d722e2f41405ab6ee322a177867eeb5d3ea01198beb60068 size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00056-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00056-of-00080.safetensors index 2e652868688e1755514362c3cde0e8805f1cd711..21ca2a9f87068a4f6fec2859d78ec0e5555ea262 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00056-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00056-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:29616c2a00ca38ce9d904226f8c8505a41756bb7398f32c1672dafacf92165c6 +oid sha256:5e09d4081ab3cdf33cd8abdd9d5c9635b2351a7062ff232652c64534cca2141d size 4920129832 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00057-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00057-of-00080.safetensors index 1dadac6ff99cd8e22f3d31adcf06c24978bfdb03..32898ae9695038eaea6b214d2259e0ce93be2ec5 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00057-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00057-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5348644f628997ae1dc8dc9ca77a111b96316e23219de3c19a9536ab97dbe55c +oid sha256:094b0bfe0ea2730ec43f23d789c3f130813b457d4dac410210b72fd540118eec size 4885340648 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00058-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00058-of-00080.safetensors index 7a412123a9923e3f9347667208d48a25472fa16c..e517be05fbae6e37f352743e43eb43d084ca26f5 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00058-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00058-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:872eda599d9aef4900f754747a90b63092bb75e4e5ab3e6b15a4da4a5e859061 +oid sha256:25efa98b4d0e314258e9b1bdc3f4f8616540fef7c10c955a1c6139918c47a684 size 4893533328 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00059-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00059-of-00080.safetensors index 2a9a7de472383921b28549313a460f52a563dc3b..d36f695a9e38d36b45235f99b5d0f025bdca74d0 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00059-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00059-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d44513dee840dfdce0dfc79d91cd9e0314dcd2a1b91839184e4fdfad74c929eb +oid sha256:b9a5340743ce9442068ff79d44849d3409d5318a22473255caf717ff9f246391 size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00060-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00060-of-00080.safetensors index df0913336c78a16cefd52f678c706b0f21b7ce13..11b49c4ca55c42ee431f9625bd622fd3ee4b291c 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00060-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00060-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5e996ce88312ade57026e018716132a06c14b18619565af7f62a91a9743cf098 +oid sha256:22bf94f02f3be210b254ff2af666481623c5b4219f884fe703e7a491dc296a35 size 4893533360 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00061-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00061-of-00080.safetensors index 16650d5d84acaeba797aaf9faf920582de73cfe4..6729ebe2743a1ced2d819e2f57fc962768ce659a 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00061-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00061-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4bd746013d81383facbf87a322c1f3e73c409b566447003345b898b70160c7d1 +oid sha256:154b8386a73191fc57eda358998d487459176c6b5e456aa42597cf9f674d50f6 size 4884930896 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00062-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00062-of-00080.safetensors index f3ab982e739c23ccf080a7efde1b695be413748d..6d5daf547a50f3cc4378d34d2ff2b9ba8f7ab32d 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00062-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00062-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7e2b286227bb7c8cde1d5b20ca19d570081f9ffc15b63bd7ac4b4d204f18877 +oid sha256:bf7164a8821bfa86ff9ad468b961cab71c6ec31e4c5c3b01eb536ad896d4fdd6 size 4920539560 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00063-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00063-of-00080.safetensors index a320263a64d11c43bb8378c78e98239bfa1255db..6199f35a8273acd8aaece793e142c28698f38873 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00063-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00063-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a86633011d48f56819bc0b94724b97145a7d2d78d3d7abb7a769a71477c326c6 +oid sha256:32da8da2296270911a3d4f119be05793c79f0ce57c4635a80c46257b157b5651 size 4893533320 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00064-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00064-of-00080.safetensors index 08d633908bb38ee6c2adf90806491f41155c4a0a..0b69d22e2f019448591ee6aa2711a0400fa0f304 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00064-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00064-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa6cb0ce20da57c1ac47c4570aa97ec7838e7bdf888d572eb8124d4c38690151 +oid sha256:8fbebe8b796b58a72415e5198573abef693a6cc5b5bfd9db13d6213ef8271a08 size 4885340680 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00065-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00065-of-00080.safetensors index b6daba92779bfb3164dc19b4ea51e100385b7c32..5a38b2b9fb761c7216a19e84a56bd2e23423afe4 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00065-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00065-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21aa6ff0fe57fdb3bb6eecaa43838a2ae3f5fc0f93557cabd59875e01b117f88 +oid sha256:a32d889378aa88aeb67a3a6998d7da6b5d1aa21c3eea543bb487999d023c3c69 size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00066-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00066-of-00080.safetensors index 624d9b1db32b5d5036916fcd656c99144266d71e..77645508cdc5d030407c1aacee13e2698f8bcada 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00066-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00066-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4410d64cd8ffe7c08a92db2e99dd9411bc600e71abba49923dd5a82a7e76c15a +oid sha256:1af6adf243858a4ae4d130916b92dffe97fbc4fc1fe53331454502bea63580ca size 4884930896 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00067-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00067-of-00080.safetensors index 1429927d014688c1ee62964bd0a913d5d650898a..6e726deda7d25725ca3cd76c24d0699762c3c9ab 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00067-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00067-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92f6e725bede20aadf644e2f8b00c831f67d8277e0473f4423e272cdab2e90fb +oid sha256:1c5b4e1bf47690fda00abac033eead98e2558bdc4218038b85ff04a3db0ef31b size 4920539536 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00068-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00068-of-00080.safetensors index 3b1e3ba40c78d215a5febbeecb3e5c474b7db317..d8e60ba0e69244b915af65e421240adb5cde169f 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00068-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00068-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cfe1ef4af9447c9b757d8bb0eddc03a79d15f2b1af14386655a1ca1089ee4db3 +oid sha256:6dea83d2d1cc4545ae677b8e0a472ead85ce8a36ec2699b4651e60ae03daa0c3 size 4893533328 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00069-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00069-of-00080.safetensors index 94c2dd628920f858adbb69bee6d46bc164c3dd9c..0567414f850fe659b7fb44ca49ae73a7dd2fc7ee 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00069-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00069-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:148d059759936c3d8d8c5aac1637ed112905947919583dea69cdb975ad8057c5 +oid sha256:292a0e85f05394627fd84094474041184efe79229ebb53363056adcf29b6f69a size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00070-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00070-of-00080.safetensors index 3941e84e24e53f63c27505c89b4dbd8f07a375a6..281bfea69c087745466283a186500752c605a70b 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00070-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00070-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d97bde32a727c4b75d876fc9492e12243e2dde2293ecadd7094b7ce32036ab2d +oid sha256:e6c59b251d22ceb0b0b6686c113c036ae6836813048f98167af40b1594b1687d size 4893533352 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00071-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00071-of-00080.safetensors index ade829035f83512edf3c164af61c82116d003c20..8e7916d0619efaf9a8a442622354d0def583d09f 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00071-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00071-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38574248bafc6e3d485ac995eb94adc64aaf3f5613c26236d30963343440cca5 +oid sha256:0f02a4c107d9dc047359d1e6d9788c6980995efcc28ebc6e8f3405437e1bed60 size 4885340680 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00072-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00072-of-00080.safetensors index 0b6c72da63e5f8b83165049ac0c8b71770f37bae..397583d81f763f5e66e483c164d99aec64ff0a23 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00072-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00072-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:95f60270823caf900ff7a1a43f8dba4edc8384fd3c4c97aea2a7bfe955ebea9d +oid sha256:b2b205051c9de68be6b6c2cf096ce40801370461bda780e1355b36fb85c68018 size 4920129824 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00073-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00073-of-00080.safetensors index 39cb0bfe815790260ac0eca50b686dadbead7066..ea84cfdebb98ccade1c10045e229b51c941d0ed0 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00073-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00073-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01e4f63b1aeed5cfaa413269006d25939818b340971f75f21e7bb486aedcbed4 +oid sha256:366888458ea0877b16ab2530b63b22ee0c50c4cab73b793042270a611bca2de0 size 4893533320 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00074-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00074-of-00080.safetensors index 8c3793180b218bfd1656c2d3aa4886c1653af5a4..c694e0c7c8ad8201b9cce4ba00a010526d5ecd1f 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00074-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00074-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba248d585aeb9ad3354c3315c2cb5ae7b9effead87b980c11508a79b37e53b75 +oid sha256:8dc64ad142fa619c60a1172f4e920c839f4e16e827daa136ad25db190832e9ef size 4885340664 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00075-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00075-of-00080.safetensors index 3da3ea23dbbab243853a8b9616ccb011157f63da..048209dcde0df865bdb6c1cac068e87e8d0f4cb6 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00075-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00075-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac4a9e0ce4a1c15809e68f1bf33ba871c121b11aa8fa268ad0d7986196a6e47d +oid sha256:ac974f04cc4389f513fccc1afea48e204af23dd30e4cd511b5e2dc916186d522 size 4893533360 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00076-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00076-of-00080.safetensors index b38bac34da0b85d728495a23bc9d6481280c0052..5a6ef93ac875c7297ef5863a11c28df4068aee49 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00076-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00076-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b96c823346c5e6fdbe680e19509421b67e89709cae7bd92a09151cc04e40867 +oid sha256:f3b2bc784fdc410ceea6965995fe0c5b4b01a76f3dda94aea7b78c40a7f94b07 size 4885340672 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00077-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00077-of-00080.safetensors index 4157b4e5a4cb7874e1c90668b21c9069397c81c9..1a24122f4f6543b1611a3e8cb91b6d630c646df4 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00077-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00077-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e4bc1d079a6b747eaeb86b24a136f9084344bdc1bb422d7d7c604f8166ada6dc +oid sha256:455e8314c80ec9cbf1ab543dd16c4c98db45477aa7eed748db9f041d25bcaea9 size 4920129824 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00078-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00078-of-00080.safetensors index 8f64521e294a2413e8eb0c65eb6d9f38fe44f8ca..da95622439edb086304d96db5cd3b006798c6b4b 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00078-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00078-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42da605469585b22735c4ba28c8b378f2521bf6e5babb9720cadb708cd62bd5c +oid sha256:24cadebd5256629708c7f2e1b093e682138190636139ea5c6600b3c9be3732e9 size 4893533320 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00079-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00079-of-00080.safetensors index b9e3e36f5683f0f39cd4a893e5c1b78dbe3fc21a..e1275efbc583ac4841b790f5d874441de34bf5a9 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00079-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00079-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:75acc876dfc668d945a69354a57cc73e5d9b8165b464c660ba43bbcbed476062 +oid sha256:1b3243acb122140ec24a1708d303cbbf62a9a40abd1e619f1f1399835423cb5b size 4885340664 diff --git a/Hunyuan-A52B-Instruct-FP8/model-00080-of-00080.safetensors b/Hunyuan-A52B-Instruct-FP8/model-00080-of-00080.safetensors index 5e582d9aaf5e5062712ae31a584156d857991810..2a7d0f38a5c895bb6693998b5e5fc91bf19c9d78 100644 --- a/Hunyuan-A52B-Instruct-FP8/model-00080-of-00080.safetensors +++ b/Hunyuan-A52B-Instruct-FP8/model-00080-of-00080.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cce8d1c3d1805a4a9be03848d56ad1e4c3f137c0e7cbf35184fbe033b6cda058 +oid sha256:92fbd30806238f74fdd573369176d5d0b2b097400b78866b04774c6000c03d9e size 2694395520 diff --git a/model-00001-of-00080.safetensors b/model-00001-of-00080.safetensors deleted file mode 100644 index 0440c036ee9c5910de9975c82f6a576dfd336a73..0000000000000000000000000000000000000000 --- a/model-00001-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:521c6c7aabefd888aeb2937ed6eecb1d9d7dd30b866e0e6eb7beeed2dbaef264 -size 4904971448 diff --git a/model-00002-of-00080.safetensors b/model-00002-of-00080.safetensors deleted file mode 100644 index 05878453a837a0b968221f8111bd64e691611845..0000000000000000000000000000000000000000 --- a/model-00002-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c45b39e3fd58782d342c076a607eb493799bc22a503766ef645588f822aa51dc -size 4885340536 diff --git a/model-00003-of-00080.safetensors b/model-00003-of-00080.safetensors deleted file mode 100644 index 57182d16198c38dd4a30db8e0a88768031393c9f..0000000000000000000000000000000000000000 --- a/model-00003-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5050ea793ed99055b7ab92c6ea3a0c47e123d14f858db034f84a8a66b9cd3b1e -size 4893533216 diff --git a/model-00004-of-00080.safetensors b/model-00004-of-00080.safetensors deleted file mode 100644 index 39919b4c907a1c283d0c93b8496022f8ffb01fde..0000000000000000000000000000000000000000 --- a/model-00004-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0119bcf5d96466178f6ed5cb08af964b8acfe8268cc7f4a86584b376674f1a27 -size 4920129704 diff --git a/model-00005-of-00080.safetensors b/model-00005-of-00080.safetensors deleted file mode 100644 index 6e986fee7bae966323e9e374f13e7ed66f6aca8a..0000000000000000000000000000000000000000 --- a/model-00005-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c9dc50b08030c0bcb750505d63ee7fc74003d07c297f95d8a4bad047b8148809 -size 4885340512 diff --git a/model-00006-of-00080.safetensors b/model-00006-of-00080.safetensors deleted file mode 100644 index b3df079a51d1596cf30184b840136f037d97e523..0000000000000000000000000000000000000000 --- a/model-00006-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:73be942dcde5dab69298870ee0f3c32f44d057f82315ebf1f8a1f22a02fb75a2 -size 4893533192 diff --git a/model-00007-of-00080.safetensors b/model-00007-of-00080.safetensors deleted file mode 100644 index e8c8db7c722876cf8d898c97b85b79603bfcb120..0000000000000000000000000000000000000000 --- a/model-00007-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:08532bee0ab217deca4c72e97e2f69cb2e030db30949619f4d1836db602cac1e -size 4885340544 diff --git a/model-00008-of-00080.safetensors b/model-00008-of-00080.safetensors deleted file mode 100644 index cc36227a997030ef1633c1bd5454ec1eacf9a34f..0000000000000000000000000000000000000000 --- a/model-00008-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6c994ce4b912108eaef3f236d5684f55ef70b8501c1449ca5b1e89f864c67cfb -size 4893533208 diff --git a/model-00009-of-00080.safetensors b/model-00009-of-00080.safetensors deleted file mode 100644 index c170adec1e062e3de89b4b0cb03602048123503d..0000000000000000000000000000000000000000 --- a/model-00009-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad0fbbc4e3f5d5c2beb01c28f010d50d0fdef0f1da31963ed6f8d043138b05a4 -size 4961115848 diff --git a/model-00010-of-00080.safetensors b/model-00010-of-00080.safetensors deleted file mode 100644 index c8b674ae772c175eba832ce76f5a826279218ce0..0000000000000000000000000000000000000000 --- a/model-00010-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:129373a286a7753372a69432b582f751897bc3e8ae4ddb3aa7a6e96445299e01 -size 4961500304 diff --git a/model-00011-of-00080.safetensors b/model-00011-of-00080.safetensors deleted file mode 100644 index 16fe21606be9d35321007db8d2cad01195aab7e8..0000000000000000000000000000000000000000 --- a/model-00011-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c8a23805f98451641cb02b21065a46839d2d2f732bd735ef4f330bcd21bfceb -size 4893533184 diff --git a/model-00012-of-00080.safetensors b/model-00012-of-00080.safetensors deleted file mode 100644 index 95595b52e554a64a7fa2d85f26f3b7ce43cb47df..0000000000000000000000000000000000000000 --- a/model-00012-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fdc3b719e758b640ea7318623212b517fa5395b674a0113f08cb9e01b10b0380 -size 4885340544 diff --git a/model-00013-of-00080.safetensors b/model-00013-of-00080.safetensors deleted file mode 100644 index 510d1cdcda8d358d48cae956141b4e62cbefeea4..0000000000000000000000000000000000000000 --- a/model-00013-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:813a495ed8115ca9e8b8151d1b3ab8ac32292250f26aa5e40ea7e539de857ebe -size 4893533624 diff --git a/model-00014-of-00080.safetensors b/model-00014-of-00080.safetensors deleted file mode 100644 index 6289f18bb0723042aab072ec87d72a5fb2b40712..0000000000000000000000000000000000000000 --- a/model-00014-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99db7c25f88a46043bffb7500c8379671596d8f5e9f3c4892a82918978d33c7d -size 4884930896 diff --git a/model-00015-of-00080.safetensors b/model-00015-of-00080.safetensors deleted file mode 100644 index 308402d6415a446947d5ea7247ef21e723ee5622..0000000000000000000000000000000000000000 --- a/model-00015-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c653a924e70b1e5963985ba1ea73d4bff7bb450833c71b1eacf7f6712bd6747 -size 4920539536 diff --git a/model-00016-of-00080.safetensors b/model-00016-of-00080.safetensors deleted file mode 100644 index 0fe5a4dd4863c3ebc705e4abfad73c9cfd62a3b3..0000000000000000000000000000000000000000 --- a/model-00016-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0524647adf429e312bb6c58d6e7d6c41e95e3dd79dc1881805c1057cee87dbab -size 4893533328 diff --git a/model-00017-of-00080.safetensors b/model-00017-of-00080.safetensors deleted file mode 100644 index f7722bf46558b4038064a181174983c6be3dc09e..0000000000000000000000000000000000000000 --- a/model-00017-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ef95353e285c4edcf2203da8cbc796a0c923cfcd28f7dec96f954188c92b12b3 -size 4885340672 diff --git a/model-00018-of-00080.safetensors b/model-00018-of-00080.safetensors deleted file mode 100644 index f382e2f8a28601ef1bd8c6d89d76be0038597745..0000000000000000000000000000000000000000 --- a/model-00018-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de5ed4ac92b109df43bd21f6378eb5bd59321f3c1adac462ade563f3e2a02ad8 -size 4893533352 diff --git a/model-00019-of-00080.safetensors b/model-00019-of-00080.safetensors deleted file mode 100644 index 31a4a5792f2c3975513d1d26213a6eca3205a2d5..0000000000000000000000000000000000000000 --- a/model-00019-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39616a511b70623c9560e47af03093910e59106ef5140008967b33a72b1068e1 -size 4885340680 diff --git a/model-00020-of-00080.safetensors b/model-00020-of-00080.safetensors deleted file mode 100644 index 713a76fd7ddc573efeee9c2f86775a56f51ae1e6..0000000000000000000000000000000000000000 --- a/model-00020-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6e818dd5788a99b30250130b84df5dacf6f76968c767c16d239d3d9657db1fce -size 4920129824 diff --git a/model-00021-of-00080.safetensors b/model-00021-of-00080.safetensors deleted file mode 100644 index 59fdc2daf61f50500f65912630221dd6f36eebab..0000000000000000000000000000000000000000 --- a/model-00021-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39ca5671a15e7115a4942be29b5e7667e7cc49d699af27a707c1588866b0955d -size 4893533320 diff --git a/model-00022-of-00080.safetensors b/model-00022-of-00080.safetensors deleted file mode 100644 index a287a183e7f01fcb121d71b2f877931d5c43295c..0000000000000000000000000000000000000000 --- a/model-00022-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8dc0573a9ca15b77d8cb12e814e124c019cc730f55234f1618e07dbe76d04503 -size 4885340664 diff --git a/model-00023-of-00080.safetensors b/model-00023-of-00080.safetensors deleted file mode 100644 index ecb7dbe7a15a297e809b58305ba092cf6ef05b4e..0000000000000000000000000000000000000000 --- a/model-00023-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:064a247e467d0dd62afa4d7eb83e2afc34b9035a220a004ba133e1f1b94be9ef -size 4893533360 diff --git a/model-00024-of-00080.safetensors b/model-00024-of-00080.safetensors deleted file mode 100644 index 342a9d0c20c42e7a9f8702a7dd3d2115fa8a2512..0000000000000000000000000000000000000000 --- a/model-00024-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43b297381f0d2da5b6c310e85e040f0e71d5fbbba15e483583c1431b09489a7d -size 4885340672 diff --git a/model-00025-of-00080.safetensors b/model-00025-of-00080.safetensors deleted file mode 100644 index 8e83d892905783b94ff4c755b2c9a2cc18b45370..0000000000000000000000000000000000000000 --- a/model-00025-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93a7ec7dc233be3a4036ba66fea5e16171482bec58cb3d8c26af5277434e796b -size 4920129824 diff --git a/model-00026-of-00080.safetensors b/model-00026-of-00080.safetensors deleted file mode 100644 index 07626feba10c535766706add9674233dc7544ec9..0000000000000000000000000000000000000000 --- a/model-00026-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ffa16013f5598d8ba7a3ba266b1297564d23946b7de6153b35c69bae4ff38aa8 -size 4893533320 diff --git a/model-00027-of-00080.safetensors b/model-00027-of-00080.safetensors deleted file mode 100644 index 8d54be27c6223aa8806f8818877973ac16c0b18c..0000000000000000000000000000000000000000 --- a/model-00027-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4e3798f3c6582ad814bfd75857142b0f9fab7ccfb384edce3ffad27af4380af -size 4885340664 diff --git a/model-00028-of-00080.safetensors b/model-00028-of-00080.safetensors deleted file mode 100644 index 5b286b840d030538438260bdec0816c848099733..0000000000000000000000000000000000000000 --- a/model-00028-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f36cdd485b20a1dc8ca35c08f7599991dafc7a10d2f940812686b9923976336a -size 4893533352 diff --git a/model-00029-of-00080.safetensors b/model-00029-of-00080.safetensors deleted file mode 100644 index 02cd73740b45bddd01071dc3c74eb3ef5ffa7941..0000000000000000000000000000000000000000 --- a/model-00029-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de881689df08c8523033478f1f986a7bb36a4a2cd05f4381bc2716716deacc65 -size 4885340672 diff --git a/model-00030-of-00080.safetensors b/model-00030-of-00080.safetensors deleted file mode 100644 index 2ccdcd830d71e1c19e5e00948c82c8837dcfc842..0000000000000000000000000000000000000000 --- a/model-00030-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0403b94b04886b25e7319322e5d1af7bcaaa569adc42e17e9e92e9fd9f8fdbb0 -size 4920129832 diff --git a/model-00031-of-00080.safetensors b/model-00031-of-00080.safetensors deleted file mode 100644 index 8e95f330f8cfa375a90d404d3f2672ab3c2bd26c..0000000000000000000000000000000000000000 --- a/model-00031-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a898f9606c9ec43f0350289de27508879ff80a4f2fc5a3646f9ba17171036f87 -size 4893533328 diff --git a/model-00032-of-00080.safetensors b/model-00032-of-00080.safetensors deleted file mode 100644 index 0259282929fd316d58c7838faaf4eed28fac51af..0000000000000000000000000000000000000000 --- a/model-00032-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a7073e84e21fe08e1bfbb4ec23eaedc8a4f60a86e9634850b2f588e7d36104b -size 4885340648 diff --git a/model-00033-of-00080.safetensors b/model-00033-of-00080.safetensors deleted file mode 100644 index be7e4c23accd124cc8c1366dfc166b5d6120ca87..0000000000000000000000000000000000000000 --- a/model-00033-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c1e26619607cfb9ecf1e06451a5982a9993970d1dbd609174f973df8da72b579 -size 4893533352 diff --git a/model-00034-of-00080.safetensors b/model-00034-of-00080.safetensors deleted file mode 100644 index b96330b9d562b0fbc30251a79a6b948c42d59f29..0000000000000000000000000000000000000000 --- a/model-00034-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0e32759f62718534c688bda81ff0fc7b5299b768d18e0d20206a406c620c7675 -size 4885340680 diff --git a/model-00035-of-00080.safetensors b/model-00035-of-00080.safetensors deleted file mode 100644 index 76774038348c5b76b6c0adef8769000db14dc33b..0000000000000000000000000000000000000000 --- a/model-00035-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aca0713595056cb9cb392f663044428e49e0fc08030f096272bfe118ede676dd -size 4893123544 diff --git a/model-00036-of-00080.safetensors b/model-00036-of-00080.safetensors deleted file mode 100644 index 9bf85eced44ede03faba6124049788a6a537e677..0000000000000000000000000000000000000000 --- a/model-00036-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e615f52c0ef4e0dbaa6eae2c2f6a6a5efa8a6f838ec55cefbfd32030b5469f78 -size 4920539560 diff --git a/model-00037-of-00080.safetensors b/model-00037-of-00080.safetensors deleted file mode 100644 index 0695dc35f4be64371018f0a82cba92221672ce4e..0000000000000000000000000000000000000000 --- a/model-00037-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:29f17d561a1fe91d57e6f55efef958f396a4ece55483da4a56273b3dd711e565 -size 4885340648 diff --git a/model-00038-of-00080.safetensors b/model-00038-of-00080.safetensors deleted file mode 100644 index a95f26f5c6576ad02ea2b18fb10d7bafde32c47f..0000000000000000000000000000000000000000 --- a/model-00038-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ceebb116b86be704243309cbb7a46732ebe3d3bf2906c57f125dd40eee0a3036 -size 4893533360 diff --git a/model-00039-of-00080.safetensors b/model-00039-of-00080.safetensors deleted file mode 100644 index 88985d2fb142a80f8051585b0bd3a6ffa5bc3dc2..0000000000000000000000000000000000000000 --- a/model-00039-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ebcaf4caf60115c0620aebe46c734e8d5c898dee88ac3165a0123624c12ad28 -size 4885340672 diff --git a/model-00040-of-00080.safetensors b/model-00040-of-00080.safetensors deleted file mode 100644 index ac613e1b9805a9a2225bda35a9eb9f25ded7b02a..0000000000000000000000000000000000000000 --- a/model-00040-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a4207621c18e01bd8f9095bf99bf91c95854f2ee1a80e48bef255523c7545db -size 4893123552 diff --git a/model-00041-of-00080.safetensors b/model-00041-of-00080.safetensors deleted file mode 100644 index e7cc75cb7e11a8556270976cefbf34815b4620d2..0000000000000000000000000000000000000000 --- a/model-00041-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4af57b9adb295cd534724c62caec0faaebb4b989aaf5fbe2d9a59317929e499d -size 4920539536 diff --git a/model-00042-of-00080.safetensors b/model-00042-of-00080.safetensors deleted file mode 100644 index b3f6f26cdb657ec8b02c5579073e2b3ad5a19b82..0000000000000000000000000000000000000000 --- a/model-00042-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a2073a8eb463623997a5270daf62afcc5211c9c11e03f78d71561ebcc3efaef -size 4885340648 diff --git a/model-00043-of-00080.safetensors b/model-00043-of-00080.safetensors deleted file mode 100644 index 71bcf1508f45ad52b60f04f5f92e860c9a58121d..0000000000000000000000000000000000000000 --- a/model-00043-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a863947df273b5401820a4e12a459d81f84d8e6b90ad8d75a07193752ce6deac -size 4893533344 diff --git a/model-00044-of-00080.safetensors b/model-00044-of-00080.safetensors deleted file mode 100644 index 69195b20ce349d3be92fe212f1ad40cbd7a352bb..0000000000000000000000000000000000000000 --- a/model-00044-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccfee0ba313831c35767333f7e014e5a16cee05b6b6b890704d217aaf2b9639f -size 4885340672 diff --git a/model-00045-of-00080.safetensors b/model-00045-of-00080.safetensors deleted file mode 100644 index 6c80194d2f7689dc1b4ba4a0c5cc2236343b9999..0000000000000000000000000000000000000000 --- a/model-00045-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:23b1f9b1a9aa89ec458467f659a2b498de272d3735e549998efc48975d2bcacb -size 4893533360 diff --git a/model-00046-of-00080.safetensors b/model-00046-of-00080.safetensors deleted file mode 100644 index da529921182e60623b4389b53edea3fbb7f3376e..0000000000000000000000000000000000000000 --- a/model-00046-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f13b68ce5c5867a04a3b920d79ba63cf63c866de7b3455ab8568a6f0223a17dc -size 4920129824 diff --git a/model-00047-of-00080.safetensors b/model-00047-of-00080.safetensors deleted file mode 100644 index 900240b887cb5cd3be2338f89cf5291930d3ba05..0000000000000000000000000000000000000000 --- a/model-00047-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c00e3dcea7480ed15018a177ba3abb222980503270057b5d341efab6691564b -size 4885340640 diff --git a/model-00048-of-00080.safetensors b/model-00048-of-00080.safetensors deleted file mode 100644 index 418c55a22ed75a39877782af0521385c03d2a724..0000000000000000000000000000000000000000 --- a/model-00048-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7045561a2d595056123fc18064da04abaaaef0fa0fa4af78cc8613d0da24be7 -size 4893533344 diff --git a/model-00049-of-00080.safetensors b/model-00049-of-00080.safetensors deleted file mode 100644 index 95fbacd1e73d7487fcdbba161c68e67e2e07aed4..0000000000000000000000000000000000000000 --- a/model-00049-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d1d570d112b8e9735d6c3285c96082931550256be8b012edc18d6ddeec393210 -size 4885340680 diff --git a/model-00050-of-00080.safetensors b/model-00050-of-00080.safetensors deleted file mode 100644 index 911f358f114077044cfec460a4ea6de4f5b0b63e..0000000000000000000000000000000000000000 --- a/model-00050-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7be6d634773ac395af16a0ba7df59475518c2cebc0660c04ea9604471e6c2da0 -size 4893533352 diff --git a/model-00051-of-00080.safetensors b/model-00051-of-00080.safetensors deleted file mode 100644 index 22e6804522794c4946fd967b3e32597020cf7f6d..0000000000000000000000000000000000000000 --- a/model-00051-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:96c0ff0f22ff8110a251d1badf17fd76453c863e8d5b9538e186437210b3183c -size 4920129824 diff --git a/model-00052-of-00080.safetensors b/model-00052-of-00080.safetensors deleted file mode 100644 index 95ec54c74e9a19c2e1de70b24eb6256f766d5664..0000000000000000000000000000000000000000 --- a/model-00052-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:665450b3404282fb12fffe2eb505549d9c0be8861a4812edb2f01bf76d816afa -size 4885340640 diff --git a/model-00053-of-00080.safetensors b/model-00053-of-00080.safetensors deleted file mode 100644 index a46d6eb9db81757b0ece5e84d83eb17e96dcb8f4..0000000000000000000000000000000000000000 --- a/model-00053-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:980c418c095cd2dc2e48c00423f9ce90a20a608cf3f782e7e06a9b9fbf674459 -size 4893533344 diff --git a/model-00054-of-00080.safetensors b/model-00054-of-00080.safetensors deleted file mode 100644 index 35ee197399448418365b795c77947ebc39d4a365..0000000000000000000000000000000000000000 --- a/model-00054-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0182115eaa282f133c75750c526ebda9a505343fe52eeb54e99b331e781ddd01 -size 4885340672 diff --git a/model-00055-of-00080.safetensors b/model-00055-of-00080.safetensors deleted file mode 100644 index 31018649edbdd3a586e816b389c44282d2afe7d4..0000000000000000000000000000000000000000 --- a/model-00055-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2d94e0f298c77f0d722e2f41405ab6ee322a177867eeb5d3ea01198beb60068 -size 4893533352 diff --git a/model-00056-of-00080.safetensors b/model-00056-of-00080.safetensors deleted file mode 100644 index 21ca2a9f87068a4f6fec2859d78ec0e5555ea262..0000000000000000000000000000000000000000 --- a/model-00056-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e09d4081ab3cdf33cd8abdd9d5c9635b2351a7062ff232652c64534cca2141d -size 4920129832 diff --git a/model-00057-of-00080.safetensors b/model-00057-of-00080.safetensors deleted file mode 100644 index 32898ae9695038eaea6b214d2259e0ce93be2ec5..0000000000000000000000000000000000000000 --- a/model-00057-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:094b0bfe0ea2730ec43f23d789c3f130813b457d4dac410210b72fd540118eec -size 4885340648 diff --git a/model-00058-of-00080.safetensors b/model-00058-of-00080.safetensors deleted file mode 100644 index e517be05fbae6e37f352743e43eb43d084ca26f5..0000000000000000000000000000000000000000 --- a/model-00058-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25efa98b4d0e314258e9b1bdc3f4f8616540fef7c10c955a1c6139918c47a684 -size 4893533328 diff --git a/model-00059-of-00080.safetensors b/model-00059-of-00080.safetensors deleted file mode 100644 index d36f695a9e38d36b45235f99b5d0f025bdca74d0..0000000000000000000000000000000000000000 --- a/model-00059-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9a5340743ce9442068ff79d44849d3409d5318a22473255caf717ff9f246391 -size 4885340672 diff --git a/model-00060-of-00080.safetensors b/model-00060-of-00080.safetensors deleted file mode 100644 index 11b49c4ca55c42ee431f9625bd622fd3ee4b291c..0000000000000000000000000000000000000000 --- a/model-00060-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:22bf94f02f3be210b254ff2af666481623c5b4219f884fe703e7a491dc296a35 -size 4893533360 diff --git a/model-00061-of-00080.safetensors b/model-00061-of-00080.safetensors deleted file mode 100644 index 6729ebe2743a1ced2d819e2f57fc962768ce659a..0000000000000000000000000000000000000000 --- a/model-00061-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:154b8386a73191fc57eda358998d487459176c6b5e456aa42597cf9f674d50f6 -size 4884930896 diff --git a/model-00062-of-00080.safetensors b/model-00062-of-00080.safetensors deleted file mode 100644 index 6d5daf547a50f3cc4378d34d2ff2b9ba8f7ab32d..0000000000000000000000000000000000000000 --- a/model-00062-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf7164a8821bfa86ff9ad468b961cab71c6ec31e4c5c3b01eb536ad896d4fdd6 -size 4920539560 diff --git a/model-00063-of-00080.safetensors b/model-00063-of-00080.safetensors deleted file mode 100644 index 6199f35a8273acd8aaece793e142c28698f38873..0000000000000000000000000000000000000000 --- a/model-00063-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32da8da2296270911a3d4f119be05793c79f0ce57c4635a80c46257b157b5651 -size 4893533320 diff --git a/model-00064-of-00080.safetensors b/model-00064-of-00080.safetensors deleted file mode 100644 index 0b69d22e2f019448591ee6aa2711a0400fa0f304..0000000000000000000000000000000000000000 --- a/model-00064-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8fbebe8b796b58a72415e5198573abef693a6cc5b5bfd9db13d6213ef8271a08 -size 4885340680 diff --git a/model-00065-of-00080.safetensors b/model-00065-of-00080.safetensors deleted file mode 100644 index 5a38b2b9fb761c7216a19e84a56bd2e23423afe4..0000000000000000000000000000000000000000 --- a/model-00065-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a32d889378aa88aeb67a3a6998d7da6b5d1aa21c3eea543bb487999d023c3c69 -size 4893533352 diff --git a/model-00066-of-00080.safetensors b/model-00066-of-00080.safetensors deleted file mode 100644 index 77645508cdc5d030407c1aacee13e2698f8bcada..0000000000000000000000000000000000000000 --- a/model-00066-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1af6adf243858a4ae4d130916b92dffe97fbc4fc1fe53331454502bea63580ca -size 4884930896 diff --git a/model-00067-of-00080.safetensors b/model-00067-of-00080.safetensors deleted file mode 100644 index 6e726deda7d25725ca3cd76c24d0699762c3c9ab..0000000000000000000000000000000000000000 --- a/model-00067-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c5b4e1bf47690fda00abac033eead98e2558bdc4218038b85ff04a3db0ef31b -size 4920539536 diff --git a/model-00068-of-00080.safetensors b/model-00068-of-00080.safetensors deleted file mode 100644 index d8e60ba0e69244b915af65e421240adb5cde169f..0000000000000000000000000000000000000000 --- a/model-00068-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6dea83d2d1cc4545ae677b8e0a472ead85ce8a36ec2699b4651e60ae03daa0c3 -size 4893533328 diff --git a/model-00069-of-00080.safetensors b/model-00069-of-00080.safetensors deleted file mode 100644 index 0567414f850fe659b7fb44ca49ae73a7dd2fc7ee..0000000000000000000000000000000000000000 --- a/model-00069-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:292a0e85f05394627fd84094474041184efe79229ebb53363056adcf29b6f69a -size 4885340672 diff --git a/model-00070-of-00080.safetensors b/model-00070-of-00080.safetensors deleted file mode 100644 index 281bfea69c087745466283a186500752c605a70b..0000000000000000000000000000000000000000 --- a/model-00070-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6c59b251d22ceb0b0b6686c113c036ae6836813048f98167af40b1594b1687d -size 4893533352 diff --git a/model-00071-of-00080.safetensors b/model-00071-of-00080.safetensors deleted file mode 100644 index 8e7916d0619efaf9a8a442622354d0def583d09f..0000000000000000000000000000000000000000 --- a/model-00071-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f02a4c107d9dc047359d1e6d9788c6980995efcc28ebc6e8f3405437e1bed60 -size 4885340680 diff --git a/model-00072-of-00080.safetensors b/model-00072-of-00080.safetensors deleted file mode 100644 index 397583d81f763f5e66e483c164d99aec64ff0a23..0000000000000000000000000000000000000000 --- a/model-00072-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b2b205051c9de68be6b6c2cf096ce40801370461bda780e1355b36fb85c68018 -size 4920129824 diff --git a/model-00073-of-00080.safetensors b/model-00073-of-00080.safetensors deleted file mode 100644 index ea84cfdebb98ccade1c10045e229b51c941d0ed0..0000000000000000000000000000000000000000 --- a/model-00073-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:366888458ea0877b16ab2530b63b22ee0c50c4cab73b793042270a611bca2de0 -size 4893533320 diff --git a/model-00074-of-00080.safetensors b/model-00074-of-00080.safetensors deleted file mode 100644 index c694e0c7c8ad8201b9cce4ba00a010526d5ecd1f..0000000000000000000000000000000000000000 --- a/model-00074-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8dc64ad142fa619c60a1172f4e920c839f4e16e827daa136ad25db190832e9ef -size 4885340664 diff --git a/model-00075-of-00080.safetensors b/model-00075-of-00080.safetensors deleted file mode 100644 index 048209dcde0df865bdb6c1cac068e87e8d0f4cb6..0000000000000000000000000000000000000000 --- a/model-00075-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac974f04cc4389f513fccc1afea48e204af23dd30e4cd511b5e2dc916186d522 -size 4893533360 diff --git a/model-00076-of-00080.safetensors b/model-00076-of-00080.safetensors deleted file mode 100644 index 5a6ef93ac875c7297ef5863a11c28df4068aee49..0000000000000000000000000000000000000000 --- a/model-00076-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f3b2bc784fdc410ceea6965995fe0c5b4b01a76f3dda94aea7b78c40a7f94b07 -size 4885340672 diff --git a/model-00077-of-00080.safetensors b/model-00077-of-00080.safetensors deleted file mode 100644 index 1a24122f4f6543b1611a3e8cb91b6d630c646df4..0000000000000000000000000000000000000000 --- a/model-00077-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:455e8314c80ec9cbf1ab543dd16c4c98db45477aa7eed748db9f041d25bcaea9 -size 4920129824 diff --git a/model-00078-of-00080.safetensors b/model-00078-of-00080.safetensors deleted file mode 100644 index da95622439edb086304d96db5cd3b006798c6b4b..0000000000000000000000000000000000000000 --- a/model-00078-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24cadebd5256629708c7f2e1b093e682138190636139ea5c6600b3c9be3732e9 -size 4893533320 diff --git a/model-00079-of-00080.safetensors b/model-00079-of-00080.safetensors deleted file mode 100644 index e1275efbc583ac4841b790f5d874441de34bf5a9..0000000000000000000000000000000000000000 --- a/model-00079-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1b3243acb122140ec24a1708d303cbbf62a9a40abd1e619f1f1399835423cb5b -size 4885340664 diff --git a/model-00080-of-00080.safetensors b/model-00080-of-00080.safetensors deleted file mode 100644 index 2a7d0f38a5c895bb6693998b5e5fc91bf19c9d78..0000000000000000000000000000000000000000 --- a/model-00080-of-00080.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92fbd30806238f74fdd573369176d5d0b2b097400b78866b04774c6000c03d9e -size 2694395520 diff --git a/model.safetensors.index.json b/model.safetensors.index.json deleted file mode 100644 index 5aa6daf16225293f3cf9d484fdcd9d7233359668..0000000000000000000000000000000000000000 --- a/model.safetensors.index.json +++ /dev/null @@ -1,10697 +0,0 @@ -{ - "metadata": { - "total_size": 389547683328 - }, - "weight_map": { - "model.embed_tokens.weight": "model-00001-of-00080.safetensors", - "model.layers.0.input_layernorm.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.0.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.0.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.1.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.10.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.10.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.11.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.12.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.13.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.14.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.15.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.2.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.2.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.3.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.4.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.5.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.6.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.7.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.experts.8.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.8.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.experts.9.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.0.mlp.gate.wg.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.down_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.down_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.down_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.gate_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.gate_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.gate_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.up_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.up_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.mlp.shared_mlp.up_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00080.safetensors", - "model.layers.0.self_attn.k_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.key_layernorm.weight": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.o_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.o_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.q_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.query_layernorm.weight": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.v_proj.input_scale": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00080.safetensors", - "model.layers.0.self_attn.v_proj.weight_scale": "model-00001-of-00080.safetensors", - "model.layers.1.input_layernorm.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.0.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.0.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.1.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.10.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.10.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.11.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.12.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.13.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.14.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.15.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.2.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.2.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.3.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.4.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.4.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.4.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.4.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.4.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.4.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.4.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.4.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.4.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.experts.5.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.5.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.6.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.7.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.8.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.experts.9.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.1.mlp.gate.wg.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.down_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.down_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.down_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.gate_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.gate_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.gate_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.up_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.up_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.mlp.shared_mlp.up_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00080.safetensors", - "model.layers.1.self_attn.key_layernorm.weight": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.o_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.o_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.q_proj.input_scale": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.q_proj.weight_scale": "model-00002-of-00080.safetensors", - "model.layers.1.self_attn.query_layernorm.weight": "model-00002-of-00080.safetensors", - "model.layers.10.input_layernorm.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.0.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.0.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.1.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.10.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.10.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.11.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.12.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.13.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.14.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.15.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.2.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.2.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.experts.3.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.3.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.4.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.5.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.6.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.7.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.8.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.down_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.down_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.down_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.experts.9.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.10.mlp.gate.wg.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.mlp.shared_mlp.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00014-of-00080.safetensors", - "model.layers.10.self_attn.k_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.k_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.key_layernorm.weight": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.o_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.o_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.q_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.q_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.query_layernorm.weight": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.v_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.10.self_attn.v_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.11.input_layernorm.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.0.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.0.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.1.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.10.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.11.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.12.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.13.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.13.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.13.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.13.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.13.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.13.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.13.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.13.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.14.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.14.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.15.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.11.mlp.experts.2.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.2.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.3.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.4.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.5.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.6.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.7.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.8.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.gate_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.gate_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.up_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.up_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.experts.9.up_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.gate.wg.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.down_proj.input_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.down_proj.weight": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.down_proj.weight_scale": "model-00015-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.gate_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.gate_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.gate_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.up_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.up_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.11.mlp.shared_mlp.up_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00016-of-00080.safetensors", - "model.layers.11.self_attn.key_layernorm.weight": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.o_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.o_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.q_proj.input_scale": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.q_proj.weight_scale": "model-00014-of-00080.safetensors", - "model.layers.11.self_attn.query_layernorm.weight": "model-00014-of-00080.safetensors", - "model.layers.12.input_layernorm.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.0.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.0.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.1.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.10.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.10.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.10.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.10.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.10.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.10.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.10.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.10.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.11.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.12.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.13.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.14.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.15.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.12.mlp.experts.2.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.2.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.3.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.4.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.5.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.6.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.7.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.8.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.experts.9.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.gate.wg.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.down_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.down_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.down_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.gate_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.gate_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.gate_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.up_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.up_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.mlp.shared_mlp.up_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00017-of-00080.safetensors", - "model.layers.12.self_attn.k_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.k_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.key_layernorm.weight": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.o_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.o_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.q_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.q_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.query_layernorm.weight": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.v_proj.input_scale": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00016-of-00080.safetensors", - "model.layers.12.self_attn.v_proj.weight_scale": "model-00016-of-00080.safetensors", - "model.layers.13.input_layernorm.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.0.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.0.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.1.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.10.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.10.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.11.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.12.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.13.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.14.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.15.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.2.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.2.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.3.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.4.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.5.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.6.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.experts.7.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.7.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.8.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.experts.9.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.13.mlp.gate.wg.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.down_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.down_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.down_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.gate_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.gate_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.gate_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.up_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.up_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.mlp.shared_mlp.up_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00018-of-00080.safetensors", - "model.layers.13.self_attn.key_layernorm.weight": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.o_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.o_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.q_proj.input_scale": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.q_proj.weight_scale": "model-00017-of-00080.safetensors", - "model.layers.13.self_attn.query_layernorm.weight": "model-00017-of-00080.safetensors", - "model.layers.14.input_layernorm.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.0.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.0.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.1.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.10.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.10.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.11.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.12.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.13.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.14.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.15.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.2.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.2.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.3.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.3.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.3.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.3.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.3.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.3.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.3.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.3.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.experts.4.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.4.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.5.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.6.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.7.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.8.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.experts.9.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.14.mlp.gate.wg.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.down_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.down_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.down_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.gate_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.gate_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.gate_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.up_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.up_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.mlp.shared_mlp.up_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00019-of-00080.safetensors", - "model.layers.14.self_attn.k_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.k_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.key_layernorm.weight": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.o_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.o_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.q_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.q_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.query_layernorm.weight": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.v_proj.input_scale": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00018-of-00080.safetensors", - "model.layers.14.self_attn.v_proj.weight_scale": "model-00018-of-00080.safetensors", - "model.layers.15.input_layernorm.weight": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.0.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.0.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.0.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.0.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.experts.0.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.experts.0.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.0.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.0.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.1.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.10.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.11.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.12.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.13.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.14.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.14.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.14.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.14.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.14.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.14.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.14.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.14.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.15.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.15.mlp.experts.2.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.2.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.3.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.4.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.5.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.6.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.7.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.8.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.down_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.down_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.down_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.gate_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.gate_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.up_proj.input_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.up_proj.weight": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.experts.9.up_proj.weight_scale": "model-00020-of-00080.safetensors", - "model.layers.15.mlp.gate.wg.weight": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.down_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.down_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.down_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.gate_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.gate_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.gate_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.up_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.up_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.15.mlp.shared_mlp.up_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00021-of-00080.safetensors", - "model.layers.15.self_attn.key_layernorm.weight": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.o_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.o_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.q_proj.input_scale": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.q_proj.weight_scale": "model-00019-of-00080.safetensors", - "model.layers.15.self_attn.query_layernorm.weight": "model-00019-of-00080.safetensors", - "model.layers.16.input_layernorm.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.0.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.0.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.1.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.10.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.11.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.11.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.12.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.13.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.14.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.15.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.16.mlp.experts.2.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.2.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.3.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.4.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.5.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.6.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.7.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.8.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.experts.9.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.gate.wg.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.down_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.down_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.down_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.gate_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.gate_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.gate_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.up_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.up_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.mlp.shared_mlp.up_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00022-of-00080.safetensors", - "model.layers.16.self_attn.k_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.k_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.key_layernorm.weight": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.o_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.o_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.q_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.q_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.query_layernorm.weight": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.v_proj.input_scale": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00021-of-00080.safetensors", - "model.layers.16.self_attn.v_proj.weight_scale": "model-00021-of-00080.safetensors", - "model.layers.17.input_layernorm.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.0.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.0.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.1.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.10.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.10.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.11.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.12.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.13.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.14.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.15.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.2.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.2.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.3.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.4.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.5.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.6.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.7.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.7.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.7.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.7.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.7.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.7.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.7.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.7.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.experts.8.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.8.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.experts.9.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.17.mlp.gate.wg.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.down_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.down_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.down_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.gate_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.gate_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.gate_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.up_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.up_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.mlp.shared_mlp.up_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00023-of-00080.safetensors", - "model.layers.17.self_attn.key_layernorm.weight": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.o_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.o_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.q_proj.input_scale": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.q_proj.weight_scale": "model-00022-of-00080.safetensors", - "model.layers.17.self_attn.query_layernorm.weight": "model-00022-of-00080.safetensors", - "model.layers.18.input_layernorm.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.0.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.0.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.1.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.10.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.10.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.11.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.12.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.13.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.14.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.15.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.2.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.2.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.3.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.4.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.4.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.4.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.4.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.4.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.experts.4.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.4.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.4.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.5.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.6.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.7.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.8.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.experts.9.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.18.mlp.gate.wg.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.down_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.down_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.down_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.gate_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.gate_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.gate_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.up_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.up_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.mlp.shared_mlp.up_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00024-of-00080.safetensors", - "model.layers.18.self_attn.k_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.k_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.key_layernorm.weight": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.o_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.o_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.q_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.q_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.query_layernorm.weight": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.v_proj.input_scale": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00023-of-00080.safetensors", - "model.layers.18.self_attn.v_proj.weight_scale": "model-00023-of-00080.safetensors", - "model.layers.19.input_layernorm.weight": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.0.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.0.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.experts.1.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.1.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.10.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.11.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.12.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.13.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.14.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.15.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.15.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.19.mlp.experts.2.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.2.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.3.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.4.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.5.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.6.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.7.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.8.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.down_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.down_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.down_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.gate_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.gate_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.up_proj.input_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.up_proj.weight": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.experts.9.up_proj.weight_scale": "model-00025-of-00080.safetensors", - "model.layers.19.mlp.gate.wg.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.down_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.down_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.down_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.gate_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.gate_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.gate_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.up_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.up_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.mlp.shared_mlp.up_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00026-of-00080.safetensors", - "model.layers.19.self_attn.key_layernorm.weight": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.o_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.o_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.q_proj.input_scale": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.q_proj.weight_scale": "model-00024-of-00080.safetensors", - "model.layers.19.self_attn.query_layernorm.weight": "model-00024-of-00080.safetensors", - "model.layers.2.input_layernorm.weight": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.0.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.0.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.1.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.1.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.1.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.1.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.1.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.1.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.experts.1.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.1.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.1.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.10.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.11.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.12.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.13.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.14.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.15.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.15.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.15.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.15.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.15.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.15.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.15.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.15.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.15.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.2.mlp.experts.2.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.2.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.3.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.4.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.5.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.6.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.7.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.8.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.down_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.down_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.down_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.gate_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.gate_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.gate_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.up_proj.input_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.up_proj.weight": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.experts.9.up_proj.weight_scale": "model-00004-of-00080.safetensors", - "model.layers.2.mlp.gate.wg.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.down_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.down_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.down_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.gate_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.gate_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.gate_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.up_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.up_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.mlp.shared_mlp.up_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00080.safetensors", - "model.layers.2.self_attn.k_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.k_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.key_layernorm.weight": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.o_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.o_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.q_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.q_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.query_layernorm.weight": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.v_proj.input_scale": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00080.safetensors", - "model.layers.2.self_attn.v_proj.weight_scale": "model-00003-of-00080.safetensors", - "model.layers.20.input_layernorm.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.0.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.0.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.1.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.10.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.11.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.11.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.11.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.11.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.11.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.11.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.11.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.11.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.12.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.12.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.13.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.14.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.15.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.20.mlp.experts.2.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.2.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.3.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.4.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.5.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.6.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.7.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.8.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.experts.9.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.gate.wg.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.down_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.down_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.down_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.gate_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.gate_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.gate_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.up_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.up_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.mlp.shared_mlp.up_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00027-of-00080.safetensors", - "model.layers.20.self_attn.k_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.k_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.key_layernorm.weight": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.o_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.o_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.q_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.q_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.query_layernorm.weight": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.v_proj.input_scale": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00026-of-00080.safetensors", - "model.layers.20.self_attn.v_proj.weight_scale": "model-00026-of-00080.safetensors", - "model.layers.21.input_layernorm.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.0.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.0.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.1.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.10.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.10.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.11.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.12.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.13.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.14.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.15.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.2.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.2.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.3.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.4.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.5.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.6.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.7.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.8.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.8.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.8.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.8.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.8.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.experts.8.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.8.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.8.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.experts.9.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.21.mlp.gate.wg.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.down_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.down_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.down_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.gate_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.gate_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.gate_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.up_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.up_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.mlp.shared_mlp.up_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00028-of-00080.safetensors", - "model.layers.21.self_attn.key_layernorm.weight": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.o_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.o_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.q_proj.input_scale": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.q_proj.weight_scale": "model-00027-of-00080.safetensors", - "model.layers.21.self_attn.query_layernorm.weight": "model-00027-of-00080.safetensors", - "model.layers.22.input_layernorm.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.0.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.0.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.1.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.10.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.10.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.11.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.12.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.13.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.14.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.15.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.2.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.2.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.3.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.4.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.experts.5.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.5.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.6.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.7.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.8.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.experts.9.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.22.mlp.gate.wg.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.down_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.down_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.down_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.gate_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.gate_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.gate_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.up_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.up_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.mlp.shared_mlp.up_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00029-of-00080.safetensors", - "model.layers.22.self_attn.k_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.k_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.key_layernorm.weight": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.o_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.o_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.q_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.q_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.query_layernorm.weight": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.v_proj.input_scale": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00028-of-00080.safetensors", - "model.layers.22.self_attn.v_proj.weight_scale": "model-00028-of-00080.safetensors", - "model.layers.23.input_layernorm.weight": "model-00031-of-00080.safetensors", - "model.layers.23.mlp.experts.0.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.0.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.1.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.1.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.1.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.1.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.1.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.1.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.1.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.1.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.experts.10.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.10.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.11.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.12.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.13.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.14.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.15.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.23.mlp.experts.15.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.23.mlp.experts.15.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.23.mlp.experts.15.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.15.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.15.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.15.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.15.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.2.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.3.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.4.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.5.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.6.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.7.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.8.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.down_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.down_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.down_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.gate_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.gate_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.up_proj.input_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.up_proj.weight": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.experts.9.up_proj.weight_scale": "model-00030-of-00080.safetensors", - "model.layers.23.mlp.gate.wg.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.down_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.down_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.down_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.gate_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.gate_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.gate_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.up_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.up_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.mlp.shared_mlp.up_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00031-of-00080.safetensors", - "model.layers.23.self_attn.key_layernorm.weight": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.o_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.o_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.q_proj.input_scale": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.q_proj.weight_scale": "model-00029-of-00080.safetensors", - "model.layers.23.self_attn.query_layernorm.weight": "model-00029-of-00080.safetensors", - "model.layers.24.input_layernorm.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.0.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.0.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.1.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.10.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.11.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.12.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.12.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.12.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.12.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.12.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.12.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.12.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.12.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.13.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.14.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.15.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.24.mlp.experts.2.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.2.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.3.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.4.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.5.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.6.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.7.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.8.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.experts.9.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.gate.wg.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.down_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.down_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.down_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.gate_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.gate_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.gate_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.up_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.up_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.mlp.shared_mlp.up_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00032-of-00080.safetensors", - "model.layers.24.self_attn.k_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.k_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.key_layernorm.weight": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.o_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.o_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.q_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.q_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.query_layernorm.weight": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.v_proj.input_scale": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00031-of-00080.safetensors", - "model.layers.24.self_attn.v_proj.weight_scale": "model-00031-of-00080.safetensors", - "model.layers.25.input_layernorm.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.0.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.0.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.1.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.10.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.10.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.11.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.12.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.13.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.14.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.15.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.2.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.2.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.3.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.4.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.5.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.6.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.7.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.8.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.experts.9.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.experts.9.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.25.mlp.gate.wg.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.down_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.down_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.down_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.gate_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.gate_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.gate_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.up_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.up_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.mlp.shared_mlp.up_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00033-of-00080.safetensors", - "model.layers.25.self_attn.key_layernorm.weight": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.o_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.o_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.q_proj.input_scale": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.q_proj.weight_scale": "model-00032-of-00080.safetensors", - "model.layers.25.self_attn.query_layernorm.weight": "model-00032-of-00080.safetensors", - "model.layers.26.input_layernorm.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.0.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.0.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.1.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.10.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.10.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.11.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.12.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.13.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.14.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.15.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.2.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.2.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.3.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.4.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.5.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.5.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.5.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.5.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.5.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.5.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.5.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.5.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.experts.6.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.6.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.7.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.8.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.experts.9.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.26.mlp.gate.wg.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.down_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.down_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.down_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.gate_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.gate_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.gate_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.up_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.up_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.mlp.shared_mlp.up_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00034-of-00080.safetensors", - "model.layers.26.self_attn.k_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.k_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.key_layernorm.weight": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.o_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.o_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.q_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.q_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.query_layernorm.weight": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.v_proj.input_scale": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00033-of-00080.safetensors", - "model.layers.26.self_attn.v_proj.weight_scale": "model-00033-of-00080.safetensors", - "model.layers.27.input_layernorm.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.0.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.0.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.1.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.10.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.10.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.11.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.12.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.13.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.14.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.15.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.2.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.2.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.2.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.2.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.2.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.experts.2.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.2.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.2.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.3.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.4.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.5.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.6.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.7.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.8.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.down_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.down_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.down_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.gate_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.gate_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.up_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.up_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.experts.9.up_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.27.mlp.gate.wg.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.down_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.down_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.down_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.gate_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.gate_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.gate_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.up_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.up_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.mlp.shared_mlp.up_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00035-of-00080.safetensors", - "model.layers.27.self_attn.key_layernorm.weight": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.o_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.o_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.q_proj.input_scale": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.q_proj.weight_scale": "model-00034-of-00080.safetensors", - "model.layers.27.self_attn.query_layernorm.weight": "model-00034-of-00080.safetensors", - "model.layers.28.input_layernorm.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.0.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.0.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.1.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.10.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.11.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.12.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.13.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.13.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.14.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.15.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.28.mlp.experts.2.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.2.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.3.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.4.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.5.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.6.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.7.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.8.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.experts.9.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.gate.wg.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.down_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.down_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.down_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.gate_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.gate_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.gate_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.up_proj.input_scale": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.up_proj.weight": "model-00036-of-00080.safetensors", - "model.layers.28.mlp.shared_mlp.up_proj.weight_scale": "model-00036-of-00080.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00037-of-00080.safetensors", - "model.layers.28.self_attn.k_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.k_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.key_layernorm.weight": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.o_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.o_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.q_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.q_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.query_layernorm.weight": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.v_proj.input_scale": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00035-of-00080.safetensors", - "model.layers.28.self_attn.v_proj.weight_scale": "model-00035-of-00080.safetensors", - "model.layers.29.input_layernorm.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.0.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.0.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.1.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.10.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.10.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.11.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.12.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.13.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.14.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.15.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.2.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.2.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.3.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.4.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.5.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.6.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.7.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.8.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.9.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.9.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.9.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.29.mlp.experts.9.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.9.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.9.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.9.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.experts.9.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.gate.wg.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.down_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.down_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.down_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.gate_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.gate_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.gate_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.up_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.up_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.mlp.shared_mlp.up_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00038-of-00080.safetensors", - "model.layers.29.self_attn.key_layernorm.weight": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.o_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.o_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.q_proj.input_scale": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.q_proj.weight_scale": "model-00037-of-00080.safetensors", - "model.layers.29.self_attn.query_layernorm.weight": "model-00037-of-00080.safetensors", - "model.layers.3.input_layernorm.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.0.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.0.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.1.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.10.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.11.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.12.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.12.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.13.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.14.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.15.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.3.mlp.experts.2.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.2.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.3.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.4.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.5.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.6.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.7.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.8.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.experts.9.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.gate.wg.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.down_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.down_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.down_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.gate_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.gate_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.gate_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.up_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.up_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.mlp.shared_mlp.up_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00080.safetensors", - "model.layers.3.self_attn.key_layernorm.weight": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.o_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.o_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.q_proj.input_scale": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.q_proj.weight_scale": "model-00005-of-00080.safetensors", - "model.layers.3.self_attn.query_layernorm.weight": "model-00005-of-00080.safetensors", - "model.layers.30.input_layernorm.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.0.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.0.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.1.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.10.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.10.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.11.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.12.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.13.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.14.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.15.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.2.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.2.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.3.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.4.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.5.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.6.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.6.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.6.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.6.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.6.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.experts.6.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.6.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.6.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.7.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.8.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.experts.9.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.30.mlp.gate.wg.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.down_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.down_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.down_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.gate_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.gate_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.gate_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.up_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.up_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.mlp.shared_mlp.up_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00039-of-00080.safetensors", - "model.layers.30.self_attn.k_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.k_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.key_layernorm.weight": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.o_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.o_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.q_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.q_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.query_layernorm.weight": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.v_proj.input_scale": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00038-of-00080.safetensors", - "model.layers.30.self_attn.v_proj.weight_scale": "model-00038-of-00080.safetensors", - "model.layers.31.input_layernorm.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.0.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.0.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.1.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.10.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.10.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.11.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.12.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.13.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.14.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.15.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.2.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.2.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.experts.3.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.3.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.4.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.5.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.6.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.7.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.8.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.down_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.down_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.down_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.experts.9.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.31.mlp.gate.wg.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.down_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.down_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.down_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.gate_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.gate_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.gate_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.up_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.up_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.mlp.shared_mlp.up_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00040-of-00080.safetensors", - "model.layers.31.self_attn.key_layernorm.weight": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.o_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.o_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.q_proj.input_scale": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.q_proj.weight_scale": "model-00039-of-00080.safetensors", - "model.layers.31.self_attn.query_layernorm.weight": "model-00039-of-00080.safetensors", - "model.layers.32.input_layernorm.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.0.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.0.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.1.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.10.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.11.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.12.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.13.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.13.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.13.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.13.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.13.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.13.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.13.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.13.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.14.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.14.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.15.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.32.mlp.experts.2.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.2.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.3.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.4.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.5.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.6.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.7.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.8.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.gate_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.gate_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.up_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.up_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.experts.9.up_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.gate.wg.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.down_proj.input_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.down_proj.weight": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.down_proj.weight_scale": "model-00041-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.gate_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.gate_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.gate_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.up_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.up_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.32.mlp.shared_mlp.up_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.32.post_attention_layernorm.weight": "model-00042-of-00080.safetensors", - "model.layers.32.self_attn.k_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.k_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.k_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.key_layernorm.weight": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.o_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.o_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.o_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.q_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.q_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.q_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.query_layernorm.weight": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.v_proj.input_scale": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.v_proj.weight": "model-00040-of-00080.safetensors", - "model.layers.32.self_attn.v_proj.weight_scale": "model-00040-of-00080.safetensors", - "model.layers.33.input_layernorm.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.0.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.0.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.1.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.10.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.10.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.10.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.10.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.10.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.10.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.10.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.10.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.11.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.12.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.13.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.14.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.15.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.33.mlp.experts.2.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.2.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.3.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.4.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.5.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.6.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.7.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.8.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.experts.9.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.gate.wg.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.down_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.down_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.down_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.gate_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.gate_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.gate_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.up_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.up_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.mlp.shared_mlp.up_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.post_attention_layernorm.weight": "model-00043-of-00080.safetensors", - "model.layers.33.self_attn.key_layernorm.weight": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.o_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.o_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.o_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.q_proj.input_scale": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.q_proj.weight": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.q_proj.weight_scale": "model-00042-of-00080.safetensors", - "model.layers.33.self_attn.query_layernorm.weight": "model-00042-of-00080.safetensors", - "model.layers.34.input_layernorm.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.0.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.0.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.1.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.10.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.10.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.11.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.12.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.13.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.14.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.15.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.2.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.2.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.3.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.4.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.5.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.6.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.experts.7.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.7.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.8.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.experts.9.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.34.mlp.gate.wg.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.down_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.down_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.down_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.gate_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.gate_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.gate_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.up_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.up_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.mlp.shared_mlp.up_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.post_attention_layernorm.weight": "model-00044-of-00080.safetensors", - "model.layers.34.self_attn.k_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.k_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.k_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.key_layernorm.weight": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.o_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.o_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.o_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.q_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.q_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.q_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.query_layernorm.weight": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.v_proj.input_scale": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.v_proj.weight": "model-00043-of-00080.safetensors", - "model.layers.34.self_attn.v_proj.weight_scale": "model-00043-of-00080.safetensors", - "model.layers.35.input_layernorm.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.0.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.0.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.1.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.10.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.10.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.11.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.12.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.13.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.14.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.15.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.2.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.2.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.3.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.3.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.3.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.3.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.3.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.3.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.3.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.3.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.experts.4.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.4.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.5.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.6.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.7.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.8.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.experts.9.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.35.mlp.gate.wg.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.down_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.down_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.down_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.gate_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.gate_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.gate_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.up_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.up_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.mlp.shared_mlp.up_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.post_attention_layernorm.weight": "model-00045-of-00080.safetensors", - "model.layers.35.self_attn.key_layernorm.weight": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.o_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.o_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.o_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.q_proj.input_scale": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.q_proj.weight": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.q_proj.weight_scale": "model-00044-of-00080.safetensors", - "model.layers.35.self_attn.query_layernorm.weight": "model-00044-of-00080.safetensors", - "model.layers.36.input_layernorm.weight": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.0.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.0.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.0.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.0.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.experts.0.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.experts.0.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.0.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.0.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.1.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.10.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.11.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.12.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.13.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.14.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.14.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.14.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.14.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.14.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.14.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.14.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.14.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.15.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.36.mlp.experts.2.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.2.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.3.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.4.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.5.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.6.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.7.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.8.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.down_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.down_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.down_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.gate_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.gate_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.up_proj.input_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.up_proj.weight": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.experts.9.up_proj.weight_scale": "model-00046-of-00080.safetensors", - "model.layers.36.mlp.gate.wg.weight": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.down_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.down_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.down_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.gate_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.gate_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.gate_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.up_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.up_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.mlp.shared_mlp.up_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.post_attention_layernorm.weight": "model-00047-of-00080.safetensors", - "model.layers.36.self_attn.k_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.k_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.k_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.key_layernorm.weight": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.o_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.o_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.o_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.q_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.q_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.q_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.query_layernorm.weight": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.v_proj.input_scale": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.v_proj.weight": "model-00045-of-00080.safetensors", - "model.layers.36.self_attn.v_proj.weight_scale": "model-00045-of-00080.safetensors", - "model.layers.37.input_layernorm.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.0.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.0.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.1.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.10.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.11.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.11.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.12.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.13.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.14.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.15.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.37.mlp.experts.2.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.2.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.3.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.4.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.5.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.6.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.7.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.8.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.experts.9.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.gate.wg.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.down_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.down_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.down_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.gate_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.gate_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.gate_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.up_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.up_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.mlp.shared_mlp.up_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.post_attention_layernorm.weight": "model-00048-of-00080.safetensors", - "model.layers.37.self_attn.key_layernorm.weight": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.o_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.o_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.o_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.q_proj.input_scale": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.q_proj.weight": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.q_proj.weight_scale": "model-00047-of-00080.safetensors", - "model.layers.37.self_attn.query_layernorm.weight": "model-00047-of-00080.safetensors", - "model.layers.38.input_layernorm.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.0.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.0.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.1.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.10.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.10.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.11.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.12.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.13.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.14.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.15.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.2.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.2.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.3.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.4.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.5.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.6.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.7.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.7.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.7.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.7.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.7.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.7.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.7.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.7.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.experts.8.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.8.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.experts.9.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.38.mlp.gate.wg.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.down_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.down_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.down_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.gate_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.gate_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.gate_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.up_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.up_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.mlp.shared_mlp.up_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.post_attention_layernorm.weight": "model-00049-of-00080.safetensors", - "model.layers.38.self_attn.k_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.k_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.k_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.key_layernorm.weight": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.o_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.o_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.o_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.q_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.q_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.q_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.query_layernorm.weight": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.v_proj.input_scale": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.v_proj.weight": "model-00048-of-00080.safetensors", - "model.layers.38.self_attn.v_proj.weight_scale": "model-00048-of-00080.safetensors", - "model.layers.39.input_layernorm.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.0.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.0.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.1.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.10.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.10.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.11.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.12.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.13.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.14.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.15.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.2.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.2.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.3.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.4.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.4.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.4.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.4.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.4.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.experts.4.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.4.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.4.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.5.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.6.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.7.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.8.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.experts.9.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.39.mlp.gate.wg.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.down_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.down_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.down_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.gate_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.gate_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.gate_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.up_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.up_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.mlp.shared_mlp.up_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.post_attention_layernorm.weight": "model-00050-of-00080.safetensors", - "model.layers.39.self_attn.key_layernorm.weight": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.o_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.o_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.o_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.q_proj.input_scale": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.q_proj.weight": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.q_proj.weight_scale": "model-00049-of-00080.safetensors", - "model.layers.39.self_attn.query_layernorm.weight": "model-00049-of-00080.safetensors", - "model.layers.4.input_layernorm.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.0.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.0.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.1.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.10.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.10.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.11.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.12.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.13.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.14.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.15.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.2.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.2.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.3.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.4.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.5.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.6.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.7.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.8.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.8.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.8.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.8.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.8.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.8.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.8.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.8.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.experts.9.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.experts.9.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.4.mlp.gate.wg.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.down_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.down_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.down_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.gate_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.gate_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.gate_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.up_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.up_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.mlp.shared_mlp.up_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00007-of-00080.safetensors", - "model.layers.4.self_attn.k_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.k_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.key_layernorm.weight": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.o_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.o_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.q_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.q_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.query_layernorm.weight": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.v_proj.input_scale": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00006-of-00080.safetensors", - "model.layers.4.self_attn.v_proj.weight_scale": "model-00006-of-00080.safetensors", - "model.layers.40.input_layernorm.weight": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.0.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.0.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.experts.1.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.1.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.10.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.11.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.12.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.13.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.14.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.15.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.15.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.40.mlp.experts.2.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.2.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.3.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.4.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.5.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.6.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.7.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.8.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.down_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.down_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.down_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.gate_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.gate_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.up_proj.input_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.up_proj.weight": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.experts.9.up_proj.weight_scale": "model-00051-of-00080.safetensors", - "model.layers.40.mlp.gate.wg.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.down_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.down_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.down_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.gate_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.gate_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.gate_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.up_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.up_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.mlp.shared_mlp.up_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.post_attention_layernorm.weight": "model-00052-of-00080.safetensors", - "model.layers.40.self_attn.k_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.k_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.k_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.key_layernorm.weight": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.o_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.o_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.o_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.q_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.q_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.q_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.query_layernorm.weight": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.v_proj.input_scale": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.v_proj.weight": "model-00050-of-00080.safetensors", - "model.layers.40.self_attn.v_proj.weight_scale": "model-00050-of-00080.safetensors", - "model.layers.41.input_layernorm.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.0.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.0.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.1.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.10.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.11.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.11.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.11.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.11.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.11.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.11.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.11.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.11.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.12.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.12.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.13.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.14.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.15.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.41.mlp.experts.2.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.2.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.3.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.4.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.5.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.6.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.7.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.8.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.experts.9.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.gate.wg.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.down_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.down_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.down_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.gate_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.gate_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.gate_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.up_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.up_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.mlp.shared_mlp.up_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.post_attention_layernorm.weight": "model-00053-of-00080.safetensors", - "model.layers.41.self_attn.key_layernorm.weight": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.o_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.o_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.o_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.q_proj.input_scale": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.q_proj.weight": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.q_proj.weight_scale": "model-00052-of-00080.safetensors", - "model.layers.41.self_attn.query_layernorm.weight": "model-00052-of-00080.safetensors", - "model.layers.42.input_layernorm.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.0.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.0.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.1.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.10.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.10.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.11.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.12.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.13.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.14.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.15.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.2.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.2.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.3.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.4.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.5.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.6.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.7.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.8.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.8.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.8.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.8.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.8.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.experts.8.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.8.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.8.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.experts.9.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.42.mlp.gate.wg.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.down_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.down_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.down_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.gate_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.gate_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.gate_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.up_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.up_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.mlp.shared_mlp.up_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.post_attention_layernorm.weight": "model-00054-of-00080.safetensors", - "model.layers.42.self_attn.k_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.k_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.k_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.key_layernorm.weight": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.o_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.o_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.o_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.q_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.q_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.q_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.query_layernorm.weight": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.v_proj.input_scale": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.v_proj.weight": "model-00053-of-00080.safetensors", - "model.layers.42.self_attn.v_proj.weight_scale": "model-00053-of-00080.safetensors", - "model.layers.43.input_layernorm.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.0.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.0.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.1.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.10.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.10.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.11.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.12.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.13.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.14.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.15.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.2.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.2.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.3.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.4.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.experts.5.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.5.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.6.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.7.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.8.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.experts.9.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.43.mlp.gate.wg.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.down_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.down_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.down_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.gate_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.gate_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.gate_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.up_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.up_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.mlp.shared_mlp.up_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.post_attention_layernorm.weight": "model-00055-of-00080.safetensors", - "model.layers.43.self_attn.key_layernorm.weight": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.o_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.o_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.o_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.q_proj.input_scale": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.q_proj.weight": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.q_proj.weight_scale": "model-00054-of-00080.safetensors", - "model.layers.43.self_attn.query_layernorm.weight": "model-00054-of-00080.safetensors", - "model.layers.44.input_layernorm.weight": "model-00057-of-00080.safetensors", - "model.layers.44.mlp.experts.0.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.0.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.1.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.1.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.1.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.1.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.1.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.1.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.1.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.1.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.experts.10.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.10.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.11.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.12.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.13.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.14.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.15.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.44.mlp.experts.15.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.44.mlp.experts.15.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.44.mlp.experts.15.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.15.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.15.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.15.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.15.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.2.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.3.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.4.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.5.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.6.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.7.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.8.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.down_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.down_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.down_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.gate_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.gate_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.up_proj.input_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.up_proj.weight": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.experts.9.up_proj.weight_scale": "model-00056-of-00080.safetensors", - "model.layers.44.mlp.gate.wg.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.down_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.down_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.down_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.gate_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.gate_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.gate_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.up_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.up_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.mlp.shared_mlp.up_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.post_attention_layernorm.weight": "model-00057-of-00080.safetensors", - "model.layers.44.self_attn.k_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.k_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.k_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.key_layernorm.weight": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.o_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.o_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.o_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.q_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.q_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.q_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.query_layernorm.weight": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.v_proj.input_scale": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.v_proj.weight": "model-00055-of-00080.safetensors", - "model.layers.44.self_attn.v_proj.weight_scale": "model-00055-of-00080.safetensors", - "model.layers.45.input_layernorm.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.0.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.0.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.1.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.10.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.11.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.12.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.12.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.12.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.12.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.12.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.12.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.12.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.12.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.13.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.14.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.15.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.45.mlp.experts.2.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.2.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.3.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.4.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.5.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.6.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.7.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.8.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.experts.9.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.gate.wg.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.down_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.down_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.down_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.gate_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.gate_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.gate_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.up_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.up_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.mlp.shared_mlp.up_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.post_attention_layernorm.weight": "model-00058-of-00080.safetensors", - "model.layers.45.self_attn.key_layernorm.weight": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.o_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.o_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.o_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.q_proj.input_scale": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.q_proj.weight": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.q_proj.weight_scale": "model-00057-of-00080.safetensors", - "model.layers.45.self_attn.query_layernorm.weight": "model-00057-of-00080.safetensors", - "model.layers.46.input_layernorm.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.0.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.0.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.1.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.10.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.10.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.11.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.12.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.13.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.14.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.15.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.2.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.2.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.3.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.4.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.5.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.6.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.7.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.8.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.experts.9.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.experts.9.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.46.mlp.gate.wg.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.down_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.down_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.down_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.gate_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.gate_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.gate_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.up_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.up_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.mlp.shared_mlp.up_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.post_attention_layernorm.weight": "model-00059-of-00080.safetensors", - "model.layers.46.self_attn.k_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.k_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.k_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.key_layernorm.weight": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.o_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.o_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.o_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.q_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.q_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.q_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.query_layernorm.weight": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.v_proj.input_scale": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.v_proj.weight": "model-00058-of-00080.safetensors", - "model.layers.46.self_attn.v_proj.weight_scale": "model-00058-of-00080.safetensors", - "model.layers.47.input_layernorm.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.0.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.0.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.1.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.10.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.10.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.11.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.12.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.13.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.14.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.15.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.2.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.2.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.3.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.4.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.5.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.5.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.5.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.5.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.5.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.5.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.5.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.5.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.experts.6.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.6.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.7.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.8.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.experts.9.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.47.mlp.gate.wg.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.down_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.down_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.down_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.gate_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.gate_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.gate_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.up_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.up_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.mlp.shared_mlp.up_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.post_attention_layernorm.weight": "model-00060-of-00080.safetensors", - "model.layers.47.self_attn.key_layernorm.weight": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.o_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.o_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.o_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.q_proj.input_scale": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.q_proj.weight": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.q_proj.weight_scale": "model-00059-of-00080.safetensors", - "model.layers.47.self_attn.query_layernorm.weight": "model-00059-of-00080.safetensors", - "model.layers.48.input_layernorm.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.0.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.0.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.1.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.10.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.10.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.11.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.12.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.13.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.14.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.15.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.2.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.2.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.2.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.2.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.2.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.2.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.experts.2.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.2.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.2.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.3.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.4.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.5.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.6.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.7.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.8.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.down_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.down_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.down_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.gate_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.gate_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.gate_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.up_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.up_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.experts.9.up_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.48.mlp.gate.wg.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.down_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.down_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.down_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.gate_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.gate_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.gate_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.up_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.up_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.mlp.shared_mlp.up_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.post_attention_layernorm.weight": "model-00061-of-00080.safetensors", - "model.layers.48.self_attn.k_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.k_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.k_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.key_layernorm.weight": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.o_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.o_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.o_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.q_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.q_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.q_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.query_layernorm.weight": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.v_proj.input_scale": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.v_proj.weight": "model-00060-of-00080.safetensors", - "model.layers.48.self_attn.v_proj.weight_scale": "model-00060-of-00080.safetensors", - "model.layers.49.input_layernorm.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.0.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.0.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.1.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.10.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.11.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.12.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.13.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.13.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.14.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.15.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.49.mlp.experts.2.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.2.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.3.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.4.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.5.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.6.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.7.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.8.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.experts.9.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.gate.wg.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.down_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.down_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.down_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.gate_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.gate_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.gate_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.up_proj.input_scale": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.up_proj.weight": "model-00062-of-00080.safetensors", - "model.layers.49.mlp.shared_mlp.up_proj.weight_scale": "model-00062-of-00080.safetensors", - "model.layers.49.post_attention_layernorm.weight": "model-00063-of-00080.safetensors", - "model.layers.49.self_attn.key_layernorm.weight": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.o_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.o_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.o_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.q_proj.input_scale": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.q_proj.weight": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.q_proj.weight_scale": "model-00061-of-00080.safetensors", - "model.layers.49.self_attn.query_layernorm.weight": "model-00061-of-00080.safetensors", - "model.layers.5.input_layernorm.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.0.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.0.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.1.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.10.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.10.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.11.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.12.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.13.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.14.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.15.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.2.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.2.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.3.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.4.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.5.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.5.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.5.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.5.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.5.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.experts.5.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.5.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.5.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.6.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.7.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.8.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.experts.9.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.5.mlp.gate.wg.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.down_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.down_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.down_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.gate_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.gate_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.gate_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.up_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.up_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.mlp.shared_mlp.up_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00008-of-00080.safetensors", - "model.layers.5.self_attn.key_layernorm.weight": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.o_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.o_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.q_proj.input_scale": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.q_proj.weight_scale": "model-00007-of-00080.safetensors", - "model.layers.5.self_attn.query_layernorm.weight": "model-00007-of-00080.safetensors", - "model.layers.50.input_layernorm.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.0.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.0.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.1.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.10.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.10.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.11.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.12.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.13.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.14.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.15.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.2.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.2.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.3.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.4.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.5.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.6.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.7.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.8.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.9.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.9.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.9.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.50.mlp.experts.9.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.9.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.9.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.9.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.9.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.experts.9.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.gate.wg.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.down_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.down_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.down_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.gate_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.gate_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.gate_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.up_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.up_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.mlp.shared_mlp.up_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.post_attention_layernorm.weight": "model-00064-of-00080.safetensors", - "model.layers.50.self_attn.k_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.k_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.k_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.key_layernorm.weight": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.o_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.o_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.o_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.q_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.q_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.q_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.query_layernorm.weight": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.v_proj.input_scale": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.v_proj.weight": "model-00063-of-00080.safetensors", - "model.layers.50.self_attn.v_proj.weight_scale": "model-00063-of-00080.safetensors", - "model.layers.51.input_layernorm.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.0.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.0.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.1.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.10.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.10.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.11.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.12.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.13.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.14.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.15.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.2.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.2.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.3.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.4.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.5.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.6.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.6.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.6.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.6.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.6.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.6.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.experts.6.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.6.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.6.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.7.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.8.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.experts.9.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.51.mlp.gate.wg.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.down_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.down_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.down_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.gate_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.gate_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.gate_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.up_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.up_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.mlp.shared_mlp.up_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.post_attention_layernorm.weight": "model-00065-of-00080.safetensors", - "model.layers.51.self_attn.key_layernorm.weight": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.o_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.o_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.o_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.q_proj.input_scale": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.q_proj.weight": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.q_proj.weight_scale": "model-00064-of-00080.safetensors", - "model.layers.51.self_attn.query_layernorm.weight": "model-00064-of-00080.safetensors", - "model.layers.52.input_layernorm.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.0.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.0.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.1.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.10.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.10.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.11.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.12.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.13.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.14.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.15.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.2.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.2.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.experts.3.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.3.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.4.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.5.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.6.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.7.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.8.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.down_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.down_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.down_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.experts.9.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.52.mlp.gate.wg.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.down_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.down_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.down_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.gate_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.gate_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.gate_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.up_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.up_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.mlp.shared_mlp.up_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.post_attention_layernorm.weight": "model-00066-of-00080.safetensors", - "model.layers.52.self_attn.k_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.k_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.k_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.key_layernorm.weight": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.o_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.o_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.o_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.q_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.q_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.q_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.query_layernorm.weight": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.v_proj.input_scale": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.v_proj.weight": "model-00065-of-00080.safetensors", - "model.layers.52.self_attn.v_proj.weight_scale": "model-00065-of-00080.safetensors", - "model.layers.53.input_layernorm.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.0.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.0.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.1.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.10.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.11.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.12.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.13.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.13.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.13.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.13.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.13.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.13.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.13.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.13.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.13.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.14.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.14.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.15.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.53.mlp.experts.2.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.2.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.3.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.4.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.5.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.6.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.7.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.8.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.gate_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.gate_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.gate_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.up_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.up_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.experts.9.up_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.gate.wg.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.down_proj.input_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.down_proj.weight": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.down_proj.weight_scale": "model-00067-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.gate_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.gate_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.gate_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.up_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.up_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.53.mlp.shared_mlp.up_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.53.post_attention_layernorm.weight": "model-00068-of-00080.safetensors", - "model.layers.53.self_attn.key_layernorm.weight": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.o_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.o_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.o_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.q_proj.input_scale": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.q_proj.weight": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.q_proj.weight_scale": "model-00066-of-00080.safetensors", - "model.layers.53.self_attn.query_layernorm.weight": "model-00066-of-00080.safetensors", - "model.layers.54.input_layernorm.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.0.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.0.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.1.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.10.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.10.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.10.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.10.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.10.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.10.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.10.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.10.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.10.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.11.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.12.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.13.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.14.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.15.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.54.mlp.experts.2.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.2.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.3.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.4.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.5.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.6.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.7.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.8.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.experts.9.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.gate.wg.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.down_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.down_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.down_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.gate_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.gate_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.gate_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.up_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.up_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.mlp.shared_mlp.up_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.post_attention_layernorm.weight": "model-00069-of-00080.safetensors", - "model.layers.54.self_attn.k_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.k_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.k_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.key_layernorm.weight": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.o_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.o_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.o_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.q_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.q_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.q_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.query_layernorm.weight": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.v_proj.input_scale": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.v_proj.weight": "model-00068-of-00080.safetensors", - "model.layers.54.self_attn.v_proj.weight_scale": "model-00068-of-00080.safetensors", - "model.layers.55.input_layernorm.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.0.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.0.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.1.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.10.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.10.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.11.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.12.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.13.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.14.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.15.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.2.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.2.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.3.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.4.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.5.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.6.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.experts.7.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.7.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.8.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.experts.9.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.55.mlp.gate.wg.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.down_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.down_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.down_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.gate_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.gate_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.gate_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.up_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.up_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.mlp.shared_mlp.up_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.post_attention_layernorm.weight": "model-00070-of-00080.safetensors", - "model.layers.55.self_attn.key_layernorm.weight": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.o_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.o_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.o_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.q_proj.input_scale": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.q_proj.weight": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.q_proj.weight_scale": "model-00069-of-00080.safetensors", - "model.layers.55.self_attn.query_layernorm.weight": "model-00069-of-00080.safetensors", - "model.layers.56.input_layernorm.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.0.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.0.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.1.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.10.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.10.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.11.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.12.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.13.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.14.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.15.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.2.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.2.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.3.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.3.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.3.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.3.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.3.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.3.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.3.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.3.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.3.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.experts.4.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.4.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.5.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.6.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.7.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.8.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.experts.9.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.56.mlp.gate.wg.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.down_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.down_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.down_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.gate_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.gate_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.gate_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.up_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.up_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.mlp.shared_mlp.up_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.post_attention_layernorm.weight": "model-00071-of-00080.safetensors", - "model.layers.56.self_attn.k_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.k_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.k_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.key_layernorm.weight": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.o_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.o_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.o_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.q_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.q_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.q_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.query_layernorm.weight": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.v_proj.input_scale": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.v_proj.weight": "model-00070-of-00080.safetensors", - "model.layers.56.self_attn.v_proj.weight_scale": "model-00070-of-00080.safetensors", - "model.layers.57.input_layernorm.weight": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.0.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.0.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.0.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.0.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.experts.0.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.experts.0.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.experts.0.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.0.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.0.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.1.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.10.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.11.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.12.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.13.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.14.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.14.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.14.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.14.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.14.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.14.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.14.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.14.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.14.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.15.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.57.mlp.experts.2.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.2.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.3.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.4.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.5.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.6.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.7.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.8.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.down_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.down_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.down_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.gate_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.gate_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.gate_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.up_proj.input_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.up_proj.weight": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.experts.9.up_proj.weight_scale": "model-00072-of-00080.safetensors", - "model.layers.57.mlp.gate.wg.weight": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.down_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.down_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.down_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.gate_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.gate_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.gate_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.up_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.up_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.57.mlp.shared_mlp.up_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.57.post_attention_layernorm.weight": "model-00073-of-00080.safetensors", - "model.layers.57.self_attn.key_layernorm.weight": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.o_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.o_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.o_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.q_proj.input_scale": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.q_proj.weight": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.q_proj.weight_scale": "model-00071-of-00080.safetensors", - "model.layers.57.self_attn.query_layernorm.weight": "model-00071-of-00080.safetensors", - "model.layers.58.input_layernorm.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.0.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.0.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.1.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.10.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.11.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.11.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.12.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.13.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.14.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.15.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.58.mlp.experts.2.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.2.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.3.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.4.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.5.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.6.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.7.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.8.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.experts.9.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.gate.wg.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.down_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.down_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.down_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.gate_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.gate_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.gate_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.up_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.up_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.mlp.shared_mlp.up_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.post_attention_layernorm.weight": "model-00074-of-00080.safetensors", - "model.layers.58.self_attn.k_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.k_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.k_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.key_layernorm.weight": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.o_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.o_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.o_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.q_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.q_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.q_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.query_layernorm.weight": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.v_proj.input_scale": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.v_proj.weight": "model-00073-of-00080.safetensors", - "model.layers.58.self_attn.v_proj.weight_scale": "model-00073-of-00080.safetensors", - "model.layers.59.input_layernorm.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.0.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.0.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.1.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.10.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.10.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.11.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.12.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.13.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.14.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.15.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.2.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.2.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.3.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.4.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.5.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.6.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.7.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.7.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.7.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.7.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.7.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.7.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.7.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.7.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.7.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.experts.8.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.8.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.experts.9.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.59.mlp.gate.wg.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.down_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.down_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.down_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.gate_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.gate_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.gate_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.up_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.up_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.mlp.shared_mlp.up_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.post_attention_layernorm.weight": "model-00075-of-00080.safetensors", - "model.layers.59.self_attn.key_layernorm.weight": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.o_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.o_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.o_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.q_proj.input_scale": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.q_proj.weight": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.q_proj.weight_scale": "model-00074-of-00080.safetensors", - "model.layers.59.self_attn.query_layernorm.weight": "model-00074-of-00080.safetensors", - "model.layers.6.input_layernorm.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.0.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.0.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.1.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.experts.10.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.10.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.11.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.12.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.13.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.14.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.15.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.2.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.3.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.4.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.5.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.6.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.7.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.8.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.down_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.down_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.down_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.gate_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.gate_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.up_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.up_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.experts.9.up_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.6.mlp.gate.wg.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.down_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.down_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.down_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.gate_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.gate_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.gate_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.up_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.up_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.mlp.shared_mlp.up_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00009-of-00080.safetensors", - "model.layers.6.self_attn.k_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.k_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.key_layernorm.weight": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.o_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.o_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.q_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.q_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.query_layernorm.weight": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.v_proj.input_scale": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00008-of-00080.safetensors", - "model.layers.6.self_attn.v_proj.weight_scale": "model-00008-of-00080.safetensors", - "model.layers.60.input_layernorm.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.0.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.0.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.1.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.10.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.10.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.11.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.12.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.13.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.14.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.15.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.2.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.2.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.3.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.4.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.4.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.4.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.4.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.4.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.4.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.experts.4.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.4.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.4.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.5.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.6.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.7.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.8.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.experts.9.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.60.mlp.gate.wg.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.down_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.down_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.down_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.gate_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.gate_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.gate_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.up_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.up_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.mlp.shared_mlp.up_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.post_attention_layernorm.weight": "model-00076-of-00080.safetensors", - "model.layers.60.self_attn.k_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.k_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.k_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.key_layernorm.weight": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.o_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.o_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.o_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.q_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.q_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.q_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.query_layernorm.weight": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.v_proj.input_scale": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.v_proj.weight": "model-00075-of-00080.safetensors", - "model.layers.60.self_attn.v_proj.weight_scale": "model-00075-of-00080.safetensors", - "model.layers.61.input_layernorm.weight": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.0.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.0.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.experts.1.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.1.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.10.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.11.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.12.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.13.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.14.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.15.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.15.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.61.mlp.experts.2.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.2.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.3.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.4.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.5.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.6.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.7.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.8.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.down_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.down_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.down_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.gate_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.gate_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.gate_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.up_proj.input_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.up_proj.weight": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.experts.9.up_proj.weight_scale": "model-00077-of-00080.safetensors", - "model.layers.61.mlp.gate.wg.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.down_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.down_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.down_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.gate_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.gate_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.gate_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.up_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.up_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.mlp.shared_mlp.up_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.post_attention_layernorm.weight": "model-00078-of-00080.safetensors", - "model.layers.61.self_attn.key_layernorm.weight": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.o_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.o_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.o_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.q_proj.input_scale": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.q_proj.weight": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.q_proj.weight_scale": "model-00076-of-00080.safetensors", - "model.layers.61.self_attn.query_layernorm.weight": "model-00076-of-00080.safetensors", - "model.layers.62.input_layernorm.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.0.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.0.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.1.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.10.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.11.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.11.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.11.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.11.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.11.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.11.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.11.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.11.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.11.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.12.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.12.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.13.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.14.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.15.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.62.mlp.experts.2.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.2.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.3.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.4.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.5.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.6.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.7.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.8.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.experts.9.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.gate.wg.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.down_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.down_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.down_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.gate_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.gate_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.gate_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.up_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.up_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.mlp.shared_mlp.up_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.post_attention_layernorm.weight": "model-00079-of-00080.safetensors", - "model.layers.62.self_attn.k_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.k_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.k_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.key_layernorm.weight": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.o_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.o_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.o_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.q_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.q_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.q_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.query_layernorm.weight": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.v_proj.input_scale": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.v_proj.weight": "model-00078-of-00080.safetensors", - "model.layers.62.self_attn.v_proj.weight_scale": "model-00078-of-00080.safetensors", - "model.layers.63.input_layernorm.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.0.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.0.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.1.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.10.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.10.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.11.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.12.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.13.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.14.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.15.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.2.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.2.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.3.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.4.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.5.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.6.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.7.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.8.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.8.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.8.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.8.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.8.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.8.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.experts.8.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.8.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.8.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.down_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.down_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.down_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.gate_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.gate_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.gate_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.up_proj.input_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.up_proj.weight": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.experts.9.up_proj.weight_scale": "model-00080-of-00080.safetensors", - "model.layers.63.mlp.gate.wg.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.down_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.down_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.down_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.gate_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.gate_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.gate_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.up_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.up_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.mlp.shared_mlp.up_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.post_attention_layernorm.weight": "model-00080-of-00080.safetensors", - "model.layers.63.self_attn.key_layernorm.weight": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.o_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.o_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.o_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.q_proj.input_scale": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.q_proj.weight": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.q_proj.weight_scale": "model-00079-of-00080.safetensors", - "model.layers.63.self_attn.query_layernorm.weight": "model-00079-of-00080.safetensors", - "model.layers.7.input_layernorm.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.0.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.0.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.1.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.10.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.11.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.12.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.13.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.13.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.14.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.15.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.7.mlp.experts.2.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.2.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.3.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.4.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.5.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.6.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.7.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.8.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.experts.9.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.gate.wg.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.down_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.down_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.down_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.gate_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.gate_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.gate_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.up_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.up_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.mlp.shared_mlp.up_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00011-of-00080.safetensors", - "model.layers.7.self_attn.key_layernorm.weight": "model-00010-of-00080.safetensors", - "model.layers.7.self_attn.o_proj.input_scale": "model-00010-of-00080.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00010-of-00080.safetensors", - "model.layers.7.self_attn.o_proj.weight_scale": "model-00010-of-00080.safetensors", - "model.layers.7.self_attn.q_proj.input_scale": "model-00009-of-00080.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00009-of-00080.safetensors", - "model.layers.7.self_attn.q_proj.weight_scale": "model-00009-of-00080.safetensors", - "model.layers.7.self_attn.query_layernorm.weight": "model-00010-of-00080.safetensors", - "model.layers.8.input_layernorm.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.0.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.0.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.1.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.10.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.10.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.11.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.12.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.13.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.14.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.15.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.2.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.2.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.3.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.4.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.5.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.6.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.7.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.8.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.9.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.9.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.9.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.8.mlp.experts.9.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.9.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.9.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.9.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.experts.9.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.gate.wg.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.down_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.down_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.down_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.gate_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.gate_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.gate_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.up_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.up_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.mlp.shared_mlp.up_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00012-of-00080.safetensors", - "model.layers.8.self_attn.k_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.k_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.key_layernorm.weight": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.o_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.o_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.q_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.q_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.query_layernorm.weight": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.v_proj.input_scale": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00011-of-00080.safetensors", - "model.layers.8.self_attn.v_proj.weight_scale": "model-00011-of-00080.safetensors", - "model.layers.9.input_layernorm.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.0.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.0.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.1.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.10.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.10.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.11.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.12.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.13.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.14.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.15.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.2.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.2.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.3.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.4.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.5.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.6.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.6.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.6.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.6.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.6.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.experts.6.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.6.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.6.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.7.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.8.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.down_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.down_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.down_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.gate_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.gate_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.up_proj.input_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.up_proj.weight": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.experts.9.up_proj.weight_scale": "model-00013-of-00080.safetensors", - "model.layers.9.mlp.gate.wg.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.down_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.down_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.down_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.gate_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.gate_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.gate_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.up_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.up_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.mlp.shared_mlp.up_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00013-of-00080.safetensors", - "model.layers.9.self_attn.key_layernorm.weight": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.o_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.o_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.q_proj.input_scale": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.q_proj.weight_scale": "model-00012-of-00080.safetensors", - "model.layers.9.self_attn.query_layernorm.weight": "model-00012-of-00080.safetensors", - "model.norm.weight": "model-00080-of-00080.safetensors" - } -}