Upload folder using huggingface_hub
Browse files- config.json +48 -0
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +160 -304
- recipe.yaml +1 -1
config.json
CHANGED
|
@@ -60,6 +60,54 @@
|
|
| 60 |
"format": "nvfp4-pack-quantized",
|
| 61 |
"global_compression_ratio": null,
|
| 62 |
"ignore": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
"lm_head"
|
| 64 |
],
|
| 65 |
"kv_cache_scheme": null,
|
|
|
|
| 60 |
"format": "nvfp4-pack-quantized",
|
| 61 |
"global_compression_ratio": null,
|
| 62 |
"ignore": [
|
| 63 |
+
"model.layers.0.mlp.gate",
|
| 64 |
+
"model.layers.1.mlp.gate",
|
| 65 |
+
"model.layers.2.mlp.gate",
|
| 66 |
+
"model.layers.3.mlp.gate",
|
| 67 |
+
"model.layers.4.mlp.gate",
|
| 68 |
+
"model.layers.5.mlp.gate",
|
| 69 |
+
"model.layers.6.mlp.gate",
|
| 70 |
+
"model.layers.7.mlp.gate",
|
| 71 |
+
"model.layers.8.mlp.gate",
|
| 72 |
+
"model.layers.9.mlp.gate",
|
| 73 |
+
"model.layers.10.mlp.gate",
|
| 74 |
+
"model.layers.11.mlp.gate",
|
| 75 |
+
"model.layers.12.mlp.gate",
|
| 76 |
+
"model.layers.13.mlp.gate",
|
| 77 |
+
"model.layers.14.mlp.gate",
|
| 78 |
+
"model.layers.15.mlp.gate",
|
| 79 |
+
"model.layers.16.mlp.gate",
|
| 80 |
+
"model.layers.17.mlp.gate",
|
| 81 |
+
"model.layers.18.mlp.gate",
|
| 82 |
+
"model.layers.19.mlp.gate",
|
| 83 |
+
"model.layers.20.mlp.gate",
|
| 84 |
+
"model.layers.21.mlp.gate",
|
| 85 |
+
"model.layers.22.mlp.gate",
|
| 86 |
+
"model.layers.23.mlp.gate",
|
| 87 |
+
"model.layers.24.mlp.gate",
|
| 88 |
+
"model.layers.25.mlp.gate",
|
| 89 |
+
"model.layers.26.mlp.gate",
|
| 90 |
+
"model.layers.27.mlp.gate",
|
| 91 |
+
"model.layers.28.mlp.gate",
|
| 92 |
+
"model.layers.29.mlp.gate",
|
| 93 |
+
"model.layers.30.mlp.gate",
|
| 94 |
+
"model.layers.31.mlp.gate",
|
| 95 |
+
"model.layers.32.mlp.gate",
|
| 96 |
+
"model.layers.33.mlp.gate",
|
| 97 |
+
"model.layers.34.mlp.gate",
|
| 98 |
+
"model.layers.35.mlp.gate",
|
| 99 |
+
"model.layers.36.mlp.gate",
|
| 100 |
+
"model.layers.37.mlp.gate",
|
| 101 |
+
"model.layers.38.mlp.gate",
|
| 102 |
+
"model.layers.39.mlp.gate",
|
| 103 |
+
"model.layers.40.mlp.gate",
|
| 104 |
+
"model.layers.41.mlp.gate",
|
| 105 |
+
"model.layers.42.mlp.gate",
|
| 106 |
+
"model.layers.43.mlp.gate",
|
| 107 |
+
"model.layers.44.mlp.gate",
|
| 108 |
+
"model.layers.45.mlp.gate",
|
| 109 |
+
"model.layers.46.mlp.gate",
|
| 110 |
+
"model.layers.47.mlp.gate",
|
| 111 |
"lm_head"
|
| 112 |
],
|
| 113 |
"kv_cache_scheme": null,
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cc625c6fc82c8d84d196d3c9ec6c6813594ccb8321e1a1f29d6d2827c99a498
|
| 3 |
+
size 5002279496
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5512103702dd54a3c410f6a367e08cef8e1107faf3276bfb584f3195befad0fc
|
| 3 |
+
size 5002724080
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5699f8c32a1254f67071924c40a7c2f26375001592c17d83f52fc4bcdc90a2d1
|
| 3 |
+
size 5002036032
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cea1e3dbae26e04a2773b68450059c3bf3da633a00747be37c4f9b3a95cb3acb
|
| 3 |
+
size 3089670712
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
|
@@ -1542,10 +1542,7 @@
|
|
| 1542 |
"model.layers.0.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 1543 |
"model.layers.0.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 1544 |
"model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 1545 |
-
"model.layers.0.mlp.gate.
|
| 1546 |
-
"model.layers.0.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 1547 |
-
"model.layers.0.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 1548 |
-
"model.layers.0.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 1549 |
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 1550 |
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 1551 |
"model.layers.0.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -3102,10 +3099,7 @@
|
|
| 3102 |
"model.layers.1.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 3103 |
"model.layers.1.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 3104 |
"model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 3105 |
-
"model.layers.1.mlp.gate.
|
| 3106 |
-
"model.layers.1.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 3107 |
-
"model.layers.1.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 3108 |
-
"model.layers.1.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 3109 |
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 3110 |
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 3111 |
"model.layers.1.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -4662,10 +4656,7 @@
|
|
| 4662 |
"model.layers.10.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 4663 |
"model.layers.10.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 4664 |
"model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 4665 |
-
"model.layers.10.mlp.gate.
|
| 4666 |
-
"model.layers.10.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 4667 |
-
"model.layers.10.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 4668 |
-
"model.layers.10.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 4669 |
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 4670 |
"model.layers.10.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 4671 |
"model.layers.10.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -6222,10 +6213,7 @@
|
|
| 6222 |
"model.layers.11.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 6223 |
"model.layers.11.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 6224 |
"model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 6225 |
-
"model.layers.11.mlp.gate.
|
| 6226 |
-
"model.layers.11.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 6227 |
-
"model.layers.11.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 6228 |
-
"model.layers.11.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 6229 |
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 6230 |
"model.layers.11.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 6231 |
"model.layers.11.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -7230,10 +7218,10 @@
|
|
| 7230 |
"model.layers.12.mlp.experts.57.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7231 |
"model.layers.12.mlp.experts.57.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 7232 |
"model.layers.12.mlp.experts.57.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 7233 |
-
"model.layers.12.mlp.experts.58.down_proj.input_global_scale": "model-
|
| 7234 |
-
"model.layers.12.mlp.experts.58.down_proj.weight_global_scale": "model-
|
| 7235 |
-
"model.layers.12.mlp.experts.58.down_proj.weight_packed": "model-
|
| 7236 |
-
"model.layers.12.mlp.experts.58.down_proj.weight_scale": "model-
|
| 7237 |
"model.layers.12.mlp.experts.58.gate_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
| 7238 |
"model.layers.12.mlp.experts.58.gate_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7239 |
"model.layers.12.mlp.experts.58.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
|
|
@@ -7242,18 +7230,18 @@
|
|
| 7242 |
"model.layers.12.mlp.experts.58.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7243 |
"model.layers.12.mlp.experts.58.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 7244 |
"model.layers.12.mlp.experts.58.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 7245 |
-
"model.layers.12.mlp.experts.59.down_proj.input_global_scale": "model-
|
| 7246 |
-
"model.layers.12.mlp.experts.59.down_proj.weight_global_scale": "model-
|
| 7247 |
-
"model.layers.12.mlp.experts.59.down_proj.weight_packed": "model-
|
| 7248 |
-
"model.layers.12.mlp.experts.59.down_proj.weight_scale": "model-
|
| 7249 |
-
"model.layers.12.mlp.experts.59.gate_proj.input_global_scale": "model-
|
| 7250 |
-
"model.layers.12.mlp.experts.59.gate_proj.weight_global_scale": "model-
|
| 7251 |
-
"model.layers.12.mlp.experts.59.gate_proj.weight_packed": "model-
|
| 7252 |
-
"model.layers.12.mlp.experts.59.gate_proj.weight_scale": "model-
|
| 7253 |
-
"model.layers.12.mlp.experts.59.up_proj.input_global_scale": "model-
|
| 7254 |
-
"model.layers.12.mlp.experts.59.up_proj.weight_global_scale": "model-
|
| 7255 |
-
"model.layers.12.mlp.experts.59.up_proj.weight_packed": "model-
|
| 7256 |
-
"model.layers.12.mlp.experts.59.up_proj.weight_scale": "model-
|
| 7257 |
"model.layers.12.mlp.experts.6.down_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
| 7258 |
"model.layers.12.mlp.experts.6.down_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7259 |
"model.layers.12.mlp.experts.6.down_proj.weight_packed": "model-00001-of-00004.safetensors",
|
|
@@ -7270,10 +7258,10 @@
|
|
| 7270 |
"model.layers.12.mlp.experts.60.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7271 |
"model.layers.12.mlp.experts.60.down_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7272 |
"model.layers.12.mlp.experts.60.down_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7273 |
-
"model.layers.12.mlp.experts.60.gate_proj.input_global_scale": "model-
|
| 7274 |
-
"model.layers.12.mlp.experts.60.gate_proj.weight_global_scale": "model-
|
| 7275 |
-
"model.layers.12.mlp.experts.60.gate_proj.weight_packed": "model-
|
| 7276 |
-
"model.layers.12.mlp.experts.60.gate_proj.weight_scale": "model-
|
| 7277 |
"model.layers.12.mlp.experts.60.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7278 |
"model.layers.12.mlp.experts.60.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7279 |
"model.layers.12.mlp.experts.60.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
|
@@ -7782,10 +7770,7 @@
|
|
| 7782 |
"model.layers.12.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7783 |
"model.layers.12.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7784 |
"model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7785 |
-
"model.layers.12.mlp.gate.
|
| 7786 |
-
"model.layers.12.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7787 |
-
"model.layers.12.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 7788 |
-
"model.layers.12.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 7789 |
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 7790 |
"model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 7791 |
"model.layers.12.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -9342,10 +9327,7 @@
|
|
| 9342 |
"model.layers.13.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 9343 |
"model.layers.13.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 9344 |
"model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 9345 |
-
"model.layers.13.mlp.gate.
|
| 9346 |
-
"model.layers.13.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 9347 |
-
"model.layers.13.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 9348 |
-
"model.layers.13.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 9349 |
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 9350 |
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 9351 |
"model.layers.13.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -10902,10 +10884,7 @@
|
|
| 10902 |
"model.layers.14.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 10903 |
"model.layers.14.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 10904 |
"model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 10905 |
-
"model.layers.14.mlp.gate.
|
| 10906 |
-
"model.layers.14.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 10907 |
-
"model.layers.14.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 10908 |
-
"model.layers.14.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 10909 |
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 10910 |
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 10911 |
"model.layers.14.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -12462,10 +12441,7 @@
|
|
| 12462 |
"model.layers.15.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 12463 |
"model.layers.15.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 12464 |
"model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 12465 |
-
"model.layers.15.mlp.gate.
|
| 12466 |
-
"model.layers.15.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 12467 |
-
"model.layers.15.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 12468 |
-
"model.layers.15.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 12469 |
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 12470 |
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 12471 |
"model.layers.15.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -14022,10 +13998,7 @@
|
|
| 14022 |
"model.layers.16.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 14023 |
"model.layers.16.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 14024 |
"model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 14025 |
-
"model.layers.16.mlp.gate.
|
| 14026 |
-
"model.layers.16.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 14027 |
-
"model.layers.16.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 14028 |
-
"model.layers.16.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 14029 |
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 14030 |
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 14031 |
"model.layers.16.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -15582,10 +15555,7 @@
|
|
| 15582 |
"model.layers.17.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 15583 |
"model.layers.17.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 15584 |
"model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 15585 |
-
"model.layers.17.mlp.gate.
|
| 15586 |
-
"model.layers.17.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 15587 |
-
"model.layers.17.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 15588 |
-
"model.layers.17.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 15589 |
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 15590 |
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 15591 |
"model.layers.17.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -17142,10 +17112,7 @@
|
|
| 17142 |
"model.layers.18.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 17143 |
"model.layers.18.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 17144 |
"model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 17145 |
-
"model.layers.18.mlp.gate.
|
| 17146 |
-
"model.layers.18.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 17147 |
-
"model.layers.18.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 17148 |
-
"model.layers.18.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 17149 |
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 17150 |
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 17151 |
"model.layers.18.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -18702,10 +18669,7 @@
|
|
| 18702 |
"model.layers.19.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 18703 |
"model.layers.19.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 18704 |
"model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 18705 |
-
"model.layers.19.mlp.gate.
|
| 18706 |
-
"model.layers.19.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 18707 |
-
"model.layers.19.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 18708 |
-
"model.layers.19.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 18709 |
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 18710 |
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 18711 |
"model.layers.19.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -20262,10 +20226,7 @@
|
|
| 20262 |
"model.layers.2.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 20263 |
"model.layers.2.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 20264 |
"model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 20265 |
-
"model.layers.2.mlp.gate.
|
| 20266 |
-
"model.layers.2.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 20267 |
-
"model.layers.2.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 20268 |
-
"model.layers.2.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 20269 |
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 20270 |
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 20271 |
"model.layers.2.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -21822,10 +21783,7 @@
|
|
| 21822 |
"model.layers.20.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 21823 |
"model.layers.20.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 21824 |
"model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 21825 |
-
"model.layers.20.mlp.gate.
|
| 21826 |
-
"model.layers.20.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 21827 |
-
"model.layers.20.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 21828 |
-
"model.layers.20.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 21829 |
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 21830 |
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 21831 |
"model.layers.20.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -23382,10 +23340,7 @@
|
|
| 23382 |
"model.layers.21.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 23383 |
"model.layers.21.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 23384 |
"model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 23385 |
-
"model.layers.21.mlp.gate.
|
| 23386 |
-
"model.layers.21.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 23387 |
-
"model.layers.21.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 23388 |
-
"model.layers.21.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 23389 |
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 23390 |
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 23391 |
"model.layers.21.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -24942,10 +24897,7 @@
|
|
| 24942 |
"model.layers.22.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 24943 |
"model.layers.22.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 24944 |
"model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 24945 |
-
"model.layers.22.mlp.gate.
|
| 24946 |
-
"model.layers.22.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 24947 |
-
"model.layers.22.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 24948 |
-
"model.layers.22.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 24949 |
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 24950 |
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 24951 |
"model.layers.22.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -26502,10 +26454,7 @@
|
|
| 26502 |
"model.layers.23.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 26503 |
"model.layers.23.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 26504 |
"model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 26505 |
-
"model.layers.23.mlp.gate.
|
| 26506 |
-
"model.layers.23.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 26507 |
-
"model.layers.23.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 26508 |
-
"model.layers.23.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 26509 |
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 26510 |
"model.layers.23.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 26511 |
"model.layers.23.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -28062,10 +28011,7 @@
|
|
| 28062 |
"model.layers.24.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 28063 |
"model.layers.24.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 28064 |
"model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 28065 |
-
"model.layers.24.mlp.gate.
|
| 28066 |
-
"model.layers.24.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 28067 |
-
"model.layers.24.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 28068 |
-
"model.layers.24.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 28069 |
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 28070 |
"model.layers.24.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 28071 |
"model.layers.24.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -29622,10 +29568,7 @@
|
|
| 29622 |
"model.layers.25.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 29623 |
"model.layers.25.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 29624 |
"model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 29625 |
-
"model.layers.25.mlp.gate.
|
| 29626 |
-
"model.layers.25.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 29627 |
-
"model.layers.25.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 29628 |
-
"model.layers.25.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 29629 |
"model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 29630 |
"model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 29631 |
"model.layers.25.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -31074,10 +31017,10 @@
|
|
| 31074 |
"model.layers.26.mlp.experts.90.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31075 |
"model.layers.26.mlp.experts.90.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 31076 |
"model.layers.26.mlp.experts.90.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 31077 |
-
"model.layers.26.mlp.experts.91.down_proj.input_global_scale": "model-
|
| 31078 |
-
"model.layers.26.mlp.experts.91.down_proj.weight_global_scale": "model-
|
| 31079 |
-
"model.layers.26.mlp.experts.91.down_proj.weight_packed": "model-
|
| 31080 |
-
"model.layers.26.mlp.experts.91.down_proj.weight_scale": "model-
|
| 31081 |
"model.layers.26.mlp.experts.91.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 31082 |
"model.layers.26.mlp.experts.91.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31083 |
"model.layers.26.mlp.experts.91.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
|
|
@@ -31085,43 +31028,43 @@
|
|
| 31085 |
"model.layers.26.mlp.experts.91.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 31086 |
"model.layers.26.mlp.experts.91.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31087 |
"model.layers.26.mlp.experts.91.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 31088 |
-
"model.layers.26.mlp.experts.91.up_proj.weight_scale": "model-
|
| 31089 |
-
"model.layers.26.mlp.experts.92.down_proj.input_global_scale": "model-
|
| 31090 |
-
"model.layers.26.mlp.experts.92.down_proj.weight_global_scale": "model-
|
| 31091 |
-
"model.layers.26.mlp.experts.92.down_proj.weight_packed": "model-
|
| 31092 |
-
"model.layers.26.mlp.experts.92.down_proj.weight_scale": "model-
|
| 31093 |
-
"model.layers.26.mlp.experts.92.gate_proj.input_global_scale": "model-
|
| 31094 |
-
"model.layers.26.mlp.experts.92.gate_proj.weight_global_scale": "model-
|
| 31095 |
-
"model.layers.26.mlp.experts.92.gate_proj.weight_packed": "model-
|
| 31096 |
-
"model.layers.26.mlp.experts.92.gate_proj.weight_scale": "model-
|
| 31097 |
-
"model.layers.26.mlp.experts.92.up_proj.input_global_scale": "model-
|
| 31098 |
-
"model.layers.26.mlp.experts.92.up_proj.weight_global_scale": "model-
|
| 31099 |
-
"model.layers.26.mlp.experts.92.up_proj.weight_packed": "model-
|
| 31100 |
-
"model.layers.26.mlp.experts.92.up_proj.weight_scale": "model-
|
| 31101 |
-
"model.layers.26.mlp.experts.93.down_proj.input_global_scale": "model-
|
| 31102 |
-
"model.layers.26.mlp.experts.93.down_proj.weight_global_scale": "model-
|
| 31103 |
-
"model.layers.26.mlp.experts.93.down_proj.weight_packed": "model-
|
| 31104 |
-
"model.layers.26.mlp.experts.93.down_proj.weight_scale": "model-
|
| 31105 |
-
"model.layers.26.mlp.experts.93.gate_proj.input_global_scale": "model-
|
| 31106 |
-
"model.layers.26.mlp.experts.93.gate_proj.weight_global_scale": "model-
|
| 31107 |
-
"model.layers.26.mlp.experts.93.gate_proj.weight_packed": "model-
|
| 31108 |
-
"model.layers.26.mlp.experts.93.gate_proj.weight_scale": "model-
|
| 31109 |
-
"model.layers.26.mlp.experts.93.up_proj.input_global_scale": "model-
|
| 31110 |
-
"model.layers.26.mlp.experts.93.up_proj.weight_global_scale": "model-
|
| 31111 |
-
"model.layers.26.mlp.experts.93.up_proj.weight_packed": "model-
|
| 31112 |
-
"model.layers.26.mlp.experts.93.up_proj.weight_scale": "model-
|
| 31113 |
-
"model.layers.26.mlp.experts.94.down_proj.input_global_scale": "model-
|
| 31114 |
-
"model.layers.26.mlp.experts.94.down_proj.weight_global_scale": "model-
|
| 31115 |
-
"model.layers.26.mlp.experts.94.down_proj.weight_packed": "model-
|
| 31116 |
-
"model.layers.26.mlp.experts.94.down_proj.weight_scale": "model-
|
| 31117 |
-
"model.layers.26.mlp.experts.94.gate_proj.input_global_scale": "model-
|
| 31118 |
-
"model.layers.26.mlp.experts.94.gate_proj.weight_global_scale": "model-
|
| 31119 |
-
"model.layers.26.mlp.experts.94.gate_proj.weight_packed": "model-
|
| 31120 |
-
"model.layers.26.mlp.experts.94.gate_proj.weight_scale": "model-
|
| 31121 |
-
"model.layers.26.mlp.experts.94.up_proj.input_global_scale": "model-
|
| 31122 |
-
"model.layers.26.mlp.experts.94.up_proj.weight_global_scale": "model-
|
| 31123 |
-
"model.layers.26.mlp.experts.94.up_proj.weight_packed": "model-
|
| 31124 |
-
"model.layers.26.mlp.experts.94.up_proj.weight_scale": "model-
|
| 31125 |
"model.layers.26.mlp.experts.95.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31126 |
"model.layers.26.mlp.experts.95.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31127 |
"model.layers.26.mlp.experts.95.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
|
@@ -31182,10 +31125,7 @@
|
|
| 31182 |
"model.layers.26.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31183 |
"model.layers.26.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31184 |
"model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31185 |
-
"model.layers.26.mlp.gate.
|
| 31186 |
-
"model.layers.26.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31187 |
-
"model.layers.26.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
|
| 31188 |
-
"model.layers.26.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
|
| 31189 |
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 31190 |
"model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 31191 |
"model.layers.26.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
@@ -32742,10 +32682,7 @@
|
|
| 32742 |
"model.layers.27.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 32743 |
"model.layers.27.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 32744 |
"model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 32745 |
-
"model.layers.27.mlp.gate.
|
| 32746 |
-
"model.layers.27.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 32747 |
-
"model.layers.27.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 32748 |
-
"model.layers.27.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 32749 |
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 32750 |
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 32751 |
"model.layers.27.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -34302,10 +34239,7 @@
|
|
| 34302 |
"model.layers.28.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 34303 |
"model.layers.28.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 34304 |
"model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 34305 |
-
"model.layers.28.mlp.gate.
|
| 34306 |
-
"model.layers.28.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 34307 |
-
"model.layers.28.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 34308 |
-
"model.layers.28.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 34309 |
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 34310 |
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 34311 |
"model.layers.28.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -35862,10 +35796,7 @@
|
|
| 35862 |
"model.layers.29.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 35863 |
"model.layers.29.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 35864 |
"model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 35865 |
-
"model.layers.29.mlp.gate.
|
| 35866 |
-
"model.layers.29.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 35867 |
-
"model.layers.29.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 35868 |
-
"model.layers.29.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 35869 |
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 35870 |
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 35871 |
"model.layers.29.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -37422,10 +37353,7 @@
|
|
| 37422 |
"model.layers.3.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 37423 |
"model.layers.3.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 37424 |
"model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 37425 |
-
"model.layers.3.mlp.gate.
|
| 37426 |
-
"model.layers.3.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 37427 |
-
"model.layers.3.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 37428 |
-
"model.layers.3.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 37429 |
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 37430 |
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 37431 |
"model.layers.3.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -38982,10 +38910,7 @@
|
|
| 38982 |
"model.layers.30.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 38983 |
"model.layers.30.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 38984 |
"model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 38985 |
-
"model.layers.30.mlp.gate.
|
| 38986 |
-
"model.layers.30.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 38987 |
-
"model.layers.30.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 38988 |
-
"model.layers.30.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 38989 |
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 38990 |
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 38991 |
"model.layers.30.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -40542,10 +40467,7 @@
|
|
| 40542 |
"model.layers.31.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 40543 |
"model.layers.31.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 40544 |
"model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 40545 |
-
"model.layers.31.mlp.gate.
|
| 40546 |
-
"model.layers.31.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 40547 |
-
"model.layers.31.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 40548 |
-
"model.layers.31.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 40549 |
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 40550 |
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 40551 |
"model.layers.31.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -42102,10 +42024,7 @@
|
|
| 42102 |
"model.layers.32.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 42103 |
"model.layers.32.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 42104 |
"model.layers.32.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 42105 |
-
"model.layers.32.mlp.gate.
|
| 42106 |
-
"model.layers.32.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 42107 |
-
"model.layers.32.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 42108 |
-
"model.layers.32.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 42109 |
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 42110 |
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 42111 |
"model.layers.32.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -43662,10 +43581,7 @@
|
|
| 43662 |
"model.layers.33.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 43663 |
"model.layers.33.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 43664 |
"model.layers.33.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 43665 |
-
"model.layers.33.mlp.gate.
|
| 43666 |
-
"model.layers.33.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 43667 |
-
"model.layers.33.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 43668 |
-
"model.layers.33.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 43669 |
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 43670 |
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 43671 |
"model.layers.33.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -45222,10 +45138,7 @@
|
|
| 45222 |
"model.layers.34.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 45223 |
"model.layers.34.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 45224 |
"model.layers.34.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 45225 |
-
"model.layers.34.mlp.gate.
|
| 45226 |
-
"model.layers.34.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 45227 |
-
"model.layers.34.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 45228 |
-
"model.layers.34.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 45229 |
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 45230 |
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 45231 |
"model.layers.34.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -46782,10 +46695,7 @@
|
|
| 46782 |
"model.layers.35.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 46783 |
"model.layers.35.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 46784 |
"model.layers.35.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 46785 |
-
"model.layers.35.mlp.gate.
|
| 46786 |
-
"model.layers.35.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 46787 |
-
"model.layers.35.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 46788 |
-
"model.layers.35.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 46789 |
"model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 46790 |
"model.layers.35.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 46791 |
"model.layers.35.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -48342,10 +48252,7 @@
|
|
| 48342 |
"model.layers.36.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 48343 |
"model.layers.36.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 48344 |
"model.layers.36.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 48345 |
-
"model.layers.36.mlp.gate.
|
| 48346 |
-
"model.layers.36.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 48347 |
-
"model.layers.36.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 48348 |
-
"model.layers.36.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 48349 |
"model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 48350 |
"model.layers.36.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 48351 |
"model.layers.36.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -49902,10 +49809,7 @@
|
|
| 49902 |
"model.layers.37.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 49903 |
"model.layers.37.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 49904 |
"model.layers.37.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 49905 |
-
"model.layers.37.mlp.gate.
|
| 49906 |
-
"model.layers.37.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 49907 |
-
"model.layers.37.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 49908 |
-
"model.layers.37.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 49909 |
"model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 49910 |
"model.layers.37.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 49911 |
"model.layers.37.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -51462,10 +51366,7 @@
|
|
| 51462 |
"model.layers.38.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 51463 |
"model.layers.38.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 51464 |
"model.layers.38.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 51465 |
-
"model.layers.38.mlp.gate.
|
| 51466 |
-
"model.layers.38.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 51467 |
-
"model.layers.38.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 51468 |
-
"model.layers.38.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 51469 |
"model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 51470 |
"model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 51471 |
"model.layers.38.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -53022,10 +52923,7 @@
|
|
| 53022 |
"model.layers.39.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 53023 |
"model.layers.39.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 53024 |
"model.layers.39.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 53025 |
-
"model.layers.39.mlp.gate.
|
| 53026 |
-
"model.layers.39.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 53027 |
-
"model.layers.39.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 53028 |
-
"model.layers.39.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 53029 |
"model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 53030 |
"model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 53031 |
"model.layers.39.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -54582,10 +54480,7 @@
|
|
| 54582 |
"model.layers.4.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 54583 |
"model.layers.4.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 54584 |
"model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 54585 |
-
"model.layers.4.mlp.gate.
|
| 54586 |
-
"model.layers.4.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 54587 |
-
"model.layers.4.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 54588 |
-
"model.layers.4.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 54589 |
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 54590 |
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 54591 |
"model.layers.4.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -54605,7 +54500,7 @@
|
|
| 54605 |
"model.layers.4.self_attn.v_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 54606 |
"model.layers.4.self_attn.v_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 54607 |
"model.layers.4.self_attn.v_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 54608 |
-
"model.layers.40.input_layernorm.weight": "model-
|
| 54609 |
"model.layers.40.mlp.experts.0.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 54610 |
"model.layers.40.mlp.experts.0.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54611 |
"model.layers.40.mlp.experts.0.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
|
@@ -54954,54 +54849,54 @@
|
|
| 54954 |
"model.layers.40.mlp.experts.123.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54955 |
"model.layers.40.mlp.experts.123.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 54956 |
"model.layers.40.mlp.experts.123.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 54957 |
-
"model.layers.40.mlp.experts.124.down_proj.input_global_scale": "model-
|
| 54958 |
-
"model.layers.40.mlp.experts.124.down_proj.weight_global_scale": "model-
|
| 54959 |
-
"model.layers.40.mlp.experts.124.down_proj.weight_packed": "model-
|
| 54960 |
-
"model.layers.40.mlp.experts.124.down_proj.weight_scale": "model-
|
| 54961 |
"model.layers.40.mlp.experts.124.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 54962 |
"model.layers.40.mlp.experts.124.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54963 |
"model.layers.40.mlp.experts.124.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 54964 |
"model.layers.40.mlp.experts.124.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 54965 |
-
"model.layers.40.mlp.experts.124.up_proj.input_global_scale": "model-
|
| 54966 |
-
"model.layers.40.mlp.experts.124.up_proj.weight_global_scale": "model-
|
| 54967 |
-
"model.layers.40.mlp.experts.124.up_proj.weight_packed": "model-
|
| 54968 |
-
"model.layers.40.mlp.experts.124.up_proj.weight_scale": "model-
|
| 54969 |
-
"model.layers.40.mlp.experts.125.down_proj.input_global_scale": "model-
|
| 54970 |
-
"model.layers.40.mlp.experts.125.down_proj.weight_global_scale": "model-
|
| 54971 |
-
"model.layers.40.mlp.experts.125.down_proj.weight_packed": "model-
|
| 54972 |
-
"model.layers.40.mlp.experts.125.down_proj.weight_scale": "model-
|
| 54973 |
-
"model.layers.40.mlp.experts.125.gate_proj.input_global_scale": "model-
|
| 54974 |
-
"model.layers.40.mlp.experts.125.gate_proj.weight_global_scale": "model-
|
| 54975 |
-
"model.layers.40.mlp.experts.125.gate_proj.weight_packed": "model-
|
| 54976 |
-
"model.layers.40.mlp.experts.125.gate_proj.weight_scale": "model-
|
| 54977 |
-
"model.layers.40.mlp.experts.125.up_proj.input_global_scale": "model-
|
| 54978 |
-
"model.layers.40.mlp.experts.125.up_proj.weight_global_scale": "model-
|
| 54979 |
-
"model.layers.40.mlp.experts.125.up_proj.weight_packed": "model-
|
| 54980 |
-
"model.layers.40.mlp.experts.125.up_proj.weight_scale": "model-
|
| 54981 |
-
"model.layers.40.mlp.experts.126.down_proj.input_global_scale": "model-
|
| 54982 |
-
"model.layers.40.mlp.experts.126.down_proj.weight_global_scale": "model-
|
| 54983 |
-
"model.layers.40.mlp.experts.126.down_proj.weight_packed": "model-
|
| 54984 |
-
"model.layers.40.mlp.experts.126.down_proj.weight_scale": "model-
|
| 54985 |
-
"model.layers.40.mlp.experts.126.gate_proj.input_global_scale": "model-
|
| 54986 |
-
"model.layers.40.mlp.experts.126.gate_proj.weight_global_scale": "model-
|
| 54987 |
-
"model.layers.40.mlp.experts.126.gate_proj.weight_packed": "model-
|
| 54988 |
-
"model.layers.40.mlp.experts.126.gate_proj.weight_scale": "model-
|
| 54989 |
-
"model.layers.40.mlp.experts.126.up_proj.input_global_scale": "model-
|
| 54990 |
-
"model.layers.40.mlp.experts.126.up_proj.weight_global_scale": "model-
|
| 54991 |
-
"model.layers.40.mlp.experts.126.up_proj.weight_packed": "model-
|
| 54992 |
-
"model.layers.40.mlp.experts.126.up_proj.weight_scale": "model-
|
| 54993 |
-
"model.layers.40.mlp.experts.127.down_proj.input_global_scale": "model-
|
| 54994 |
-
"model.layers.40.mlp.experts.127.down_proj.weight_global_scale": "model-
|
| 54995 |
-
"model.layers.40.mlp.experts.127.down_proj.weight_packed": "model-
|
| 54996 |
-
"model.layers.40.mlp.experts.127.down_proj.weight_scale": "model-
|
| 54997 |
-
"model.layers.40.mlp.experts.127.gate_proj.input_global_scale": "model-
|
| 54998 |
-
"model.layers.40.mlp.experts.127.gate_proj.weight_global_scale": "model-
|
| 54999 |
-
"model.layers.40.mlp.experts.127.gate_proj.weight_packed": "model-
|
| 55000 |
-
"model.layers.40.mlp.experts.127.gate_proj.weight_scale": "model-
|
| 55001 |
-
"model.layers.40.mlp.experts.127.up_proj.input_global_scale": "model-
|
| 55002 |
-
"model.layers.40.mlp.experts.127.up_proj.weight_global_scale": "model-
|
| 55003 |
-
"model.layers.40.mlp.experts.127.up_proj.weight_packed": "model-
|
| 55004 |
-
"model.layers.40.mlp.experts.127.up_proj.weight_scale": "model-
|
| 55005 |
"model.layers.40.mlp.experts.13.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 55006 |
"model.layers.40.mlp.experts.13.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 55007 |
"model.layers.40.mlp.experts.13.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
|
@@ -56142,11 +56037,8 @@
|
|
| 56142 |
"model.layers.40.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 56143 |
"model.layers.40.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 56144 |
"model.layers.40.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 56145 |
-
"model.layers.40.mlp.gate.
|
| 56146 |
-
"model.layers.40.
|
| 56147 |
-
"model.layers.40.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
|
| 56148 |
-
"model.layers.40.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
|
| 56149 |
-
"model.layers.40.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 56150 |
"model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 56151 |
"model.layers.40.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 56152 |
"model.layers.40.self_attn.k_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
|
@@ -57702,10 +57594,7 @@
|
|
| 57702 |
"model.layers.41.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 57703 |
"model.layers.41.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 57704 |
"model.layers.41.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 57705 |
-
"model.layers.41.mlp.gate.
|
| 57706 |
-
"model.layers.41.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 57707 |
-
"model.layers.41.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 57708 |
-
"model.layers.41.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 57709 |
"model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 57710 |
"model.layers.41.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 57711 |
"model.layers.41.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -57717,10 +57606,10 @@
|
|
| 57717 |
"model.layers.41.self_attn.o_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 57718 |
"model.layers.41.self_attn.o_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 57719 |
"model.layers.41.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
| 57720 |
-
"model.layers.41.self_attn.q_proj.input_global_scale": "model-
|
| 57721 |
-
"model.layers.41.self_attn.q_proj.weight_global_scale": "model-
|
| 57722 |
-
"model.layers.41.self_attn.q_proj.weight_packed": "model-
|
| 57723 |
-
"model.layers.41.self_attn.q_proj.weight_scale": "model-
|
| 57724 |
"model.layers.41.self_attn.v_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 57725 |
"model.layers.41.self_attn.v_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 57726 |
"model.layers.41.self_attn.v_proj.weight_packed": "model-00004-of-00004.safetensors",
|
|
@@ -59262,10 +59151,7 @@
|
|
| 59262 |
"model.layers.42.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 59263 |
"model.layers.42.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 59264 |
"model.layers.42.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 59265 |
-
"model.layers.42.mlp.gate.
|
| 59266 |
-
"model.layers.42.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 59267 |
-
"model.layers.42.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 59268 |
-
"model.layers.42.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 59269 |
"model.layers.42.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 59270 |
"model.layers.42.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 59271 |
"model.layers.42.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -60822,10 +60708,7 @@
|
|
| 60822 |
"model.layers.43.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 60823 |
"model.layers.43.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 60824 |
"model.layers.43.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 60825 |
-
"model.layers.43.mlp.gate.
|
| 60826 |
-
"model.layers.43.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 60827 |
-
"model.layers.43.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 60828 |
-
"model.layers.43.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 60829 |
"model.layers.43.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 60830 |
"model.layers.43.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 60831 |
"model.layers.43.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -62382,10 +62265,7 @@
|
|
| 62382 |
"model.layers.44.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 62383 |
"model.layers.44.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 62384 |
"model.layers.44.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 62385 |
-
"model.layers.44.mlp.gate.
|
| 62386 |
-
"model.layers.44.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 62387 |
-
"model.layers.44.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 62388 |
-
"model.layers.44.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 62389 |
"model.layers.44.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 62390 |
"model.layers.44.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 62391 |
"model.layers.44.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -63942,10 +63822,7 @@
|
|
| 63942 |
"model.layers.45.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 63943 |
"model.layers.45.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 63944 |
"model.layers.45.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 63945 |
-
"model.layers.45.mlp.gate.
|
| 63946 |
-
"model.layers.45.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 63947 |
-
"model.layers.45.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 63948 |
-
"model.layers.45.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 63949 |
"model.layers.45.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 63950 |
"model.layers.45.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 63951 |
"model.layers.45.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -65502,10 +65379,7 @@
|
|
| 65502 |
"model.layers.46.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 65503 |
"model.layers.46.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 65504 |
"model.layers.46.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 65505 |
-
"model.layers.46.mlp.gate.
|
| 65506 |
-
"model.layers.46.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 65507 |
-
"model.layers.46.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 65508 |
-
"model.layers.46.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 65509 |
"model.layers.46.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 65510 |
"model.layers.46.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 65511 |
"model.layers.46.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -67062,10 +66936,7 @@
|
|
| 67062 |
"model.layers.47.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 67063 |
"model.layers.47.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 67064 |
"model.layers.47.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 67065 |
-
"model.layers.47.mlp.gate.
|
| 67066 |
-
"model.layers.47.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 67067 |
-
"model.layers.47.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
|
| 67068 |
-
"model.layers.47.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
|
| 67069 |
"model.layers.47.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 67070 |
"model.layers.47.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 67071 |
"model.layers.47.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
@@ -68622,10 +68493,7 @@
|
|
| 68622 |
"model.layers.5.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 68623 |
"model.layers.5.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 68624 |
"model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 68625 |
-
"model.layers.5.mlp.gate.
|
| 68626 |
-
"model.layers.5.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 68627 |
-
"model.layers.5.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 68628 |
-
"model.layers.5.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 68629 |
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 68630 |
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 68631 |
"model.layers.5.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -70182,10 +70050,7 @@
|
|
| 70182 |
"model.layers.6.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 70183 |
"model.layers.6.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 70184 |
"model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 70185 |
-
"model.layers.6.mlp.gate.
|
| 70186 |
-
"model.layers.6.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 70187 |
-
"model.layers.6.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 70188 |
-
"model.layers.6.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 70189 |
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 70190 |
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 70191 |
"model.layers.6.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -71742,10 +71607,7 @@
|
|
| 71742 |
"model.layers.7.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 71743 |
"model.layers.7.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 71744 |
"model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 71745 |
-
"model.layers.7.mlp.gate.
|
| 71746 |
-
"model.layers.7.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 71747 |
-
"model.layers.7.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 71748 |
-
"model.layers.7.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 71749 |
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 71750 |
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 71751 |
"model.layers.7.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -73302,10 +73164,7 @@
|
|
| 73302 |
"model.layers.8.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 73303 |
"model.layers.8.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 73304 |
"model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 73305 |
-
"model.layers.8.mlp.gate.
|
| 73306 |
-
"model.layers.8.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 73307 |
-
"model.layers.8.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 73308 |
-
"model.layers.8.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 73309 |
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 73310 |
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 73311 |
"model.layers.8.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
@@ -74862,10 +74721,7 @@
|
|
| 74862 |
"model.layers.9.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 74863 |
"model.layers.9.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 74864 |
"model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 74865 |
-
"model.layers.9.mlp.gate.
|
| 74866 |
-
"model.layers.9.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 74867 |
-
"model.layers.9.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
|
| 74868 |
-
"model.layers.9.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
|
| 74869 |
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 74870 |
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 74871 |
"model.layers.9.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 18087458304
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00004-of-00004.safetensors",
|
|
|
|
| 1542 |
"model.layers.0.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 1543 |
"model.layers.0.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 1544 |
"model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 1545 |
+
"model.layers.0.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 1546 |
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 1547 |
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 1548 |
"model.layers.0.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 3099 |
"model.layers.1.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 3100 |
"model.layers.1.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 3101 |
"model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 3102 |
+
"model.layers.1.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 3103 |
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 3104 |
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 3105 |
"model.layers.1.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 4656 |
"model.layers.10.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 4657 |
"model.layers.10.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 4658 |
"model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 4659 |
+
"model.layers.10.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 4660 |
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 4661 |
"model.layers.10.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 4662 |
"model.layers.10.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 6213 |
"model.layers.11.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 6214 |
"model.layers.11.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 6215 |
"model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 6216 |
+
"model.layers.11.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 6217 |
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 6218 |
"model.layers.11.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 6219 |
"model.layers.11.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 7218 |
"model.layers.12.mlp.experts.57.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7219 |
"model.layers.12.mlp.experts.57.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 7220 |
"model.layers.12.mlp.experts.57.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 7221 |
+
"model.layers.12.mlp.experts.58.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7222 |
+
"model.layers.12.mlp.experts.58.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7223 |
+
"model.layers.12.mlp.experts.58.down_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7224 |
+
"model.layers.12.mlp.experts.58.down_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7225 |
"model.layers.12.mlp.experts.58.gate_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
| 7226 |
"model.layers.12.mlp.experts.58.gate_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7227 |
"model.layers.12.mlp.experts.58.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
|
|
|
|
| 7230 |
"model.layers.12.mlp.experts.58.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7231 |
"model.layers.12.mlp.experts.58.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 7232 |
"model.layers.12.mlp.experts.58.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 7233 |
+
"model.layers.12.mlp.experts.59.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7234 |
+
"model.layers.12.mlp.experts.59.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7235 |
+
"model.layers.12.mlp.experts.59.down_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7236 |
+
"model.layers.12.mlp.experts.59.down_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7237 |
+
"model.layers.12.mlp.experts.59.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7238 |
+
"model.layers.12.mlp.experts.59.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7239 |
+
"model.layers.12.mlp.experts.59.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7240 |
+
"model.layers.12.mlp.experts.59.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7241 |
+
"model.layers.12.mlp.experts.59.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7242 |
+
"model.layers.12.mlp.experts.59.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7243 |
+
"model.layers.12.mlp.experts.59.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7244 |
+
"model.layers.12.mlp.experts.59.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7245 |
"model.layers.12.mlp.experts.6.down_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
| 7246 |
"model.layers.12.mlp.experts.6.down_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 7247 |
"model.layers.12.mlp.experts.6.down_proj.weight_packed": "model-00001-of-00004.safetensors",
|
|
|
|
| 7258 |
"model.layers.12.mlp.experts.60.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7259 |
"model.layers.12.mlp.experts.60.down_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7260 |
"model.layers.12.mlp.experts.60.down_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7261 |
+
"model.layers.12.mlp.experts.60.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7262 |
+
"model.layers.12.mlp.experts.60.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7263 |
+
"model.layers.12.mlp.experts.60.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7264 |
+
"model.layers.12.mlp.experts.60.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7265 |
"model.layers.12.mlp.experts.60.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 7266 |
"model.layers.12.mlp.experts.60.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7267 |
"model.layers.12.mlp.experts.60.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
|
|
|
| 7770 |
"model.layers.12.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 7771 |
"model.layers.12.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 7772 |
"model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 7773 |
+
"model.layers.12.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 7774 |
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 7775 |
"model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 7776 |
"model.layers.12.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 9327 |
"model.layers.13.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 9328 |
"model.layers.13.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 9329 |
"model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 9330 |
+
"model.layers.13.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 9331 |
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 9332 |
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 9333 |
"model.layers.13.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 10884 |
"model.layers.14.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 10885 |
"model.layers.14.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 10886 |
"model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 10887 |
+
"model.layers.14.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 10888 |
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 10889 |
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 10890 |
"model.layers.14.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 12441 |
"model.layers.15.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 12442 |
"model.layers.15.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 12443 |
"model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 12444 |
+
"model.layers.15.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 12445 |
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 12446 |
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 12447 |
"model.layers.15.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 13998 |
"model.layers.16.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 13999 |
"model.layers.16.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 14000 |
"model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 14001 |
+
"model.layers.16.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 14002 |
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 14003 |
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 14004 |
"model.layers.16.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 15555 |
"model.layers.17.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 15556 |
"model.layers.17.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 15557 |
"model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 15558 |
+
"model.layers.17.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 15559 |
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 15560 |
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 15561 |
"model.layers.17.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 17112 |
"model.layers.18.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 17113 |
"model.layers.18.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 17114 |
"model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 17115 |
+
"model.layers.18.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 17116 |
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 17117 |
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 17118 |
"model.layers.18.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 18669 |
"model.layers.19.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 18670 |
"model.layers.19.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 18671 |
"model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 18672 |
+
"model.layers.19.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 18673 |
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 18674 |
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 18675 |
"model.layers.19.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 20226 |
"model.layers.2.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 20227 |
"model.layers.2.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 20228 |
"model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 20229 |
+
"model.layers.2.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 20230 |
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 20231 |
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 20232 |
"model.layers.2.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 21783 |
"model.layers.20.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 21784 |
"model.layers.20.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 21785 |
"model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 21786 |
+
"model.layers.20.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 21787 |
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 21788 |
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 21789 |
"model.layers.20.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 23340 |
"model.layers.21.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 23341 |
"model.layers.21.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 23342 |
"model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 23343 |
+
"model.layers.21.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 23344 |
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 23345 |
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 23346 |
"model.layers.21.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 24897 |
"model.layers.22.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 24898 |
"model.layers.22.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 24899 |
"model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 24900 |
+
"model.layers.22.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 24901 |
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 24902 |
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 24903 |
"model.layers.22.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 26454 |
"model.layers.23.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 26455 |
"model.layers.23.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 26456 |
"model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 26457 |
+
"model.layers.23.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 26458 |
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 26459 |
"model.layers.23.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 26460 |
"model.layers.23.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 28011 |
"model.layers.24.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 28012 |
"model.layers.24.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 28013 |
"model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 28014 |
+
"model.layers.24.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 28015 |
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 28016 |
"model.layers.24.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 28017 |
"model.layers.24.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 29568 |
"model.layers.25.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 29569 |
"model.layers.25.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 29570 |
"model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 29571 |
+
"model.layers.25.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 29572 |
"model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
| 29573 |
"model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 29574 |
"model.layers.25.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 31017 |
"model.layers.26.mlp.experts.90.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31018 |
"model.layers.26.mlp.experts.90.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 31019 |
"model.layers.26.mlp.experts.90.up_proj.weight_scale": "model-00002-of-00004.safetensors",
|
| 31020 |
+
"model.layers.26.mlp.experts.91.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31021 |
+
"model.layers.26.mlp.experts.91.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31022 |
+
"model.layers.26.mlp.experts.91.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31023 |
+
"model.layers.26.mlp.experts.91.down_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31024 |
"model.layers.26.mlp.experts.91.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 31025 |
"model.layers.26.mlp.experts.91.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31026 |
"model.layers.26.mlp.experts.91.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
|
|
|
|
| 31028 |
"model.layers.26.mlp.experts.91.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
| 31029 |
"model.layers.26.mlp.experts.91.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
|
| 31030 |
"model.layers.26.mlp.experts.91.up_proj.weight_packed": "model-00002-of-00004.safetensors",
|
| 31031 |
+
"model.layers.26.mlp.experts.91.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31032 |
+
"model.layers.26.mlp.experts.92.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31033 |
+
"model.layers.26.mlp.experts.92.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31034 |
+
"model.layers.26.mlp.experts.92.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31035 |
+
"model.layers.26.mlp.experts.92.down_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31036 |
+
"model.layers.26.mlp.experts.92.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31037 |
+
"model.layers.26.mlp.experts.92.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31038 |
+
"model.layers.26.mlp.experts.92.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31039 |
+
"model.layers.26.mlp.experts.92.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31040 |
+
"model.layers.26.mlp.experts.92.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31041 |
+
"model.layers.26.mlp.experts.92.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31042 |
+
"model.layers.26.mlp.experts.92.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31043 |
+
"model.layers.26.mlp.experts.92.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31044 |
+
"model.layers.26.mlp.experts.93.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31045 |
+
"model.layers.26.mlp.experts.93.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31046 |
+
"model.layers.26.mlp.experts.93.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31047 |
+
"model.layers.26.mlp.experts.93.down_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31048 |
+
"model.layers.26.mlp.experts.93.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31049 |
+
"model.layers.26.mlp.experts.93.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31050 |
+
"model.layers.26.mlp.experts.93.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31051 |
+
"model.layers.26.mlp.experts.93.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31052 |
+
"model.layers.26.mlp.experts.93.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31053 |
+
"model.layers.26.mlp.experts.93.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31054 |
+
"model.layers.26.mlp.experts.93.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31055 |
+
"model.layers.26.mlp.experts.93.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31056 |
+
"model.layers.26.mlp.experts.94.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31057 |
+
"model.layers.26.mlp.experts.94.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31058 |
+
"model.layers.26.mlp.experts.94.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31059 |
+
"model.layers.26.mlp.experts.94.down_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31060 |
+
"model.layers.26.mlp.experts.94.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31061 |
+
"model.layers.26.mlp.experts.94.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31062 |
+
"model.layers.26.mlp.experts.94.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31063 |
+
"model.layers.26.mlp.experts.94.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31064 |
+
"model.layers.26.mlp.experts.94.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31065 |
+
"model.layers.26.mlp.experts.94.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31066 |
+
"model.layers.26.mlp.experts.94.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31067 |
+
"model.layers.26.mlp.experts.94.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31068 |
"model.layers.26.mlp.experts.95.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 31069 |
"model.layers.26.mlp.experts.95.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31070 |
"model.layers.26.mlp.experts.95.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
|
|
|
| 31125 |
"model.layers.26.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 31126 |
"model.layers.26.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 31127 |
"model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 31128 |
+
"model.layers.26.mlp.gate.weight": "model-00002-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 31129 |
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 31130 |
"model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
| 31131 |
"model.layers.26.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
|
|
|
|
| 32682 |
"model.layers.27.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 32683 |
"model.layers.27.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 32684 |
"model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 32685 |
+
"model.layers.27.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 32686 |
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 32687 |
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 32688 |
"model.layers.27.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 34239 |
"model.layers.28.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 34240 |
"model.layers.28.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 34241 |
"model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 34242 |
+
"model.layers.28.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 34243 |
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 34244 |
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 34245 |
"model.layers.28.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 35796 |
"model.layers.29.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 35797 |
"model.layers.29.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 35798 |
"model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 35799 |
+
"model.layers.29.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 35800 |
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 35801 |
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 35802 |
"model.layers.29.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 37353 |
"model.layers.3.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 37354 |
"model.layers.3.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 37355 |
"model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 37356 |
+
"model.layers.3.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 37357 |
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 37358 |
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 37359 |
"model.layers.3.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 38910 |
"model.layers.30.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 38911 |
"model.layers.30.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 38912 |
"model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 38913 |
+
"model.layers.30.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 38914 |
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 38915 |
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 38916 |
"model.layers.30.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 40467 |
"model.layers.31.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 40468 |
"model.layers.31.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 40469 |
"model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 40470 |
+
"model.layers.31.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 40471 |
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 40472 |
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 40473 |
"model.layers.31.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 42024 |
"model.layers.32.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 42025 |
"model.layers.32.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 42026 |
"model.layers.32.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 42027 |
+
"model.layers.32.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 42028 |
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 42029 |
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 42030 |
"model.layers.32.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 43581 |
"model.layers.33.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 43582 |
"model.layers.33.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 43583 |
"model.layers.33.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 43584 |
+
"model.layers.33.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 43585 |
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 43586 |
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 43587 |
"model.layers.33.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 45138 |
"model.layers.34.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 45139 |
"model.layers.34.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 45140 |
"model.layers.34.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 45141 |
+
"model.layers.34.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 45142 |
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 45143 |
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 45144 |
"model.layers.34.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 46695 |
"model.layers.35.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 46696 |
"model.layers.35.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 46697 |
"model.layers.35.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 46698 |
+
"model.layers.35.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 46699 |
"model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 46700 |
"model.layers.35.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 46701 |
"model.layers.35.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 48252 |
"model.layers.36.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 48253 |
"model.layers.36.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 48254 |
"model.layers.36.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 48255 |
+
"model.layers.36.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 48256 |
"model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 48257 |
"model.layers.36.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 48258 |
"model.layers.36.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 49809 |
"model.layers.37.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 49810 |
"model.layers.37.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 49811 |
"model.layers.37.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 49812 |
+
"model.layers.37.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 49813 |
"model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 49814 |
"model.layers.37.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 49815 |
"model.layers.37.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 51366 |
"model.layers.38.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 51367 |
"model.layers.38.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 51368 |
"model.layers.38.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 51369 |
+
"model.layers.38.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 51370 |
"model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 51371 |
"model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 51372 |
"model.layers.38.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 52923 |
"model.layers.39.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 52924 |
"model.layers.39.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 52925 |
"model.layers.39.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 52926 |
+
"model.layers.39.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 52927 |
"model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
| 52928 |
"model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 52929 |
"model.layers.39.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 54480 |
"model.layers.4.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 54481 |
"model.layers.4.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 54482 |
"model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 54483 |
+
"model.layers.4.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 54484 |
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 54485 |
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 54486 |
"model.layers.4.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 54500 |
"model.layers.4.self_attn.v_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 54501 |
"model.layers.4.self_attn.v_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 54502 |
"model.layers.4.self_attn.v_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 54503 |
+
"model.layers.40.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 54504 |
"model.layers.40.mlp.experts.0.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 54505 |
"model.layers.40.mlp.experts.0.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54506 |
"model.layers.40.mlp.experts.0.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
|
|
|
| 54849 |
"model.layers.40.mlp.experts.123.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54850 |
"model.layers.40.mlp.experts.123.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 54851 |
"model.layers.40.mlp.experts.123.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 54852 |
+
"model.layers.40.mlp.experts.124.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54853 |
+
"model.layers.40.mlp.experts.124.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54854 |
+
"model.layers.40.mlp.experts.124.down_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54855 |
+
"model.layers.40.mlp.experts.124.down_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54856 |
"model.layers.40.mlp.experts.124.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 54857 |
"model.layers.40.mlp.experts.124.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54858 |
"model.layers.40.mlp.experts.124.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 54859 |
"model.layers.40.mlp.experts.124.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 54860 |
+
"model.layers.40.mlp.experts.124.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54861 |
+
"model.layers.40.mlp.experts.124.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54862 |
+
"model.layers.40.mlp.experts.124.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54863 |
+
"model.layers.40.mlp.experts.124.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54864 |
+
"model.layers.40.mlp.experts.125.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54865 |
+
"model.layers.40.mlp.experts.125.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54866 |
+
"model.layers.40.mlp.experts.125.down_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54867 |
+
"model.layers.40.mlp.experts.125.down_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54868 |
+
"model.layers.40.mlp.experts.125.gate_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54869 |
+
"model.layers.40.mlp.experts.125.gate_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54870 |
+
"model.layers.40.mlp.experts.125.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54871 |
+
"model.layers.40.mlp.experts.125.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54872 |
+
"model.layers.40.mlp.experts.125.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54873 |
+
"model.layers.40.mlp.experts.125.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54874 |
+
"model.layers.40.mlp.experts.125.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54875 |
+
"model.layers.40.mlp.experts.125.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54876 |
+
"model.layers.40.mlp.experts.126.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54877 |
+
"model.layers.40.mlp.experts.126.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54878 |
+
"model.layers.40.mlp.experts.126.down_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54879 |
+
"model.layers.40.mlp.experts.126.down_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54880 |
+
"model.layers.40.mlp.experts.126.gate_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54881 |
+
"model.layers.40.mlp.experts.126.gate_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54882 |
+
"model.layers.40.mlp.experts.126.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54883 |
+
"model.layers.40.mlp.experts.126.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54884 |
+
"model.layers.40.mlp.experts.126.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54885 |
+
"model.layers.40.mlp.experts.126.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54886 |
+
"model.layers.40.mlp.experts.126.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54887 |
+
"model.layers.40.mlp.experts.126.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54888 |
+
"model.layers.40.mlp.experts.127.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54889 |
+
"model.layers.40.mlp.experts.127.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54890 |
+
"model.layers.40.mlp.experts.127.down_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54891 |
+
"model.layers.40.mlp.experts.127.down_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54892 |
+
"model.layers.40.mlp.experts.127.gate_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54893 |
+
"model.layers.40.mlp.experts.127.gate_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54894 |
+
"model.layers.40.mlp.experts.127.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54895 |
+
"model.layers.40.mlp.experts.127.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54896 |
+
"model.layers.40.mlp.experts.127.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 54897 |
+
"model.layers.40.mlp.experts.127.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 54898 |
+
"model.layers.40.mlp.experts.127.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 54899 |
+
"model.layers.40.mlp.experts.127.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 54900 |
"model.layers.40.mlp.experts.13.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 54901 |
"model.layers.40.mlp.experts.13.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 54902 |
"model.layers.40.mlp.experts.13.down_proj.weight_packed": "model-00003-of-00004.safetensors",
|
|
|
|
| 56037 |
"model.layers.40.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
| 56038 |
"model.layers.40.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
|
| 56039 |
"model.layers.40.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
|
| 56040 |
+
"model.layers.40.mlp.gate.weight": "model-00003-of-00004.safetensors",
|
| 56041 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 56042 |
"model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
| 56043 |
"model.layers.40.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
|
| 56044 |
"model.layers.40.self_attn.k_proj.weight_global_scale": "model-00003-of-00004.safetensors",
|
|
|
|
| 57594 |
"model.layers.41.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 57595 |
"model.layers.41.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 57596 |
"model.layers.41.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 57597 |
+
"model.layers.41.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 57598 |
"model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 57599 |
"model.layers.41.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 57600 |
"model.layers.41.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 57606 |
"model.layers.41.self_attn.o_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 57607 |
"model.layers.41.self_attn.o_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 57608 |
"model.layers.41.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
| 57609 |
+
"model.layers.41.self_attn.q_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 57610 |
+
"model.layers.41.self_attn.q_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 57611 |
+
"model.layers.41.self_attn.q_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 57612 |
+
"model.layers.41.self_attn.q_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 57613 |
"model.layers.41.self_attn.v_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
| 57614 |
"model.layers.41.self_attn.v_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 57615 |
"model.layers.41.self_attn.v_proj.weight_packed": "model-00004-of-00004.safetensors",
|
|
|
|
| 59151 |
"model.layers.42.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 59152 |
"model.layers.42.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 59153 |
"model.layers.42.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 59154 |
+
"model.layers.42.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 59155 |
"model.layers.42.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 59156 |
"model.layers.42.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 59157 |
"model.layers.42.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 60708 |
"model.layers.43.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 60709 |
"model.layers.43.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 60710 |
"model.layers.43.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 60711 |
+
"model.layers.43.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 60712 |
"model.layers.43.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 60713 |
"model.layers.43.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 60714 |
"model.layers.43.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 62265 |
"model.layers.44.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 62266 |
"model.layers.44.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 62267 |
"model.layers.44.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 62268 |
+
"model.layers.44.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 62269 |
"model.layers.44.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 62270 |
"model.layers.44.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 62271 |
"model.layers.44.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 63822 |
"model.layers.45.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 63823 |
"model.layers.45.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 63824 |
"model.layers.45.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 63825 |
+
"model.layers.45.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 63826 |
"model.layers.45.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 63827 |
"model.layers.45.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 63828 |
"model.layers.45.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 65379 |
"model.layers.46.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 65380 |
"model.layers.46.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 65381 |
"model.layers.46.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 65382 |
+
"model.layers.46.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 65383 |
"model.layers.46.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 65384 |
"model.layers.46.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 65385 |
"model.layers.46.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 66936 |
"model.layers.47.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
|
| 66937 |
"model.layers.47.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
|
| 66938 |
"model.layers.47.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
|
| 66939 |
+
"model.layers.47.mlp.gate.weight": "model-00004-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 66940 |
"model.layers.47.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
| 66941 |
"model.layers.47.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
| 66942 |
"model.layers.47.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
|
|
|
|
| 68493 |
"model.layers.5.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 68494 |
"model.layers.5.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 68495 |
"model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 68496 |
+
"model.layers.5.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 68497 |
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 68498 |
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 68499 |
"model.layers.5.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 70050 |
"model.layers.6.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 70051 |
"model.layers.6.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 70052 |
"model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 70053 |
+
"model.layers.6.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 70054 |
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 70055 |
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 70056 |
"model.layers.6.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 71607 |
"model.layers.7.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 71608 |
"model.layers.7.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 71609 |
"model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 71610 |
+
"model.layers.7.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 71611 |
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 71612 |
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 71613 |
"model.layers.7.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 73164 |
"model.layers.8.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 73165 |
"model.layers.8.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 73166 |
"model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 73167 |
+
"model.layers.8.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 73168 |
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 73169 |
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 73170 |
"model.layers.8.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
|
|
|
| 74721 |
"model.layers.9.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
|
| 74722 |
"model.layers.9.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
|
| 74723 |
"model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
|
| 74724 |
+
"model.layers.9.mlp.gate.weight": "model-00001-of-00004.safetensors",
|
|
|
|
|
|
|
|
|
|
| 74725 |
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
| 74726 |
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
| 74727 |
"model.layers.9.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
|
recipe.yaml
CHANGED
|
@@ -2,5 +2,5 @@ default_stage:
|
|
| 2 |
default_modifiers:
|
| 3 |
QuantizationModifier:
|
| 4 |
targets: [Linear]
|
| 5 |
-
ignore: [lm_head]
|
| 6 |
scheme: NVFP4
|
|
|
|
| 2 |
default_modifiers:
|
| 3 |
QuantizationModifier:
|
| 4 |
targets: [Linear]
|
| 5 |
+
ignore: [lm_head, 're:.*mlp.gate$']
|
| 6 |
scheme: NVFP4
|