Commit a999d5e (verified) by dsikka
Parent: f5f7d39

Upload folder using huggingface_hub
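The commit message indicates the upload was made with the huggingface_hub client. A minimal sketch of how such a folder upload is typically issued with the library's upload_folder API; the repo id and local path below are hypothetical placeholders, not taken from this commit:

from huggingface_hub import HfApi

api = HfApi()

# Push every file in a local checkpoint directory as a single commit.
# repo_id and folder_path are hypothetical; substitute your own.
api.upload_folder(
    repo_id="org/model-NVFP4",
    folder_path="./checkpoint",
    commit_message="Upload folder using huggingface_hub",
)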
config.json CHANGED
@@ -60,6 +60,54 @@
  "format": "nvfp4-pack-quantized",
  "global_compression_ratio": null,
  "ignore": [
+ "model.layers.0.mlp.gate",
+ "model.layers.1.mlp.gate",
+ "model.layers.2.mlp.gate",
+ "model.layers.3.mlp.gate",
+ "model.layers.4.mlp.gate",
+ "model.layers.5.mlp.gate",
+ "model.layers.6.mlp.gate",
+ "model.layers.7.mlp.gate",
+ "model.layers.8.mlp.gate",
+ "model.layers.9.mlp.gate",
+ "model.layers.10.mlp.gate",
+ "model.layers.11.mlp.gate",
+ "model.layers.12.mlp.gate",
+ "model.layers.13.mlp.gate",
+ "model.layers.14.mlp.gate",
+ "model.layers.15.mlp.gate",
+ "model.layers.16.mlp.gate",
+ "model.layers.17.mlp.gate",
+ "model.layers.18.mlp.gate",
+ "model.layers.19.mlp.gate",
+ "model.layers.20.mlp.gate",
+ "model.layers.21.mlp.gate",
+ "model.layers.22.mlp.gate",
+ "model.layers.23.mlp.gate",
+ "model.layers.24.mlp.gate",
+ "model.layers.25.mlp.gate",
+ "model.layers.26.mlp.gate",
+ "model.layers.27.mlp.gate",
+ "model.layers.28.mlp.gate",
+ "model.layers.29.mlp.gate",
+ "model.layers.30.mlp.gate",
+ "model.layers.31.mlp.gate",
+ "model.layers.32.mlp.gate",
+ "model.layers.33.mlp.gate",
+ "model.layers.34.mlp.gate",
+ "model.layers.35.mlp.gate",
+ "model.layers.36.mlp.gate",
+ "model.layers.37.mlp.gate",
+ "model.layers.38.mlp.gate",
+ "model.layers.39.mlp.gate",
+ "model.layers.40.mlp.gate",
+ "model.layers.41.mlp.gate",
+ "model.layers.42.mlp.gate",
+ "model.layers.43.mlp.gate",
+ "model.layers.44.mlp.gate",
+ "model.layers.45.mlp.gate",
+ "model.layers.46.mlp.gate",
+ "model.layers.47.mlp.gate",
  "lm_head"
  ],
  "kv_cache_scheme": null,
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:261387433864dde38de406aef129574caef5247125fb164e085b5eb24ff0982d
- size 5001811128
+ oid sha256:3cc625c6fc82c8d84d196d3c9ec6c6813594ccb8321e1a1f29d6d2827c99a498
+ size 5002279496
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e94ee2fc7fe3c27f90cca5eab2987ab50a6b6ca628900401c39a0dd4ef831525
- size 5001977392
+ oid sha256:5512103702dd54a3c410f6a367e08cef8e1107faf3276bfb584f3195befad0fc
+ size 5002724080
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a38450094e8370f3a2d329b4e8a3414f15c49b42263bf9d92efa8d31f02a8864
- size 5002279016
+ oid sha256:5699f8c32a1254f67071924c40a7c2f26375001592c17d83f52fc4bcdc90a2d1
+ size 5002036032
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b9214cd1a5aa7c8cfcb65bc658be9073df94c7d81f5a39af8bc0cb7ad5b206e6
- size 3072570048
+ oid sha256:cea1e3dbae26e04a2773b68450059c3bf3da633a00747be37c4f9b3a95cb3acb
+ size 3089670712
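Each shard is stored as a Git LFS pointer, so the diff records only a new sha256 oid and byte size per file. A minimal sketch for checking a downloaded shard against its pointer; the oid and size are copied from the model-00001 diff above, while the local path is a placeholder:

import hashlib
import os

def matches_lfs_pointer(path: str, oid: str, size: int) -> bool:
    """Return True if the local file's size and sha256 match the LFS pointer."""
    if os.path.getsize(path) != size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

print(matches_lfs_pointer(
    "model-00001-of-00004.safetensors",  # placeholder local path
    "3cc625c6fc82c8d84d196d3c9ec6c6813594ccb8321e1a1f29d6d2827c99a498",
    5002279496,
))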
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 18069370752
  },
  "weight_map": {
  "lm_head.weight": "model-00004-of-00004.safetensors",
@@ -1542,10 +1542,7 @@
  "model.layers.0.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.0.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.0.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.0.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.0.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.0.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.0.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -3102,10 +3099,7 @@
  "model.layers.1.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.1.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.1.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.1.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.1.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.1.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.1.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -4662,10 +4656,7 @@
  "model.layers.10.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.10.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.10.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.10.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.10.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.10.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.10.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -6222,10 +6213,7 @@
  "model.layers.11.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.11.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.11.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.11.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.11.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.11.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.11.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -7230,10 +7218,10 @@
  "model.layers.12.mlp.experts.57.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.57.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.57.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.58.down_proj.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.58.down_proj.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.58.down_proj.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.58.down_proj.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.gate_proj.input_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.gate_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -7242,18 +7230,18 @@
  "model.layers.12.mlp.experts.58.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.down_proj.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.down_proj.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.down_proj.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.down_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.gate_proj.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.gate_proj.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.gate_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.up_proj.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.up_proj.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.59.up_proj.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.6.down_proj.input_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.6.down_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.6.down_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -7270,10 +7258,10 @@
  "model.layers.12.mlp.experts.60.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.down_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.down_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.12.mlp.experts.60.gate_proj.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.60.gate_proj.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.60.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.experts.60.gate_proj.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.up_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -7782,10 +7770,7 @@
  "model.layers.12.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.12.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.12.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.12.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -9342,10 +9327,7 @@
  "model.layers.13.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.13.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.13.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.13.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.13.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.13.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.13.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -10902,10 +10884,7 @@
  "model.layers.14.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.14.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.14.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.14.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.14.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.14.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.14.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -12462,10 +12441,7 @@
  "model.layers.15.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.15.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.15.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.15.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.15.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.15.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.15.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -14022,10 +13998,7 @@
  "model.layers.16.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.16.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.16.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.16.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.16.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.16.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.16.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -15582,10 +15555,7 @@
  "model.layers.17.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.17.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.17.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.17.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.17.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.17.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -17142,10 +17112,7 @@
  "model.layers.18.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.18.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.18.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.18.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.18.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.18.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.18.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -18702,10 +18669,7 @@
  "model.layers.19.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.19.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.19.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.19.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.19.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.19.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.19.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -20262,10 +20226,7 @@
  "model.layers.2.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.2.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.2.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.2.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.2.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.2.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -21822,10 +21783,7 @@
  "model.layers.20.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.20.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.20.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.20.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.20.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.20.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.20.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -23382,10 +23340,7 @@
  "model.layers.21.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.21.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.21.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.21.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.21.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.21.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.21.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -24942,10 +24897,7 @@
  "model.layers.22.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.22.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.22.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.22.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.22.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.22.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.22.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -26502,10 +26454,7 @@
  "model.layers.23.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.23.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.23.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.23.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.23.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.23.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.23.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -28062,10 +28011,7 @@
  "model.layers.24.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.24.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.24.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.24.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.24.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.24.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.24.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -29622,10 +29568,7 @@
  "model.layers.25.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.25.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.25.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.25.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.25.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.25.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.25.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -31074,10 +31017,10 @@
  "model.layers.26.mlp.experts.90.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.90.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.90.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.91.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.91.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.91.down_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.91.down_proj.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -31085,43 +31028,43 @@
  "model.layers.26.mlp.experts.91.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.up_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.91.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.down_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.down_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.up_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.92.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.down_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.down_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.up_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.93.up_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.down_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.down_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.up_proj.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.experts.94.up_proj.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.95.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.95.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.95.down_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -31182,10 +31125,7 @@
  "model.layers.26.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.26.mlp.gate.input_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.gate.weight_global_scale": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
- "model.layers.26.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.26.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
@@ -32742,10 +32682,7 @@
  "model.layers.27.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.27.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.27.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.27.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.27.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.27.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -34302,10 +34239,7 @@
  "model.layers.28.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.28.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.28.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.28.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.28.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.28.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -35862,10 +35796,7 @@
  "model.layers.29.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.29.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.29.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.29.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.29.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.29.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.29.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -37422,10 +37353,7 @@
  "model.layers.3.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.3.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.3.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.3.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.3.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.3.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -38982,10 +38910,7 @@
  "model.layers.30.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.30.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.30.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.30.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.30.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.30.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.30.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -40542,10 +40467,7 @@
  "model.layers.31.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.31.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.31.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.31.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.31.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.31.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -42102,10 +42024,7 @@
  "model.layers.32.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.32.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.32.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.32.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.32.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.32.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.32.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.32.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -43662,10 +43581,7 @@
  "model.layers.33.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.33.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.33.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.33.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.33.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.33.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.33.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.33.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -45222,10 +45138,7 @@
  "model.layers.34.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.34.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.34.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.34.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.34.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.34.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.34.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.34.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -46782,10 +46695,7 @@
  "model.layers.35.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.35.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.35.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.35.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.35.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.35.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.35.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.35.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.35.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -48342,10 +48252,7 @@
  "model.layers.36.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.36.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.36.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.36.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.36.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.36.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.36.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.36.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.36.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -49902,10 +49809,7 @@
  "model.layers.37.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.37.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.37.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.37.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.37.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.37.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.37.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.37.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.37.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -51462,10 +51366,7 @@
  "model.layers.38.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.38.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.38.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.38.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.38.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.38.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.38.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.38.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -53022,10 +52923,7 @@
  "model.layers.39.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.39.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.39.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.39.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.39.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.39.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.39.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.39.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
@@ -54582,10 +54480,7 @@
  "model.layers.4.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.4.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.4.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.4.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.4.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.4.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -54605,7 +54500,7 @@
  "model.layers.4.self_attn.v_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.v_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.v_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.40.input_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.0.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.0.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.0.down_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -54954,54 +54849,54 @@
  "model.layers.40.mlp.experts.123.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.123.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.123.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.down_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.down_proj.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.up_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.124.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.down_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.down_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.up_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.125.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.down_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.down_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.up_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.126.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.down_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.down_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.up_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.experts.127.up_proj.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.13.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.13.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.13.down_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -56142,11 +56037,8 @@
  "model.layers.40.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.gate.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.gate.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.40.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
- "model.layers.40.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.40.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.self_attn.k_proj.weight_global_scale": "model-00003-of-00004.safetensors",
@@ -57702,10 +57594,7 @@
57702
  "model.layers.41.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
57703
  "model.layers.41.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
57704
  "model.layers.41.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
57705
- "model.layers.41.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
57706
- "model.layers.41.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
57707
- "model.layers.41.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
57708
- "model.layers.41.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
57709
  "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
57710
  "model.layers.41.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
57711
  "model.layers.41.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -57717,10 +57606,10 @@
  "model.layers.41.self_attn.o_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.o_proj.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
- "model.layers.41.self_attn.q_proj.input_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.41.self_attn.q_proj.weight_global_scale": "model-00003-of-00004.safetensors",
- "model.layers.41.self_attn.q_proj.weight_packed": "model-00003-of-00004.safetensors",
- "model.layers.41.self_attn.q_proj.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.41.self_attn.v_proj.input_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.v_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.v_proj.weight_packed": "model-00004-of-00004.safetensors",
@@ -59262,10 +59151,7 @@
  "model.layers.42.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.42.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.42.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
- "model.layers.42.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.42.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.42.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
- "model.layers.42.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.42.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.42.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.42.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -60822,10 +60708,7 @@
  "model.layers.43.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.43.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.43.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
- "model.layers.43.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.43.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.43.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
- "model.layers.43.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.43.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.43.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.43.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -62382,10 +62265,7 @@
  "model.layers.44.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.44.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.44.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
- "model.layers.44.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.44.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.44.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
- "model.layers.44.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.44.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.44.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.44.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -63942,10 +63822,7 @@
  "model.layers.45.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.45.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.45.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
- "model.layers.45.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.45.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.45.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
- "model.layers.45.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.45.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.45.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.45.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -65502,10 +65379,7 @@
  "model.layers.46.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.46.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.46.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
- "model.layers.46.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.46.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.46.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
- "model.layers.46.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.46.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.46.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.46.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -67062,10 +66936,7 @@
  "model.layers.47.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.47.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.47.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
- "model.layers.47.mlp.gate.input_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.47.mlp.gate.weight_global_scale": "model-00004-of-00004.safetensors",
- "model.layers.47.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
- "model.layers.47.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.47.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.47.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.47.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",
@@ -68622,10 +68493,7 @@
  "model.layers.5.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.5.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.5.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.5.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.5.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.5.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.5.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -70182,10 +70050,7 @@
  "model.layers.6.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.6.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.6.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.6.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.6.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.6.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -71742,10 +71607,7 @@
  "model.layers.7.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.7.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.7.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.7.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.7.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.7.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -73302,10 +73164,7 @@
  "model.layers.8.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.8.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.8.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.8.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.8.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.8.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.8.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
@@ -74862,10 +74721,7 @@
  "model.layers.9.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.9.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
- "model.layers.9.mlp.gate.input_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.9.mlp.gate.weight_global_scale": "model-00001-of-00004.safetensors",
- "model.layers.9.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
- "model.layers.9.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
  "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.9.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
 
  {
  "metadata": {
+ "total_size": 18087458304
  },
  "weight_map": {
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
  "model.layers.0.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.0.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.0.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.1.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.1.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.1.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.10.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.10.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.10.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.10.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.11.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.11.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.11.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.11.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
 
  "model.layers.12.mlp.experts.57.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.57.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.57.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.12.mlp.experts.58.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.58.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.58.down_proj.weight_packed": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.58.down_proj.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.gate_proj.input_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.gate_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.gate_proj.weight_packed": "model-00001-of-00004.safetensors",

  "model.layers.12.mlp.experts.58.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.58.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.down_proj.input_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.down_proj.weight_packed": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.down_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.up_proj.weight_packed": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.59.up_proj.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.6.down_proj.input_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.6.down_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.12.mlp.experts.6.down_proj.weight_packed": "model-00001-of-00004.safetensors",

  "model.layers.12.mlp.experts.60.down_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.down_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.down_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.60.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.60.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.60.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.experts.60.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.60.up_proj.weight_packed": "model-00002-of-00004.safetensors",

  "model.layers.12.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.12.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.12.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
 
  "model.layers.13.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.13.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.13.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.13.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.14.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.14.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.14.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.14.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.15.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.15.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.15.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.15.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.16.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.16.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.16.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.16.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.17.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.17.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.17.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.17.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.18.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.18.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.18.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.18.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.19.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.19.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.19.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.19.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.2.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.2.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.2.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.2.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.20.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.20.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.20.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.20.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.21.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.21.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.21.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.21.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.22.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.22.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.22.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.22.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.23.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.23.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.23.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.23.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.24.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.24.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.24.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.24.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",

  "model.layers.25.mlp.experts.99.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.25.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.25.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
  "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.25.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
 
  "model.layers.26.mlp.experts.90.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.90.up_proj.weight_packed": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.90.up_proj.weight_scale": "model-00002-of-00004.safetensors",
+ "model.layers.26.mlp.experts.91.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.91.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.91.down_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.91.down_proj.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.gate_proj.input_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.gate_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.gate_proj.weight_packed": "model-00002-of-00004.safetensors",

  "model.layers.26.mlp.experts.91.up_proj.input_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.up_proj.weight_global_scale": "model-00002-of-00004.safetensors",
  "model.layers.26.mlp.experts.91.up_proj.weight_packed": "model-00002-of-00004.safetensors",
+ "model.layers.26.mlp.experts.91.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.down_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.down_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.up_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.92.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.down_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.down_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.up_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.93.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.down_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.down_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.up_proj.input_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.up_proj.weight_packed": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.experts.94.up_proj.weight_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.95.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.95.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.95.down_proj.weight_packed": "model-00003-of-00004.safetensors",

  "model.layers.26.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.26.mlp.gate.weight": "model-00002-of-00004.safetensors",
  "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
  "model.layers.26.self_attn.k_proj.input_global_scale": "model-00002-of-00004.safetensors",
 
  "model.layers.27.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.27.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.27.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.27.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.28.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.28.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.28.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.28.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.29.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.29.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.29.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.29.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.3.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.3.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.3.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.3.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.30.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.30.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.30.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.30.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.31.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.31.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.31.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.31.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.32.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.32.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.32.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.32.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.32.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.33.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.33.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.33.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.33.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.33.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.34.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.34.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.34.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.34.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.34.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.35.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.35.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.35.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.35.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.35.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.35.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.36.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.36.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.36.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.36.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.36.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.36.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.37.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.37.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.37.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.37.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.37.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.37.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.38.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.38.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.38.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.38.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.38.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.39.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.39.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.39.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.39.mlp.gate.weight": "model-00003-of-00004.safetensors",
  "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.39.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",

  "model.layers.4.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.4.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.4.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
 
  "model.layers.4.self_attn.v_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.v_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.4.self_attn.v_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.40.mlp.experts.0.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.0.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.0.down_proj.weight_packed": "model-00003-of-00004.safetensors",

  "model.layers.40.mlp.experts.123.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.123.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.123.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.down_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.down_proj.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.124.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.up_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.124.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.down_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.down_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.gate_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.gate_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.up_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.125.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.down_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.down_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.gate_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.gate_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.up_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.126.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.down_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.down_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.down_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.down_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.gate_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.gate_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.up_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.up_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.40.mlp.experts.127.up_proj.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.40.mlp.experts.13.down_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.13.down_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.13.down_proj.weight_packed": "model-00003-of-00004.safetensors",

  "model.layers.40.mlp.experts.99.up_proj.weight_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
  "model.layers.40.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
+ "model.layers.40.mlp.gate.weight": "model-00003-of-00004.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
  "model.layers.40.self_attn.k_proj.input_global_scale": "model-00003-of-00004.safetensors",
  "model.layers.40.self_attn.k_proj.weight_global_scale": "model-00003-of-00004.safetensors",
 
  "model.layers.41.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.41.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.41.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.41.self_attn.o_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.o_proj.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
+ "model.layers.41.self_attn.q_proj.input_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.41.self_attn.q_proj.weight_global_scale": "model-00004-of-00004.safetensors",
+ "model.layers.41.self_attn.q_proj.weight_packed": "model-00004-of-00004.safetensors",
+ "model.layers.41.self_attn.q_proj.weight_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.v_proj.input_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.v_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.41.self_attn.v_proj.weight_packed": "model-00004-of-00004.safetensors",
 
  "model.layers.42.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.42.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.42.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.42.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.42.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.42.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.42.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.43.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.43.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.43.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.43.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.43.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.43.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.43.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.44.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.44.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.44.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.44.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.44.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.44.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.44.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.45.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.45.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.45.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.45.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.45.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.45.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.45.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.46.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.46.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.46.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.46.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.46.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.46.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.46.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.47.mlp.experts.99.up_proj.weight_global_scale": "model-00004-of-00004.safetensors",
  "model.layers.47.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
  "model.layers.47.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
+ "model.layers.47.mlp.gate.weight": "model-00004-of-00004.safetensors",
  "model.layers.47.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
  "model.layers.47.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
  "model.layers.47.self_attn.k_proj.input_global_scale": "model-00004-of-00004.safetensors",

  "model.layers.5.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.5.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.5.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.5.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.6.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.6.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.6.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.6.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.7.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.7.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.7.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.7.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.8.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.8.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.8.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.8.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",

  "model.layers.9.mlp.experts.99.up_proj.weight_global_scale": "model-00001-of-00004.safetensors",
  "model.layers.9.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
  "model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
+ "model.layers.9.mlp.gate.weight": "model-00001-of-00004.safetensors",
  "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
  "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
  "model.layers.9.self_attn.k_proj.input_global_scale": "model-00001-of-00004.safetensors",
recipe.yaml CHANGED
@@ -2,5 +2,5 @@ default_stage:
  default_modifiers:
  QuantizationModifier:
  targets: [Linear]
- ignore: [lm_head]
+ ignore: [lm_head, 're:.*mlp.gate$']
  scheme: NVFP4
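This recipe change is the root cause of everything above: the MoE routers join `lm_head` on the ignore list, so they are skipped during NVFP4 quantization and serialized as ordinary weights. A minimal sketch of how such a recipe could be applied with llmcompressor; the base model id, calibration choices, and output path are illustrative assumptions, not taken from this commit:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier

MODEL_ID = "org/base-model"  # hypothetical unquantized checkpoint

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Mirrors the updated recipe.yaml: quantize every Linear to NVFP4, but
# skip the output head and each MoE router ('re:.*mlp.gate$').
recipe = QuantizationModifier(
    targets="Linear",
    scheme="NVFP4",
    ignore=["lm_head", "re:.*mlp.gate$"],
)

# NVFP4 quantizes activations as well, so a small calibration set is used
# to fit the input_global_scale tensors seen in the index above.
oneshot(
    model=model,
    recipe=recipe,
    dataset="open_platypus",       # illustrative calibration dataset
    num_calibration_samples=512,   # illustrative
    max_seq_length=2048,           # illustrative
)

model.save_pretrained("model-NVFP4", save_compressed=True)
tokenizer.save_pretrained("model-NVFP4")
```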