cpatonn committed
Commit 4ac5c88 · verified · Parent: c587288

Upload folder using huggingface_hub

config.json CHANGED
@@ -96,7 +96,7 @@
  "dynamic": false,
  "group_size": 32,
  "num_bits": 4,
- "observer": "minmax",
+ "observer": "mse",
  "observer_kwargs": {},
  "strategy": "group",
  "symmetric": true,
@@ -111,205 +111,397 @@
  "model.layers.0.linear_attn.in_proj_ba",
  "model.layers.0.linear_attn.out_proj",
  "model.layers.0.mlp.gate",
+ "model.layers.0.mlp.shared_expert.gate_proj",
+ "model.layers.0.mlp.shared_expert.up_proj",
+ "model.layers.0.mlp.shared_expert.down_proj",
  "model.layers.0.mlp.shared_expert_gate",
  "model.layers.1.linear_attn.in_proj_qkvz",
  "model.layers.1.linear_attn.in_proj_ba",
  "model.layers.1.linear_attn.out_proj",
  "model.layers.1.mlp.gate",
+ "model.layers.1.mlp.shared_expert.gate_proj",
+ "model.layers.1.mlp.shared_expert.up_proj",
+ "model.layers.1.mlp.shared_expert.down_proj",
  "model.layers.1.mlp.shared_expert_gate",
  "model.layers.2.linear_attn.in_proj_qkvz",
  "model.layers.2.linear_attn.in_proj_ba",
  "model.layers.2.linear_attn.out_proj",
  "model.layers.2.mlp.gate",
+ "model.layers.2.mlp.shared_expert.gate_proj",
+ "model.layers.2.mlp.shared_expert.up_proj",
+ "model.layers.2.mlp.shared_expert.down_proj",
  "model.layers.2.mlp.shared_expert_gate",
+ "model.layers.3.self_attn.q_proj",
+ "model.layers.3.self_attn.k_proj",
+ "model.layers.3.self_attn.v_proj",
+ "model.layers.3.self_attn.o_proj",
  "model.layers.3.mlp.gate",
+ "model.layers.3.mlp.shared_expert.gate_proj",
+ "model.layers.3.mlp.shared_expert.up_proj",
+ "model.layers.3.mlp.shared_expert.down_proj",
  "model.layers.3.mlp.shared_expert_gate",
  "model.layers.4.linear_attn.in_proj_qkvz",
  "model.layers.4.linear_attn.in_proj_ba",
  "model.layers.4.linear_attn.out_proj",
  "model.layers.4.mlp.gate",
+ "model.layers.4.mlp.shared_expert.gate_proj",
+ "model.layers.4.mlp.shared_expert.up_proj",
+ "model.layers.4.mlp.shared_expert.down_proj",
  "model.layers.4.mlp.shared_expert_gate",
  "model.layers.5.linear_attn.in_proj_qkvz",
  "model.layers.5.linear_attn.in_proj_ba",
  "model.layers.5.linear_attn.out_proj",
  "model.layers.5.mlp.gate",
+ "model.layers.5.mlp.shared_expert.gate_proj",
+ "model.layers.5.mlp.shared_expert.up_proj",
+ "model.layers.5.mlp.shared_expert.down_proj",
  "model.layers.5.mlp.shared_expert_gate",
  "model.layers.6.linear_attn.in_proj_qkvz",
  "model.layers.6.linear_attn.in_proj_ba",
  "model.layers.6.linear_attn.out_proj",
  "model.layers.6.mlp.gate",
+ "model.layers.6.mlp.shared_expert.gate_proj",
+ "model.layers.6.mlp.shared_expert.up_proj",
+ "model.layers.6.mlp.shared_expert.down_proj",
  "model.layers.6.mlp.shared_expert_gate",
+ "model.layers.7.self_attn.q_proj",
+ "model.layers.7.self_attn.k_proj",
+ "model.layers.7.self_attn.v_proj",
+ "model.layers.7.self_attn.o_proj",
  "model.layers.7.mlp.gate",
+ "model.layers.7.mlp.shared_expert.gate_proj",
+ "model.layers.7.mlp.shared_expert.up_proj",
+ "model.layers.7.mlp.shared_expert.down_proj",
  "model.layers.7.mlp.shared_expert_gate",
  "model.layers.8.linear_attn.in_proj_qkvz",
  "model.layers.8.linear_attn.in_proj_ba",
  "model.layers.8.linear_attn.out_proj",
  "model.layers.8.mlp.gate",
+ "model.layers.8.mlp.shared_expert.gate_proj",
+ "model.layers.8.mlp.shared_expert.up_proj",
+ "model.layers.8.mlp.shared_expert.down_proj",
  "model.layers.8.mlp.shared_expert_gate",
  "model.layers.9.linear_attn.in_proj_qkvz",
  "model.layers.9.linear_attn.in_proj_ba",
  "model.layers.9.linear_attn.out_proj",
  "model.layers.9.mlp.gate",
+ "model.layers.9.mlp.shared_expert.gate_proj",
+ "model.layers.9.mlp.shared_expert.up_proj",
+ "model.layers.9.mlp.shared_expert.down_proj",
  "model.layers.9.mlp.shared_expert_gate",
  "model.layers.10.linear_attn.in_proj_qkvz",
  "model.layers.10.linear_attn.in_proj_ba",
  "model.layers.10.linear_attn.out_proj",
  "model.layers.10.mlp.gate",
+ "model.layers.10.mlp.shared_expert.gate_proj",
+ "model.layers.10.mlp.shared_expert.up_proj",
+ "model.layers.10.mlp.shared_expert.down_proj",
  "model.layers.10.mlp.shared_expert_gate",
+ "model.layers.11.self_attn.q_proj",
+ "model.layers.11.self_attn.k_proj",
+ "model.layers.11.self_attn.v_proj",
+ "model.layers.11.self_attn.o_proj",
  "model.layers.11.mlp.gate",
+ "model.layers.11.mlp.shared_expert.gate_proj",
+ "model.layers.11.mlp.shared_expert.up_proj",
+ "model.layers.11.mlp.shared_expert.down_proj",
  "model.layers.11.mlp.shared_expert_gate",
  "model.layers.12.linear_attn.in_proj_qkvz",
  "model.layers.12.linear_attn.in_proj_ba",
  "model.layers.12.linear_attn.out_proj",
  "model.layers.12.mlp.gate",
+ "model.layers.12.mlp.shared_expert.gate_proj",
+ "model.layers.12.mlp.shared_expert.up_proj",
+ "model.layers.12.mlp.shared_expert.down_proj",
  "model.layers.12.mlp.shared_expert_gate",
  "model.layers.13.linear_attn.in_proj_qkvz",
  "model.layers.13.linear_attn.in_proj_ba",
  "model.layers.13.linear_attn.out_proj",
  "model.layers.13.mlp.gate",
+ "model.layers.13.mlp.shared_expert.gate_proj",
+ "model.layers.13.mlp.shared_expert.up_proj",
+ "model.layers.13.mlp.shared_expert.down_proj",
  "model.layers.13.mlp.shared_expert_gate",
  "model.layers.14.linear_attn.in_proj_qkvz",
  "model.layers.14.linear_attn.in_proj_ba",
  "model.layers.14.linear_attn.out_proj",
  "model.layers.14.mlp.gate",
+ "model.layers.14.mlp.shared_expert.gate_proj",
+ "model.layers.14.mlp.shared_expert.up_proj",
+ "model.layers.14.mlp.shared_expert.down_proj",
  "model.layers.14.mlp.shared_expert_gate",
+ "model.layers.15.self_attn.q_proj",
+ "model.layers.15.self_attn.k_proj",
+ "model.layers.15.self_attn.v_proj",
+ "model.layers.15.self_attn.o_proj",
  "model.layers.15.mlp.gate",
+ "model.layers.15.mlp.shared_expert.gate_proj",
+ "model.layers.15.mlp.shared_expert.up_proj",
+ "model.layers.15.mlp.shared_expert.down_proj",
  "model.layers.15.mlp.shared_expert_gate",
  "model.layers.16.linear_attn.in_proj_qkvz",
  "model.layers.16.linear_attn.in_proj_ba",
  "model.layers.16.linear_attn.out_proj",
  "model.layers.16.mlp.gate",
+ "model.layers.16.mlp.shared_expert.gate_proj",
+ "model.layers.16.mlp.shared_expert.up_proj",
+ "model.layers.16.mlp.shared_expert.down_proj",
  "model.layers.16.mlp.shared_expert_gate",
  "model.layers.17.linear_attn.in_proj_qkvz",
  "model.layers.17.linear_attn.in_proj_ba",
  "model.layers.17.linear_attn.out_proj",
  "model.layers.17.mlp.gate",
+ "model.layers.17.mlp.shared_expert.gate_proj",
+ "model.layers.17.mlp.shared_expert.up_proj",
+ "model.layers.17.mlp.shared_expert.down_proj",
  "model.layers.17.mlp.shared_expert_gate",
  "model.layers.18.linear_attn.in_proj_qkvz",
  "model.layers.18.linear_attn.in_proj_ba",
  "model.layers.18.linear_attn.out_proj",
  "model.layers.18.mlp.gate",
+ "model.layers.18.mlp.shared_expert.gate_proj",
+ "model.layers.18.mlp.shared_expert.up_proj",
+ "model.layers.18.mlp.shared_expert.down_proj",
  "model.layers.18.mlp.shared_expert_gate",
+ "model.layers.19.self_attn.q_proj",
+ "model.layers.19.self_attn.k_proj",
+ "model.layers.19.self_attn.v_proj",
+ "model.layers.19.self_attn.o_proj",
  "model.layers.19.mlp.gate",
+ "model.layers.19.mlp.shared_expert.gate_proj",
+ "model.layers.19.mlp.shared_expert.up_proj",
+ "model.layers.19.mlp.shared_expert.down_proj",
  "model.layers.19.mlp.shared_expert_gate",
  "model.layers.20.linear_attn.in_proj_qkvz",
  "model.layers.20.linear_attn.in_proj_ba",
  "model.layers.20.linear_attn.out_proj",
  "model.layers.20.mlp.gate",
+ "model.layers.20.mlp.shared_expert.gate_proj",
+ "model.layers.20.mlp.shared_expert.up_proj",
+ "model.layers.20.mlp.shared_expert.down_proj",
  "model.layers.20.mlp.shared_expert_gate",
  "model.layers.21.linear_attn.in_proj_qkvz",
  "model.layers.21.linear_attn.in_proj_ba",
  "model.layers.21.linear_attn.out_proj",
  "model.layers.21.mlp.gate",
+ "model.layers.21.mlp.shared_expert.gate_proj",
+ "model.layers.21.mlp.shared_expert.up_proj",
+ "model.layers.21.mlp.shared_expert.down_proj",
  "model.layers.21.mlp.shared_expert_gate",
  "model.layers.22.linear_attn.in_proj_qkvz",
  "model.layers.22.linear_attn.in_proj_ba",
  "model.layers.22.linear_attn.out_proj",
  "model.layers.22.mlp.gate",
+ "model.layers.22.mlp.shared_expert.gate_proj",
+ "model.layers.22.mlp.shared_expert.up_proj",
+ "model.layers.22.mlp.shared_expert.down_proj",
  "model.layers.22.mlp.shared_expert_gate",
+ "model.layers.23.self_attn.q_proj",
+ "model.layers.23.self_attn.k_proj",
+ "model.layers.23.self_attn.v_proj",
+ "model.layers.23.self_attn.o_proj",
  "model.layers.23.mlp.gate",
+ "model.layers.23.mlp.shared_expert.gate_proj",
+ "model.layers.23.mlp.shared_expert.up_proj",
+ "model.layers.23.mlp.shared_expert.down_proj",
  "model.layers.23.mlp.shared_expert_gate",
  "model.layers.24.linear_attn.in_proj_qkvz",
  "model.layers.24.linear_attn.in_proj_ba",
  "model.layers.24.linear_attn.out_proj",
  "model.layers.24.mlp.gate",
+ "model.layers.24.mlp.shared_expert.gate_proj",
+ "model.layers.24.mlp.shared_expert.up_proj",
+ "model.layers.24.mlp.shared_expert.down_proj",
  "model.layers.24.mlp.shared_expert_gate",
  "model.layers.25.linear_attn.in_proj_qkvz",
  "model.layers.25.linear_attn.in_proj_ba",
  "model.layers.25.linear_attn.out_proj",
  "model.layers.25.mlp.gate",
+ "model.layers.25.mlp.shared_expert.gate_proj",
+ "model.layers.25.mlp.shared_expert.up_proj",
+ "model.layers.25.mlp.shared_expert.down_proj",
  "model.layers.25.mlp.shared_expert_gate",
  "model.layers.26.linear_attn.in_proj_qkvz",
  "model.layers.26.linear_attn.in_proj_ba",
  "model.layers.26.linear_attn.out_proj",
  "model.layers.26.mlp.gate",
+ "model.layers.26.mlp.shared_expert.gate_proj",
+ "model.layers.26.mlp.shared_expert.up_proj",
+ "model.layers.26.mlp.shared_expert.down_proj",
  "model.layers.26.mlp.shared_expert_gate",
+ "model.layers.27.self_attn.q_proj",
+ "model.layers.27.self_attn.k_proj",
+ "model.layers.27.self_attn.v_proj",
+ "model.layers.27.self_attn.o_proj",
  "model.layers.27.mlp.gate",
+ "model.layers.27.mlp.shared_expert.gate_proj",
+ "model.layers.27.mlp.shared_expert.up_proj",
+ "model.layers.27.mlp.shared_expert.down_proj",
  "model.layers.27.mlp.shared_expert_gate",
  "model.layers.28.linear_attn.in_proj_qkvz",
  "model.layers.28.linear_attn.in_proj_ba",
  "model.layers.28.linear_attn.out_proj",
  "model.layers.28.mlp.gate",
+ "model.layers.28.mlp.shared_expert.gate_proj",
+ "model.layers.28.mlp.shared_expert.up_proj",
+ "model.layers.28.mlp.shared_expert.down_proj",
  "model.layers.28.mlp.shared_expert_gate",
  "model.layers.29.linear_attn.in_proj_qkvz",
  "model.layers.29.linear_attn.in_proj_ba",
  "model.layers.29.linear_attn.out_proj",
  "model.layers.29.mlp.gate",
+ "model.layers.29.mlp.shared_expert.gate_proj",
+ "model.layers.29.mlp.shared_expert.up_proj",
+ "model.layers.29.mlp.shared_expert.down_proj",
  "model.layers.29.mlp.shared_expert_gate",
  "model.layers.30.linear_attn.in_proj_qkvz",
  "model.layers.30.linear_attn.in_proj_ba",
  "model.layers.30.linear_attn.out_proj",
  "model.layers.30.mlp.gate",
+ "model.layers.30.mlp.shared_expert.gate_proj",
+ "model.layers.30.mlp.shared_expert.up_proj",
+ "model.layers.30.mlp.shared_expert.down_proj",
  "model.layers.30.mlp.shared_expert_gate",
+ "model.layers.31.self_attn.q_proj",
+ "model.layers.31.self_attn.k_proj",
+ "model.layers.31.self_attn.v_proj",
+ "model.layers.31.self_attn.o_proj",
  "model.layers.31.mlp.gate",
+ "model.layers.31.mlp.shared_expert.gate_proj",
+ "model.layers.31.mlp.shared_expert.up_proj",
+ "model.layers.31.mlp.shared_expert.down_proj",
  "model.layers.31.mlp.shared_expert_gate",
  "model.layers.32.linear_attn.in_proj_qkvz",
  "model.layers.32.linear_attn.in_proj_ba",
  "model.layers.32.linear_attn.out_proj",
  "model.layers.32.mlp.gate",
+ "model.layers.32.mlp.shared_expert.gate_proj",
+ "model.layers.32.mlp.shared_expert.up_proj",
+ "model.layers.32.mlp.shared_expert.down_proj",
  "model.layers.32.mlp.shared_expert_gate",
  "model.layers.33.linear_attn.in_proj_qkvz",
  "model.layers.33.linear_attn.in_proj_ba",
  "model.layers.33.linear_attn.out_proj",
  "model.layers.33.mlp.gate",
+ "model.layers.33.mlp.shared_expert.gate_proj",
+ "model.layers.33.mlp.shared_expert.up_proj",
+ "model.layers.33.mlp.shared_expert.down_proj",
  "model.layers.33.mlp.shared_expert_gate",
  "model.layers.34.linear_attn.in_proj_qkvz",
  "model.layers.34.linear_attn.in_proj_ba",
  "model.layers.34.linear_attn.out_proj",
  "model.layers.34.mlp.gate",
+ "model.layers.34.mlp.shared_expert.gate_proj",
+ "model.layers.34.mlp.shared_expert.up_proj",
+ "model.layers.34.mlp.shared_expert.down_proj",
  "model.layers.34.mlp.shared_expert_gate",
+ "model.layers.35.self_attn.q_proj",
+ "model.layers.35.self_attn.k_proj",
+ "model.layers.35.self_attn.v_proj",
+ "model.layers.35.self_attn.o_proj",
  "model.layers.35.mlp.gate",
+ "model.layers.35.mlp.shared_expert.gate_proj",
+ "model.layers.35.mlp.shared_expert.up_proj",
+ "model.layers.35.mlp.shared_expert.down_proj",
  "model.layers.35.mlp.shared_expert_gate",
  "model.layers.36.linear_attn.in_proj_qkvz",
  "model.layers.36.linear_attn.in_proj_ba",
  "model.layers.36.linear_attn.out_proj",
  "model.layers.36.mlp.gate",
+ "model.layers.36.mlp.shared_expert.gate_proj",
+ "model.layers.36.mlp.shared_expert.up_proj",
+ "model.layers.36.mlp.shared_expert.down_proj",
  "model.layers.36.mlp.shared_expert_gate",
  "model.layers.37.linear_attn.in_proj_qkvz",
  "model.layers.37.linear_attn.in_proj_ba",
  "model.layers.37.linear_attn.out_proj",
  "model.layers.37.mlp.gate",
+ "model.layers.37.mlp.shared_expert.gate_proj",
+ "model.layers.37.mlp.shared_expert.up_proj",
+ "model.layers.37.mlp.shared_expert.down_proj",
  "model.layers.37.mlp.shared_expert_gate",
  "model.layers.38.linear_attn.in_proj_qkvz",
  "model.layers.38.linear_attn.in_proj_ba",
  "model.layers.38.linear_attn.out_proj",
  "model.layers.38.mlp.gate",
+ "model.layers.38.mlp.shared_expert.gate_proj",
+ "model.layers.38.mlp.shared_expert.up_proj",
+ "model.layers.38.mlp.shared_expert.down_proj",
  "model.layers.38.mlp.shared_expert_gate",
+ "model.layers.39.self_attn.q_proj",
+ "model.layers.39.self_attn.k_proj",
+ "model.layers.39.self_attn.v_proj",
+ "model.layers.39.self_attn.o_proj",
  "model.layers.39.mlp.gate",
+ "model.layers.39.mlp.shared_expert.gate_proj",
+ "model.layers.39.mlp.shared_expert.up_proj",
+ "model.layers.39.mlp.shared_expert.down_proj",
  "model.layers.39.mlp.shared_expert_gate",
  "model.layers.40.linear_attn.in_proj_qkvz",
  "model.layers.40.linear_attn.in_proj_ba",
  "model.layers.40.linear_attn.out_proj",
  "model.layers.40.mlp.gate",
+ "model.layers.40.mlp.shared_expert.gate_proj",
+ "model.layers.40.mlp.shared_expert.up_proj",
+ "model.layers.40.mlp.shared_expert.down_proj",
  "model.layers.40.mlp.shared_expert_gate",
  "model.layers.41.linear_attn.in_proj_qkvz",
  "model.layers.41.linear_attn.in_proj_ba",
  "model.layers.41.linear_attn.out_proj",
  "model.layers.41.mlp.gate",
+ "model.layers.41.mlp.shared_expert.gate_proj",
+ "model.layers.41.mlp.shared_expert.up_proj",
+ "model.layers.41.mlp.shared_expert.down_proj",
  "model.layers.41.mlp.shared_expert_gate",
  "model.layers.42.linear_attn.in_proj_qkvz",
  "model.layers.42.linear_attn.in_proj_ba",
  "model.layers.42.linear_attn.out_proj",
  "model.layers.42.mlp.gate",
+ "model.layers.42.mlp.shared_expert.gate_proj",
+ "model.layers.42.mlp.shared_expert.up_proj",
+ "model.layers.42.mlp.shared_expert.down_proj",
  "model.layers.42.mlp.shared_expert_gate",
+ "model.layers.43.self_attn.q_proj",
+ "model.layers.43.self_attn.k_proj",
+ "model.layers.43.self_attn.v_proj",
+ "model.layers.43.self_attn.o_proj",
  "model.layers.43.mlp.gate",
+ "model.layers.43.mlp.shared_expert.gate_proj",
+ "model.layers.43.mlp.shared_expert.up_proj",
+ "model.layers.43.mlp.shared_expert.down_proj",
  "model.layers.43.mlp.shared_expert_gate",
  "model.layers.44.linear_attn.in_proj_qkvz",
  "model.layers.44.linear_attn.in_proj_ba",
  "model.layers.44.linear_attn.out_proj",
  "model.layers.44.mlp.gate",
+ "model.layers.44.mlp.shared_expert.gate_proj",
+ "model.layers.44.mlp.shared_expert.up_proj",
+ "model.layers.44.mlp.shared_expert.down_proj",
  "model.layers.44.mlp.shared_expert_gate",
  "model.layers.45.linear_attn.in_proj_qkvz",
  "model.layers.45.linear_attn.in_proj_ba",
  "model.layers.45.linear_attn.out_proj",
  "model.layers.45.mlp.gate",
+ "model.layers.45.mlp.shared_expert.gate_proj",
+ "model.layers.45.mlp.shared_expert.up_proj",
+ "model.layers.45.mlp.shared_expert.down_proj",
  "model.layers.45.mlp.shared_expert_gate",
  "model.layers.46.linear_attn.in_proj_qkvz",
  "model.layers.46.linear_attn.in_proj_ba",
  "model.layers.46.linear_attn.out_proj",
  "model.layers.46.mlp.gate",
+ "model.layers.46.mlp.shared_expert.gate_proj",
+ "model.layers.46.mlp.shared_expert.up_proj",
+ "model.layers.46.mlp.shared_expert.down_proj",
  "model.layers.46.mlp.shared_expert_gate",
+ "model.layers.47.self_attn.q_proj",
+ "model.layers.47.self_attn.k_proj",
+ "model.layers.47.self_attn.v_proj",
+ "model.layers.47.self_attn.o_proj",
  "model.layers.47.mlp.gate",
+ "model.layers.47.mlp.shared_expert.gate_proj",
+ "model.layers.47.mlp.shared_expert.up_proj",
+ "model.layers.47.mlp.shared_expert.down_proj",
  "model.layers.47.mlp.shared_expert_gate",
  "lm_head"
  ],
@@ -318,7 +510,7 @@
  "quantization_status": "compressed",
  "sparsity_config": {},
  "transform_config": {},
- "version": "0.11.0"
+ "version": "0.11.1.a20250912"
  },
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
model-00001-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4c6135a19f254eebd8f3d2394fd9d50d80326dcdfdcb8c312112ba2b1e42cd3f
- size 5002202400
+ oid sha256:b123302346e0c1a953599b3e4848b9238410aa78c5ec078a8780c0b92e6166c3
+ size 5002226064
model-00002-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aa395cb1aad3c427e48b827590340d894af8773e3b703255fc25ae1d88a07083
- size 5002558984
+ oid sha256:66a0e5064cbf8850c0911131269abdf7f49c3e0c539ef290ce13513dc5be683d
+ size 5002906536
model-00003-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1cff4f0a8c8510b729c92c9757ebd66f1b60a83e874b81d6eac1fd941b96ff7a
- size 5002581480
+ oid sha256:4facabf40f7f02c06778a220a358a363a6fc22fb742b9b388edd951cefc2859d
+ size 5002469472
model-00004-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb589fb3c579a054305a1f53a7f5a195a119a12be3b8d7c90f655bb8c9b243a3
- size 5002914632
+ oid sha256:c029eb5c11d18b2183b92c5659bc9302d5c054aea9d2f384988def44209a37fe
+ size 5002480424
model-00005-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0658cd17d934497288bf7eb0824a36ea362a9f3a11fdc4fcf04acbdafa06aa4a
- size 5002582176
+ oid sha256:96ec33e8153f4fe978b422eed68f1ce1ad39258f9ca820d5cb97f9d87b88230f
+ size 5002930176
model-00006-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f3672fb6b070435c487bce0f91e6c7917e4bc3e5e37607788c5920f74b1064ee
- size 5002582968
+ oid sha256:07b8256cf1e3ec255bf915f5969722a9ac602acbf030b5eb75eb7d86ba610558
+ size 4991135440
model-00007-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dcbf8b50036ea448bd0742658d33cbf390b9ae6df167fe78931536ea0d5ace6e
- size 5002946736
+ oid sha256:27c0e4ac72340dea5c44fe9f9448926eed3e5037ecf7e868436d7a9e8664b65c
+ size 5002894032
model-00008-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8350739c18d519126b05cc53737bb22d0615c2c0d9136615c82d1873e3216fec
- size 5002648768
+ oid sha256:7d81372dfdc7193d189011d9cc4c541df1434cd5a9141149de9861829924932c
+ size 5002929656
model-00009-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:041d58c26c913cc8c3c9839e711830b773f841b927fe043ef66b37929016f6e0
- size 5002582968
+ oid sha256:698343c0b33ccc335b7838eee4816cded3aec63ecbd96e48e831e95fd32c9c5f
+ size 5002471416
model-00010-of-00010.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:445460ac907283adb4cff55a8b53f6dde81742ed3b2ba3b6b8ab35ff236a4dfc
- size 2534274448
+ oid sha256:b4cf5cf95d61c3837c0e23834c73a33843a1deec8a873671b45f82be5b6933c6
+ size 3232720920
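
All ten shard diffs touch only git-LFS pointer files: the weights themselves live in LFS storage and are identified by the oid/size pair, which is why re-quantizing the model changes just those two lines per shard. A short sketch of reading such a pointer, following the three-line format shown above:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a git-lfs pointer file (one 'key value' pair per line)."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:b4cf5cf95d61c3837c0e23834c73a33843a1deec8a873671b45f82be5b6933c6
size 3232720920
"""
print(parse_lfs_pointer(pointer))  # shard 10 of this commit, ~3.2 GB
```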
model.safetensors.index.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc7260428d31ae24ba1bee018b10ca08d05f7bf68dc13ab2619854e1f6c23aba
- size 21604155
+ oid sha256:7de3074ca86e26d7798358028f7e09b856066ab3b0f6b76ce120b209dc1cd25d
+ size 21565735
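
The sharded-checkpoint index is itself stored through LFS here (roughly 21 MB of JSON). Its standard layout is a metadata block plus a weight_map from tensor name to shard file; the exact tensor names below are illustrative:

```python
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Standard safetensors index layout: total size plus tensor -> shard map.
print(index["metadata"]["total_size"])
print(index["weight_map"]["lm_head.weight"])  # e.g. "model-00010-of-00010.safetensors"
```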
recipe.yaml CHANGED
@@ -1,5 +1,5 @@
- quant_stage:
-   quant_modifiers:
+ default_stage:
+   default_modifiers:
      AWQModifier:
        config_groups:
          group_0:
@@ -13,15 +13,16 @@ quant_stage:
            block_structure: null
            dynamic: false
            actorder: null
-           observer: minmax
+           observer: mse
            observer_kwargs: {}
            input_activations: null
            output_activations: null
            format: null
        targets: [Linear]
-       ignore: [lm_head, model.embed_tokens, 're:mtp.*', 're:.*input_layernorm$', 're:.*post_attention_layernorm$',
-         model.norm, 're:.*mlp[.]gate$', 're:.*shared_expert_gate$', 're:.*linear_attn.*',
-         're:.*norm$', 're:.*rotary_emb.*', 're:.*q_norm$', 're:.*k_norm$']
+       ignore: [model.embed_tokens, 're:.*input_layernorm$', 're:.*linear_attn.*', 're:.*norm.*',
+         're:.*RMSNorm.*', 're:.*rotary.*', 're:.*shared_expert.*', 're:.*shared_expert_gate$',
+         're:.*mlp[.]gate$', 're:.*router.*', 're:.*post_attention_layernorm$', 're:.*self_attn.*',
+         're:mtp.*', lm_head]
        mappings:
        - smooth_layer: re:.*input_layernorm$
          balance_layers: ['re:.*self_attn[.]q_proj$', 're:.*self_attn[.]k_proj$', 're:.*self_attn[.]v_proj$',
@@ -32,4 +33,6 @@ quant_stage:
          balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
        - smooth_layer: re:.*up_proj$
          balance_layers: ['re:.*down_proj$']
+       - smooth_layer: re:.*linear_attn[.]norm$
+         balance_layers: ['re:.*linear_attn[.]out_proj$']
        duo_scaling: true
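
Beyond the stage rename (quant_stage/quant_modifiers to default_stage/default_modifiers, the serialized form newer llm-compressor releases write back), the recipe switches the AWQ observer to mse, rewrites the ignore list to skip all self_attn, shared_expert, and linear_attn modules, and adds a smoothing mapping from linear_attn.norm onto linear_attn.out_proj. A sketch of how a recipe like this is typically applied with llm-compressor's oneshot entrypoint; the base-model id, calibration dataset, and sample count are assumptions, not values recorded in this commit:

```python
from transformers import AutoModelForCausalLM
from llmcompressor import oneshot  # import path varies across versions

BASE_MODEL = "org/base-model"  # hypothetical: the unquantized source repo

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype="auto")

# Run AWQ calibration and compression as described by recipe.yaml.
oneshot(
    model=model,
    recipe="recipe.yaml",
    dataset="open_platypus",        # assumed calibration set
    num_calibration_samples=256,    # assumed sample count
    max_seq_length=2048,
)

# Writes compressed-tensors weights plus the quantization_config
# shown in the config.json diff above.
model.save_pretrained("out-awq-int4", save_compressed=True)
```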
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ae1a036a9837df9caeebb840d09d80e8feef0f6d2bae982970d1ad34f5946aff
- size 11422753
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+ size 11422654
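
For completeness: a compressed-tensors AWQ checkpoint like this one is normally served with vLLM (or loaded in transformers with the compressed-tensors package installed). The repository id below is a placeholder for this repo's actual name:

```python
from vllm import LLM, SamplingParams

# Placeholder id; substitute the actual repository for this commit.
llm = LLM(model="cpatonn/<this-repo>")
out = llm.generate(["Hello, world"], SamplingParams(max_tokens=32))
print(out[0].outputs[0].text)
```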