Add IQ3_KS slightly smaller quant
Browse files
- README.md +48 -0
- images/perplexity.png +2 -2
README.md
CHANGED
|
@@ -221,6 +221,54 @@ numactl -N 1 -m 1 \
|
|
| 221 |
|
| 222 |
</details>
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
## `IQ2_KL` 81.866 GiB (2.991 BPW)
|
| 225 |
Final estimate: PPL = 4.6608 +/- 0.02720
|
| 226 |
|
|
|
|
| 221 |
|
| 222 |
</details>
|
| 223 |
|
| 224 |
+
## `IQ3_KS` 101.308 GiB (3.702 BPW)
|
| 225 |
+
Final estimate: PPL = 4.3718 +/- 0.02509
|
| 226 |
+
|
| 227 |
+
<details>
|
| 228 |
+
|
| 229 |
+
<summary>👈 Secret Recipe</summary>
|
| 230 |
+
|
| 231 |
+
```bash
|
| 232 |
+
#!/usr/bin/env bash
# Recipe for the IQ3_KS quant: runs llama-quantize on the BF16 GGUF with an
# importance matrix and a list of per-tensor quant-type overrides (--custom-q).

# Directory that holds the imatrix, the BF16 input shards and the output file.
model_dir=/mnt/raid/models/ubergarm/Qwen3-235B-A22B-Thinking-2507-GGUF

# Repeating Layers [0-93]

# One "pattern=quant-type" rule per line. Comment and blank lines are only for
# readability here; they are stripped below before the rules are passed on.
# NOTE(review): patterns look like regexes matched against tensor names, with
# earlier rules presumably taking precedence over later ones - confirm against
# the llama-quantize build in use.
custom="
# Attention
blk\..*\.attn_q.*=iq6_k
blk\..*\.attn_k.*=q8_0
blk\..*\.attn_v.*=q8_0
blk\..*\.attn_output.*=iq6_k

# Routed Experts
blk\.(0|1|2|3)\.ffn_down_exps\.weight=iq5_ks
blk\.(0|1|2|3)\.ffn_(gate|up)_exps\.weight=iq4_ks
blk\..*\.ffn_down_exps\.weight=iq4_ks
blk\..*\.ffn_(gate|up)_exps\.weight=iq3_ks

# Token Embedding
token_embd\.weight=iq4_k
output\.weight=iq6_k
"

# Flatten the rule list into one comma-separated string: drop comment lines
# (including indented ones) and blank lines, then join what is left with ','.
# grep + paste keeps this portable; it does not depend on GNU sed's -z option.
custom=$(
  printf '%s\n' "$custom" \
    | grep -Ev '^[[:space:]]*(#|$)' \
    | paste -sd, -
)

# Do not launch the quantize run with an empty override list.
if [[ -z "$custom" ]]; then
  printf 'error: no --custom-q rules left after filtering\n' >&2
  exit 1
fi

# numactl -N 0 -m 0 pins the run to the CPUs and memory of NUMA node 0.
# Positional arguments: input GGUF (first shard), output GGUF, target quant
# type, and a trailing 192 (presumably the thread count - TODO confirm).
numactl -N 0 -m 0 \
./build/bin/llama-quantize \
    --custom-q "$custom" \
    --imatrix "${model_dir}/imatrix-Qwen3-235B-A22B-Thinking-2507-BF16.dat" \
    "${model_dir}/Qwen3-235B-A22B-Thinking-2507-BF16-00001-of-00010.gguf" \
    "${model_dir}/Qwen3-235B-A22B-Thinking-2507-IQ3_KS.gguf" \
    IQ3_KS \
    192
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
</details>
|
| 270 |
+
|
| 271 |
+
|
| 272 |
## `IQ2_KL` 81.866 GiB (2.991 BPW)
|
| 273 |
Final estimate: PPL = 4.6608 +/- 0.02720
|
| 274 |
|
images/perplexity.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|