#!/usr/bin/env bash
# Build the --custom-q recipe for quantizing GLM-4.5 with llama-quantize.
# One "tensor-name-regex=quant-type" rule per line; '#' lines are comments
# and are stripped before the rules are joined into a single CSV string.
set -euo pipefail

custom="
# 93 Repeating Layers [0-92]
# Attention
blk\..*\.attn_q.*=iq5_ks_r4
blk\..*\.attn_k.*=iq6_k
blk\..*\.attn_v.*=iq6_k
blk\..*\.attn_output.*=iq5_ks_r4
# First 3 Dense Layers [0-2]
blk\..*\.ffn_down\.weight=iq4_kt
blk\..*\.ffn_(gate|up)\.weight=iq4_kt
# Shared Expert Layers [3-92]
blk\..*\.ffn_down_shexp\.weight=iq5_ks_r4
blk\..*\.ffn_(gate|up)_shexp\.weight=iq4_ks_r4
# Routed Experts Layers [3-92]
blk\..*\.ffn_down_exps\.weight=iq2_kl
blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl
# NextN MTP Layer [92]
blk\..*\.nextn\.embed_tokens\.weight=iq5_ks
blk\..*\.nextn\.shared_head_head\.weight=iq5_ks
blk\..*\.nextn\.eh_proj\.weight=q8_0
# Non-Repeating Layers
token_embd\.weight=iq4_k
output\.weight=iq6_k
"

# Drop comment lines, then collapse the remaining newline-separated rules
# into one comma-separated string and trim leading/trailing commas.
# NOTE(review): sed -z (NUL-delimited whole-input record) is GNU-specific.
custom=$(
  printf '%s' "$custom" | grep -v '^#' \
    | sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
)
# Quantize the BF16 GGUF (first shard of the split model) to IQ2_KL,
# applying the per-tensor overrides from $custom and the importance matrix.
# NOTE(review): trailing positional 8 is presumably the thread count
# (nthreads) — confirm against `llama-quantize --help` for this build.
./build/bin/llama-quantize \
  --custom-q "$custom" \
  --imatrix /home/alpha/Models/GGUF/GLM-4.5-Base.imatrix.dat \
  /home/alpha/Models/GGUF/GLM-Base-BF16/zai-org_GLM-4-00001-of-00046.gguf \
  /home/alpha/Models/GGUF/GLM-4.5-IQ2_KL.gguf \
  IQ2_KL \
  8