#!/usr/bin/env bash
#
# Quantize GLM-4.5-Base (BF16 GGUF shards) to IQ2_KL with per-tensor
# quantization overrides, using ik_llama.cpp's llama-quantize.
#
# Requires: ./build/bin/llama-quantize (ik_llama.cpp build with --custom-q
# support), the BF16 source GGUF, and a precomputed imatrix file.
set -euo pipefail

# Per-tensor quantization recipe: one "tensor-name-regex=quant-type" rule per
# line. Comment lines (leading '#') are stripped below and the remaining rules
# are joined with commas, the format llama-quantize expects for --custom-q.
custom="
# 93 Repeating Layers [0-92]

# Attention
blk\..*\.attn_q.*=iq5_ks_r4
blk\..*\.attn_k.*=iq6_k
blk\..*\.attn_v.*=iq6_k
blk\..*\.attn_output.*=iq5_ks_r4

# First 3 Dense Layers [0-2]
blk\..*\.ffn_down\.weight=iq4_kt
blk\..*\.ffn_(gate|up)\.weight=iq4_kt

# Shared Expert Layers [3-92]
blk\..*\.ffn_down_shexp\.weight=iq5_ks_r4
blk\..*\.ffn_(gate|up)_shexp\.weight=iq4_ks_r4

# Routed Experts Layers [3-92]
blk\..*\.ffn_down_exps\.weight=iq2_kl
blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl

# NextN MTP Layer [92]
blk\..*\.nextn\.embed_tokens\.weight=iq5_ks
blk\..*\.nextn\.shared_head_head\.weight=iq5_ks
blk\..*\.nextn\.eh_proj\.weight=q8_0

# Non-Repeating Layers
token_embd\.weight=iq4_k
output\.weight=iq6_k
"

# Drop comment lines, then collapse runs of newlines into single commas and
# strip any leading/trailing comma. sed's -z (NUL-delimited records, GNU sed)
# lets one sed invocation see the whole string so it can rewrite the embedded
# newlines.
custom=$(
  printf '%s' "$custom" \
    | grep -v '^#' \
    | sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
)

# Positional args: <input.gguf> <output.gguf> <target-quant> <nthreads>
./build/bin/llama-quantize \
  --custom-q "$custom" \
  --imatrix /home/alpha/Models/GGUF/GLM-4.5-Base.imatrix.dat \
  /home/alpha/Models/GGUF/GLM-Base-BF16/zai-org_GLM-4-00001-of-00046.gguf \
  /home/alpha/Models/GGUF/GLM-4.5-IQ2_KL.gguf \
  IQ2_KL \
  8