File size: 1,092 Bytes

249c2d2

# Quantization recipe that is later handed to llama-quantize via --custom-q.
#
# Format, as used by the rest of this script:
#   - One rule per line; each rule reads as <tensor-name regex>=<quant type>.
#   - Lines starting with '#' and empty lines are dropped, and the remaining
#     lines are joined with commas before being passed on.
#
# NOTE(review): the "#/usr/bin/env bash" line below is inside the string, so
# it is not a shebang (it also lacks the '!'); it is simply discarded together
# with the other '#' lines.
# NOTE(review): the bracketed layer ranges in the section titles are
# descriptive only. Every pattern uses "blk\..*", so nothing in the regexes
# themselves restricts a rule to those layer indices.
# The string is double-quoted: '$', backticks and '"' inside a rule would be
# interpreted by the shell. The current rules contain none of them.
custom="
#/usr/bin/env bash

# 93 Repeating Layers [0-92]

# Attention
blk\..*\.attn_q.*=iq5_ks_r4
blk\..*\.attn_k.*=iq6_k
blk\..*\.attn_v.*=iq6_k
blk\..*\.attn_output.*=iq5_ks_r4

# First 3 Dense Layers [0-2]
blk\..*\.ffn_down\.weight=iq4_kt
blk\..*\.ffn_(gate|up)\.weight=iq4_kt

# Shared Expert Layers [3-92]
blk\..*\.ffn_down_shexp\.weight=iq5_ks_r4
blk\..*\.ffn_(gate|up)_shexp\.weight=iq4_ks_r4

# Routed Experts Layers [3-92]
blk\..*\.ffn_down_exps\.weight=iq2_kl
blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl

# NextN MTP Layer [92]
blk\..*\.nextn\.embed_tokens\.weight=iq5_ks
blk\..*\.nextn\.shared_head_head\.weight=iq5_ks
blk\..*\.nextn\.eh_proj\.weight=q8_0

# Non-Repeating Layers
token_embd\.weight=iq4_k
output\.weight=iq6_k
"

#######################################
# Flatten a multi-line quantization recipe into a single comma-separated
# string suitable for a --custom-q argument.
#
# Compared with the former `grep -v '^#' | sed -Ez ...` pipeline this:
#   - needs only POSIX awk (no GNU-only `sed -z`),
#   - trims leading/trailing blanks and CRs so that indented comments,
#     whitespace-only lines and CRLF line endings cannot leak into the
#     rule list as malformed entries,
#   - uses printf instead of echo, so backslashes in the regexes are never
#     subject to echo's implementation-defined escape handling.
#
# Arguments: $1 - recipe text, one rule per line; '#' lines are comments
# Outputs:   the rules joined with ',' on stdout (no trailing newline);
#            nothing at all when the recipe holds no rules
#######################################
flatten_recipe() {
  local recipe=$1
  printf '%s\n' "$recipe" \
    | awk '
        {
          # Trim surrounding blanks, tabs and carriage returns.
          sub(/^[ \t\r]+/, "")
          sub(/[ \t\r]+$/, "")
        }
        # Skip empty lines and comment lines.
        $0 == "" || /^#/ { next }
        # Emit the rule, prefixing a comma from the second rule onwards.
        { printf "%s%s", sep, $0; sep = "," }
      '
}

custom=$(flatten_recipe "$custom")

# Run the quantizer with the flattened recipe in $custom.
# The positional arguments follow llama-quantize's usage order
# (input model, output model, quant type, thread count) -- confirm against
# the --help output of the build in use.
quantizer=./build/bin/llama-quantize
imatrix_file=/home/alpha/Models/GGUF/GLM-4.5-Base.imatrix.dat
source_gguf=/home/alpha/Models/GGUF/GLM-Base-BF16/zai-org_GLM-4-00001-of-00046.gguf
target_gguf=/home/alpha/Models/GGUF/GLM-4.5-IQ2_KL.gguf
quant_type=IQ2_KL
thread_count=8

"$quantizer" \
  --custom-q "$custom" \
  --imatrix "$imatrix_file" \
  "$source_gguf" \
  "$target_gguf" \
  "$quant_type" \
  "$thread_count"