File size: 1,378 Bytes
0e182d9 137fdcf 0e182d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
---
base_model:
- zai-org/GLM-4.5
base_model_relation: quantized
quantized_by: ddh0
---
# ddh0/GLM-4.5-3.34bpw.gguf
This repository contains a custom 3.34bpw GGUF quantization of [GLM-4.5](https://huggingface.co/zai-org/GLM-4.5), to be used with [llama.cpp](https://github.com/ggml-org/llama.cpp).
```bash
# Reproduction recipe for this custom 3.34bpw quant of GLM-4.5.
# Importance matrix guiding low-bit quantization (strongly recommended for
# the IQ3_XXS expert tensors below).
IMATRIX=~/imatrices/zai-org_GLM-4.5-imatrix.gguf

# Per-tensor-group quantization types (names follow llama.cpp tensor naming;
# "shexp" = shared-expert FFN, "exps" = routed-expert FFN).
TYPE_EMBD=Q8_0               # token embedding table
TYPE_SHEXP=Q8_0              # shared-expert FFN gate/up/down
TYPE_FFN_GATE=IQ4_XS         # dense-layer FFN gate
TYPE_FFN_UP=IQ4_XS           # dense-layer FFN up
TYPE_FFN_DOWN=IQ4_XS         # dense-layer FFN down
TYPE_FFN_GATE_EXPS=IQ3_XXS   # routed-expert FFN gate
TYPE_FFN_UP_EXPS=IQ3_XXS     # routed-expert FFN up
TYPE_FFN_DOWN_EXPS=IQ3_XXS   # routed-expert FFN down
TYPE_ATTN_K=Q8_0             # attention K projection
TYPE_ATTN_Q=Q8_0             # attention Q projection
TYPE_ATTN_V=Q8_0             # attention V projection
TYPE_ATTN_O=Q8_0             # attention output projection
TYPE_OUTPUT=Q8_0             # output (lm_head) tensor
TYPE_DEFAULT=Q8_0            # fallback for any tensor not matched above

SRC_GGUF=~/gguf/GLM-4.5-bf16.gguf        # bf16 source model
DST_GGUF=~/gguf/GLM-4.5-3.34bpw.gguf     # quantized destination

# FIX: the original command defined IMATRIX but never passed it to
# llama-quantize; --imatrix supplies it so the importance matrix is
# actually used during quantization.
llama-quantize \
    --imatrix $IMATRIX \
    --token-embedding-type $TYPE_EMBD \
    --tensor-type ffn_gate=$TYPE_FFN_GATE \
    --tensor-type ffn_up=$TYPE_FFN_UP \
    --tensor-type ffn_down=$TYPE_FFN_DOWN \
    --tensor-type ffn_gate_shexp=$TYPE_SHEXP \
    --tensor-type ffn_up_shexp=$TYPE_SHEXP \
    --tensor-type ffn_down_shexp=$TYPE_SHEXP \
    --tensor-type ffn_gate_exps=$TYPE_FFN_GATE_EXPS \
    --tensor-type ffn_up_exps=$TYPE_FFN_UP_EXPS \
    --tensor-type ffn_down_exps=$TYPE_FFN_DOWN_EXPS \
    --tensor-type attn_k=$TYPE_ATTN_K \
    --tensor-type attn_q=$TYPE_ATTN_Q \
    --tensor-type attn_v=$TYPE_ATTN_V \
    --tensor-type attn_output=$TYPE_ATTN_O \
    --output-tensor-type $TYPE_OUTPUT \
    $SRC_GGUF $DST_GGUF $TYPE_DEFAULT $(nproc)
```
|