---
base_model:
- zai-org/GLM-4.5
base_model_relation: quantized
quantized_by: ddh0
---

# ddh0/GLM-4.5-3.34bpw.gguf

This repository contains a custom 3.34bpw GGUF quantization of [GLM-4.5](https://huggingface.co/zai-org/GLM-4.5), to be used with [llama.cpp](https://github.com/ggml-org/llama.cpp).

```bash
IMATRIX=~/imatrices/zai-org_GLM-4.5-imatrix.gguf
TYPE_EMBD=Q8_0
TYPE_SHEXP=Q8_0
TYPE_FFN_GATE=IQ4_XS
TYPE_FFN_UP=IQ4_XS
TYPE_FFN_DOWN=IQ4_XS
TYPE_FFN_GATE_EXPS=IQ3_XXS
TYPE_FFN_UP_EXPS=IQ3_XXS
TYPE_FFN_DOWN_EXPS=IQ3_XXS
TYPE_ATTN_K=Q8_0
TYPE_ATTN_Q=Q8_0
TYPE_ATTN_V=Q8_0
TYPE_ATTN_O=Q8_0
TYPE_OUTPUT=Q8_0
TYPE_DEFAULT=Q8_0
SRC_GGUF=~/gguf/GLM-4.5-bf16.gguf
DST_GGUF=~/gguf/GLM-4.5-3.34bpw.gguf

llama-quantize \
--imatrix $IMATRIX \
--token-embedding-type $TYPE_EMBD \
--tensor-type ffn_gate=$TYPE_FFN_GATE \
--tensor-type ffn_up=$TYPE_FFN_UP \
--tensor-type ffn_down=$TYPE_FFN_DOWN \
--tensor-type ffn_gate_shexp=$TYPE_SHEXP \
--tensor-type ffn_up_shexp=$TYPE_SHEXP \
--tensor-type ffn_down_shexp=$TYPE_SHEXP \
--tensor-type ffn_gate_exps=$TYPE_FFN_GATE_EXPS \
--tensor-type ffn_up_exps=$TYPE_FFN_UP_EXPS \
--tensor-type ffn_down_exps=$TYPE_FFN_DOWN_EXPS \
--tensor-type attn_k=$TYPE_ATTN_K \
--tensor-type attn_q=$TYPE_ATTN_Q \
--tensor-type attn_v=$TYPE_ATTN_V \
--tensor-type attn_output=$TYPE_ATTN_O \
--output-tensor-type $TYPE_OUTPUT \
$SRC_GGUF $DST_GGUF $TYPE_DEFAULT $(nproc)
```