Upload folder using huggingface_hub

249c2d2 verified 3 months ago

1.09 kB

	# Per-tensor quantization recipe, later handed to llama-quantize via
	# --custom-q. Every non-comment line is a rule of the form
	#     <tensor-name regex>=<quant type>
	# Lines starting with '#' and blank lines are annotations only; they must
	# be stripped (and the remaining rules comma-joined) before use.
	#
	# All per-layer rules use 'blk\..*\.' so they apply to every layer index,
	# not only single-digit ones, and '(gate|up)' is a plain regex alternation
	# (no backslash: inside double quotes a '\|' would reach the regex engine
	# as an escaped literal pipe and never match).
	custom="
	# 93 Repeating Layers [0-92]

	# Attention
	blk\..*\.attn_q.*=iq5_ks_r4
	blk\..*\.attn_k.*=iq6_k
	blk\..*\.attn_v.*=iq6_k
	blk\..*\.attn_output.*=iq5_ks_r4

	# First 3 Dense Layers [0-2]
	blk\..*\.ffn_down\.weight=iq4_kt
	blk\..*\.ffn_(gate|up)\.weight=iq4_kt

	# Shared Expert Layers [3-92]
	blk\..*\.ffn_down_shexp\.weight=iq5_ks_r4
	blk\..*\.ffn_(gate|up)_shexp\.weight=iq4_ks_r4

	# Routed Experts Layers [3-92]
	blk\..*\.ffn_down_exps\.weight=iq2_kl
	blk\..*\.ffn_(gate|up)_exps\.weight=iq2_kl

	# NextN MTP Layer [92]
	blk\..*\.nextn\.embed_tokens\.weight=iq5_ks
	blk\..*\.nextn\.shared_head_head\.weight=iq5_ks
	blk\..*\.nextn\.eh_proj\.weight=q8_0

	# Non-Repeating Layers
	token_embd\.weight=iq4_k
	output\.weight=iq6_k
	"

	# flatten_recipe RECIPE
	#
	# Convert a multi-line recipe into the single comma-separated rule list
	# passed to --custom-q. Writes the result to stdout.
	#   1. trim leading/trailing whitespace on every line, so an indented
	#      recipe does not leak tabs or spaces into the regexes;
	#   2. drop comment lines (first non-blank character is '#') and
	#      empty lines;
	#   3. join the surviving rules with ',' (no leading or trailing comma).
	# An empty or comment-only recipe yields an empty string.
	flatten_recipe() {
		printf '%s\n' "$1" |
			sed 's/^[[:space:]]*//; s/[[:space:]]*$//' |
			grep -Ev '^(#|$)' |
			paste -s -d , -
	}

	# The pipes above are real pipeline operators: an escaped '\|' would be
	# handed to the first command as a literal argument and nothing would be
	# filtered or joined.
	custom=$(flatten_recipe "$custom")

	# Run the quantizer with the rule list held in $custom.
	# Positional arguments are: input GGUF, output GGUF, base quant type and
	# a trailing integer (presumably the worker thread count -- confirm
	# against the llama-quantize usage text).
	quantize_bin=./build/bin/llama-quantize
	imatrix_file=/home/alpha/Models/GGUF/GLM-4.5-Base.imatrix.dat
	source_gguf=/home/alpha/Models/GGUF/GLM-Base-BF16/zai-org_GLM-4-00001-of-00046.gguf
	target_gguf=/home/alpha/Models/GGUF/GLM-4.5-IQ2_KL.gguf
	base_qtype=IQ2_KL
	n_threads=8

	"$quantize_bin" \
		--custom-q "$custom" \
		--imatrix "$imatrix_file" \
		"$source_gguf" \
		"$target_gguf" \
		"$base_qtype" \
		"$n_threads"