cpatonn
/

Ling-flash-2.0-AWQ-4bit

Text Generation

compressed-tensors

Model card Files Files and versions

Ling-flash-2.0-AWQ-4bit / recipe.yaml

cpatonn's picture

Upload folder using huggingface_hub

db575f6 verified about 2 months ago

history blame contribute delete

1.09 kB

	quant_stage:
	quant_modifiers:
	AWQModifier:
	config_groups:
	group_0:
	targets: [Linear]
	weights:
	num_bits: 4
	type: int
	symmetric: true
	group_size: 32
	strategy: group
	block_structure: null
	dynamic: false
	actorder: null
	observer: mse
	observer_kwargs: {}
	input_activations: null
	output_activations: null
	format: null
	targets: [Linear]
	ignore: [word_embeddings, 're:model.layers.0[.].', 're:.input_layernorm', 're:.*post_attention_layernorm',
	're:.layernorm.', 're:.attention[.]dense', 're:.shared_experts.', 're:.mlp[.]gate.*',
	model.norm, lm_head]
	mappings:
	- smooth_layer: re:.*input_layernorm
	balance_layers: ['re:.*query_key_value']
	- smooth_layer: re:.*post_attention_layernorm
	balance_layers: ['re:.gate_proj', 're:.up_proj']
	- smooth_layer: re:.*up_proj$
	balance_layers: ['re:.*down_proj$']
	duo_scaling: true