""" Quick loader for quantized HunyuanImage-3.0 model. Generated automatically by hunyuan_quantize_nf4.py """ import torch from transformers import AutoModelForCausalLM, BitsAndBytesConfig def load_quantized_hunyuan(model_path="a:\Comfy25\ComfyUI_windows_portable\ComfyUI\models\HunyuanImage-3-NF4"): """Load the NF4 quantized HunyuanImage-3.0 model.""" quant_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16, ) model = AutoModelForCausalLM.from_pretrained( model_path, quantization_config=quant_config, device_map="cuda:0", # Load entirely on GPU 0 trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="sdpa", ) # Load tokenizer model.load_tokenizer(model_path) return model if __name__ == "__main__": print("Loading quantized model...") model = load_quantized_hunyuan() print("Model loaded successfully!") print(f"Device map: {model.hf_device_map}")