```json
{
  "quantization_method": "bitsandbytes_nf4",
  "load_in_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "bnb_4bit_compute_dtype": "torch.bfloat16",
  "expected_vram_gb": 45,
  "notes": "Load with BitsAndBytesConfig for NF4 quantization. Attention layers kept in full precision.",
  "attention_layers_quantized": false
}
```
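
As a rough sketch of how these settings map onto the transformers API, the snippet below builds a `BitsAndBytesConfig` with the NF4 options listed above and loads a model with it. The checkpoint id and the attention module names passed to `llm_int8_skip_modules` are placeholders (the config does not name the model or the exact modules to exclude), and whether skip-modules is honored for 4-bit loading depends on the transformers version in use.

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# NF4 quantization settings mirroring the JSON config above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.bfloat16, # matmuls run in bfloat16
    # Keep attention projections unquantized, per "attention_layers_quantized": false.
    # Module names are architecture-dependent and assumed here; adjust for the actual model.
    llm_int8_skip_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

model = AutoModelForCausalLM.from_pretrained(
    "your-org/your-model",        # placeholder checkpoint id; not specified in the config
    quantization_config=bnb_config,
    device_map="auto",            # let accelerate place layers across available devices
)
```

The ~45 GB VRAM figure in the config is the expected footprint after 4-bit weight quantization; actual usage will also depend on sequence length, batch size, and which modules stay in full precision.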