#!/usr/bin/env python3
"""
Fine-tune the Qwen3-4B-Instruct-2507 model with the standard Transformers API.
Intended for macOS (does not use Unsloth).
"""
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType
)
from datasets import Dataset
import json

print("=" * 80)
print("🚀 Starting fine-tuning of Qwen3-4B-Instruct-2507 (standard version)")
print("=" * 80)

# Model paths
BASE_MODEL_PATH = "/Users/cc/LLM_Models/Qwen3-4B-Instruct-2507"
OUTPUT_DIR = "health_advisor_model_qwen3_4b"

# Training configuration
max_seq_length = 512    # shorter sequence length to speed up training
load_in_4bit = False    # 4-bit quantization is not available on macOS; train in FP32

print(f"\n📦 Loading base model: {BASE_MODEL_PATH}")
print("📊 Training configuration:")
print(f"   - Sequence length: {max_seq_length}")
print(f"   - Quantization: {'4-bit' if load_in_4bit else 'FP32'}")
print(f"   - Output directory: {OUTPUT_DIR}")
print("=" * 80)

# Load the tokenizer
print("\n📝 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_PATH,
    trust_remote_code=True
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the model
print("\n🤖 Loading model (this may take a few minutes)...")
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    device_map="auto",
    torch_dtype=torch.float32,   # FP32 on macOS
    trust_remote_code=True,
    use_cache=False,             # disable the KV cache during training
    low_cpu_mem_usage=True       # reduce CPU memory usage while loading
)

# Configure LoRA
print("\n🔧 Configuring LoRA adapters...")
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# prepare_model_for_kbit_training is not needed on macOS (no 4-bit quantization);
# get_peft_model is enough to make the LoRA adapters trainable.
model = get_peft_model(model, lora_config)
print("✅ LoRA configured!")
print(f"Trainable parameters: {model.num_parameters(only_trainable=True) / 1e6:.2f}M")

# Load and process the training data
print("\n📂 Loading training data...")
with open('../data_generation/data/training_data.json', 'r', encoding='utf-8') as f:
    train_data_raw = json.load(f)

print("📝 Converting data format...")
train_data = []
for item in train_data_raw:
    # Merge instruction and input into a single prompt
    if "input" in item and item["input"]:
        text_input = f"{item['instruction']}\n\n{item['input']}"
    else:
        text_input = item['instruction']
    # Expected response
    text_output = item['output']
    # Full training text: prompt and response separated by a blank line
    train_text = f"{text_input}\n\n{text_output}"
    train_data.append({"text": train_text})

print(f"✅ Loaded {len(train_data)} training examples")

# Preprocessing: tokenize only; the data collator below builds the causal-LM
# labels (a copy of input_ids with padding positions masked to -100).
def preprocess_function(examples):
    return tokenizer(
        examples["text"],
        max_length=max_seq_length,
        truncation=True,
        padding="max_length"
    )

# Build the Dataset
train_dataset = Dataset.from_list(train_data)
train_dataset = train_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=train_dataset.column_names
)

# Data collator (mlm=False -> causal language modeling)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,   # modest accumulation to keep steps fast
    warmup_steps=50,
    max_steps=100,                   # few steps to keep the run short
    learning_rate=2e-4,
    fp16=False,                      # FP16 is not supported on macOS MPS
    bf16=False,
    logging_steps=1,
    optim="adamw_torch",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir=OUTPUT_DIR,
    save_strategy="steps",
    save_steps=50,
    eval_strategy="no",
    report_to="tensorboard",
    logging_dir=f"{OUTPUT_DIR}/logs",
    dataloader_num_workers=0,
    log_level="info",
    save_total_limit=3,
    gradient_checkpointing=False,    # disabled on macOS MPS for now
)

print("\n🎯 Training parameters:")
print(f"   - Batch size: {training_args.per_device_train_batch_size}")
print(f"   - Gradient accumulation: {training_args.gradient_accumulation_steps}")
print(f"   - Max steps: {training_args.max_steps}")
print(f"   - Learning rate: {training_args.learning_rate}")
print(f"   - Output dir: {training_args.output_dir}")
print("=" * 80)

# Create the trainer
print("\n🚀 Starting training...")
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

# Train
print("\n" + "=" * 80)
print("📊 Progress monitoring tips:")
print("   - loss and learning_rate are printed at every step")
print("   - watch live in TensorBoard: tensorboard --logdir=health_advisor_model_qwen3_4b/logs")
print("=" * 80)
print("")

trainer_stats = trainer.train()

print("\n✅ Training finished!")
print("\n📈 Final training statistics:")
print(f"   - Steps: {trainer_stats.global_step}")
print(f"   - Training time: {trainer_stats.metrics.get('train_runtime', 0.0):.2f} s")
print(f"   - Throughput: {trainer_stats.metrics.get('train_samples_per_second', 0.0):.2f} samples/s")

# Save the model
print(f"\n💾 Saving model to: {OUTPUT_DIR}")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("\n" + "=" * 80)
print("🎉 Fine-tuning complete!")
print(f"📁 Saved to: {OUTPUT_DIR}")
print("=" * 80)
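
# --- Optional: loading the fine-tuned adapter for inference ---
# A minimal sketch (not part of the original training flow, and not invoked by
# this script) showing one way to reload the saved LoRA adapter on top of the
# base model for a quick generation test. It assumes the standard peft
# PeftModel API and reuses BASE_MODEL_PATH / OUTPUT_DIR / tokenizer from above;
# the function name and default prompt are illustrative placeholders, so adapt
# the prompt to the instruction format used in your training data.
def run_inference_example(prompt: str = "Give me three general tips for better sleep.") -> str:
    from peft import PeftModel

    # Reload the frozen base model, then attach the trained LoRA adapter.
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float32,
        trust_remote_code=True,
    )
    finetuned = PeftModel.from_pretrained(base, OUTPUT_DIR)
    finetuned.eval()

    # Tokenize the prompt, generate a response, and decode it back to text.
    inputs = tokenizer(prompt, return_tensors="pt").to(finetuned.device)
    with torch.no_grad():
        output_ids = finetuned.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)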