#!/usr/bin/env python3
# Run the MAP-NEO Mini training pipeline: data preprocessing, then model training.
import subprocess
import sys
from pathlib import Path
def run_command(cmd, description):
    """Run a command, echo its captured output, and exit on failure.

    Args:
        cmd: The command to execute. A string is handed to the system shell;
            a list or tuple of arguments is executed directly, without a
            shell.
        description: Human-readable step name shown in the banner and in the
            success/error messages.

    Raises:
        SystemExit: With status 1 when the command cannot be started or
            returns a non-zero exit code.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Running: {description}")
    print(f"Command: {cmd}")
    print(banner)

    # Output is captured, so nothing from the child is displayed until it has
    # finished running.
    try:
        result = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # The command could not be launched at all (e.g. missing executable
        # when an argument list is used); report it like any other failure.
        print(f"Error in {description}:")
        print(exc)
        sys.exit(1)

    if result.returncode != 0:
        print(f"Error in {description}:")
        # Many tools write their diagnostics to stdout, so show both streams.
        if result.stdout:
            print(result.stdout)
        print(result.stderr)
        sys.exit(1)

    print(f"Success: {description}")
    if result.stdout:
        print(result.stdout)
def main():
    """Run the MAP-NEO Mini pipeline: data preprocessing, then training.

    Preprocessing is skipped when ``data/tokens/packed_1024.txt`` already
    exists. That path and the step scripts are resolved relative to the
    current working directory. Each step is executed through ``run_command``.
    """
    print("MAP-NEO Mini Training Pipeline")
    print("Optimized for RTX 5070 8GB VRAM")

    # Launch the step scripts with the interpreter that is running this
    # pipeline, so they share its environment and do not depend on a bare
    # `python` executable being present on PATH. Quoted for paths with spaces.
    python = f'"{sys.executable}"' if sys.executable else "python"

    # Step 1: Data preprocessing
    if not Path("data/tokens/packed_1024.txt").exists():
        print("\nStep 1: Data preprocessing")
        run_command(
            f"{python} data_prep.py --num_docs 20000 --seq_length 1024",
            "Data preprocessing",
        )
    else:
        print("\nSkipping data preprocessing (data exists)")

    # Step 2: Model training
    print("\nStep 2: Starting model training")
    run_command(
        f"{python} train_neo.py",
        "Model training",
    )

    print("\n" + "=" * 50)
    print("Training pipeline completed!")
    print("Check checkpoints/ directory for saved models")
    print("=" * 50)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()