File size: 1,475 Bytes
a683148 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
#!/usr/bin/env python3
# Run MAP-NEO Mini training pipeline
import subprocess
import sys
from pathlib import Path
def run_command(cmd, description):
    """Run a shell command, echo its captured output, and exit on failure.

    Args:
        cmd: Command line string; it is executed through the shell.
        description: Human-readable label used in the banner and in the
            success/error messages.

    The child's stdout and stderr are captured, so nothing is shown until
    the command has finished.

    Raises:
        SystemExit: With status 1 when the command returns a non-zero
            exit code.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Running: {description}")
    print(f"Command: {cmd}")
    print(banner)

    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"Error in {description}:")
        # Output is captured, so anything the child wrote to stdout before
        # failing would otherwise be lost; show it ahead of stderr.
        if result.stdout:
            print(result.stdout)
        print(result.stderr)
        sys.exit(1)

    print(f"Success: {description}")
    if result.stdout:
        print(result.stdout)
def main():
    """Run the MAP-NEO Mini pipeline: optional preprocessing, then training.

    Preprocessing is skipped when ``data/tokens/packed_1024.txt`` already
    exists. Each step is launched through ``run_command``, which exits the
    process with status 1 if the step fails, so training only starts after
    preprocessing succeeded (or was skipped).
    """
    import os
    import shlex

    print("MAP-NEO Mini Training Pipeline")
    print("Optimized for RTX 5070 8GB VRAM")

    # Launch the child scripts with the interpreter that is running this
    # script. A bare "python" may be missing from PATH or may resolve to a
    # different interpreter/environment than the one with the dependencies.
    interpreter = sys.executable
    if not interpreter:
        # sys.executable can be empty/None; fall back to the PATH lookup.
        python = "python"
    elif os.name == "nt":
        # cmd.exe does not understand POSIX single-quote escaping.
        python = f'"{interpreter}"'
    else:
        python = shlex.quote(interpreter)

    # Step 1: Data preprocessing
    if not Path("data/tokens/packed_1024.txt").exists():
        print("\nStep 1: Data preprocessing")
        run_command(
            f"{python} data_prep.py --num_docs 20000 --seq_length 1024",
            "Data preprocessing",
        )
    else:
        print("\nSkipping data preprocessing (data exists)")

    # Step 2: Model training
    print("\nStep 2: Starting model training")
    run_command(
        f"{python} train_neo.py",
        "Model training",
    )

    print("\n" + "=" * 50)
    print("Training pipeline completed!")
    print("Check checkpoints/ directory for saved models")
    print("=" * 50)
if __name__ == "__main__":
    # Start the pipeline only when executed as a script, not on import.
    main()
|