Map-NEO / run_training.py
Austin207's picture
Upload folder using huggingface_hub
a683148 verified
#!/usr/bin/env python3
# Run MAP-NEO Mini training pipeline
import subprocess
import sys
from pathlib import Path
def run_command(cmd, description):
    """Run one pipeline step as a subprocess and abort the script on failure.

    A banner with the step description and the command is printed first.
    The child's output is captured rather than streamed, so nothing from the
    child process is shown until it has finished.

    Args:
        cmd: Command to execute. A string is run through the shell; a list
            or tuple of arguments is executed directly, without a shell.
        description: Human-readable step name used in the log messages.

    Raises:
        SystemExit: With status 1 when the command exits with a non-zero
            return code.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Running: {description}")
    print(f"Command: {cmd}")
    print(banner)

    result = subprocess.run(
        cmd,
        # Only string commands need the shell; argument lists run directly.
        shell=isinstance(cmd, str),
        capture_output=True,
        text=True,
        # Undecodable bytes in the child's output must not crash the launcher.
        errors="replace",
    )

    if result.returncode != 0:
        print(f"Error in {description}:")
        print(f"Exit code: {result.returncode}")
        # Many tools report failures on stdout, so surface both streams.
        if result.stdout:
            print(result.stdout)
        if result.stderr:
            print(result.stderr)
        sys.exit(1)

    print(f"Success: {description}")
    if result.stdout:
        print(result.stdout)
def main():
    """Run the MAP-NEO Mini pipeline: optional preprocessing, then training.

    Preprocessing is skipped when ``data/tokens/packed_1024.txt`` already
    exists. Both that path and the child scripts (``data_prep.py``,
    ``train_neo.py``) are relative, so they are resolved against the current
    working directory. Each step goes through ``run_command``, which exits
    the process if the step fails.
    """
    print("MAP-NEO Mini Training Pipeline")
    print("Optimized for RTX 5070 8GB VRAM")

    # Launch the child scripts with the interpreter running this script: a
    # bare "python" may be absent from PATH or belong to another environment.
    # Quoted so that an interpreter path containing spaces survives the shell.
    python = f'"{sys.executable}"' if sys.executable else "python"

    # Step 1: Data preprocessing
    packed_tokens = Path("data/tokens/packed_1024.txt")
    if packed_tokens.exists():
        print("\nSkipping data preprocessing (data exists)")
    else:
        print("\nStep 1: Data preprocessing")
        run_command(
            f"{python} data_prep.py --num_docs 20000 --seq_length 1024",
            "Data preprocessing",
        )

    # Step 2: Model training
    print("\nStep 2: Starting model training")
    run_command(f"{python} train_neo.py", "Model training")

    footer = "=" * 50
    print("\n" + footer)
    print("Training pipeline completed!")
    print("Check checkpoints/ directory for saved models")
    print(footer)
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()