#!/usr/bin/env python3
"""Run the MAP-NEO Mini training pipeline.

The pipeline has two steps, each executed as a child process:

1. Data preprocessing (``data_prep.py``) -- skipped when the packed token
   file already exists.
2. Model training (``train_neo.py``).

Script paths and the packed-token path are relative to the current working
directory.
"""
import shlex
import subprocess
import sys
from pathlib import Path


def run_command(cmd, description, *, stream=False):
    """Run a command, report the outcome, and exit the process on failure.

    Args:
        cmd: Either an argv sequence (run directly, without a shell) or a
            single string (run through the shell, as older callers expect).
        description: Human-readable label used in the progress messages.
        stream: When true, the child writes straight to this process's
            stdout/stderr so progress is visible while it runs. When false
            (the default), output is captured and printed once the child
            has finished.

    Raises:
        SystemExit: With exit code 1 if the command cannot be started or
            returns a non-zero exit status.
    """
    use_shell = isinstance(cmd, str)
    if use_shell:
        shown = cmd
    else:
        # Quote each argument so the echoed command can be copy-pasted.
        shown = " ".join(shlex.quote(str(part)) for part in cmd)

    print(f"\n{'=' * 50}")
    print(f"Running: {description}")
    print(f"Command: {shown}")
    # Flush so the banner cannot appear after output the child writes
    # directly to the inherited stdout in streaming mode.
    print(f"{'=' * 50}", flush=True)

    try:
        result = subprocess.run(
            cmd,
            shell=use_shell,
            capture_output=not stream,
            text=True,
        )
    except OSError as exc:
        # E.g. the executable does not exist; report it like any other failure.
        print(f"Error in {description}:")
        print(exc)
        sys.exit(1)

    if result.returncode != 0:
        print(f"Error in {description}:")
        if not stream:
            print(result.stderr)
            # Many tools write their diagnostics to stdout; do not drop them.
            if result.stdout:
                print(result.stdout)
        sys.exit(1)

    print(f"Success: {description}")
    if result.stdout:
        print(result.stdout)


def main():
    """Run preprocessing (if needed) followed by model training."""
    print("MAP-NEO Mini Training Pipeline")
    print("Optimized for RTX 5070 8GB VRAM")

    # Step 1: Data preprocessing
    if not Path("data/tokens/packed_1024.txt").exists():
        print("\nStep 1: Data preprocessing")
        run_command(
            # sys.executable keeps the child on the same interpreter (and
            # virtualenv) as this script instead of whatever "python" is.
            [
                sys.executable,
                "data_prep.py",
                "--num_docs",
                "20000",
                "--seq_length",
                "1024",
            ],
            "Data preprocessing",
            stream=True,
        )
    else:
        print("\nSkipping data preprocessing (data exists)")

    # Step 2: Model training
    print("\nStep 2: Starting model training")
    # Stream output: training is long-running and its progress should be
    # visible as it happens rather than only after the process exits.
    run_command(
        [sys.executable, "train_neo.py"],
        "Model training",
        stream=True,
    )

    print("\n" + "=" * 50)
    print("Training pipeline completed!")
    print("Check checkpoints/ directory for saved models")
    print("=" * 50)


if __name__ == "__main__":
    main()