#!/bin/bash
# Slurm batch job "gpt2_train": runs train.py with configs/config.yaml on a
# single node with one A100 GPU. Submit with `sbatch <this-file>`.
#
# NOTE: every #SBATCH directive must start at column 0 and appear before the
# first executable command, otherwise sbatch silently ignores it.
#SBATCH --job-name=gpt2_train           # Name shown in the queue
#SBATCH --nodes=1                       # Run on a single node
#SBATCH --ntasks-per-node=1             # One task (process) on that node
#SBATCH --cpus-per-task=32              # 32 CPU cores for that task
#SBATCH --time=26:00:00                 # Request 26 hours of wall-clock time
#SBATCH --mem=128GB                     # Request 128GB per node
#SBATCH --partition=gpu                 # Request the GPU partition/queue
#SBATCH --gres=gpu:a100:1               # Request one A100 GPU to use
#SBATCH --output=gpt2_train.%j.log      # Redirect stdout/err to file (%j = job ID)

# Run the training script. Both paths are relative, so they resolve against
# the job's working directory (by default, the directory sbatch was run from).
python train.py --config configs/config.yaml