diff --git "a/modded-nanogpt-train.16700835.err" "b/modded-nanogpt-train.16700835.err" new file mode 100644--- /dev/null +++ "b/modded-nanogpt-train.16700835.err" @@ -0,0 +1,47 @@ +++ head -n 1 +++ scontrol show hostnames g061 ++ export MASTER_ADDR=g061 ++ MASTER_ADDR=g061 ++++ tail -c 4 ++++ echo -n 16700835 +++ expr 10000 + 0835 ++ export MASTER_PORT=10835 ++ MASTER_PORT=10835 ++ export RANK=0 ++ RANK=0 ++ export WORLD_SIZE=1 ++ WORLD_SIZE=1 ++ echo 'SLURM Job ID: 16700835' ++ echo 'SLURM Node List: g061' ++ echo 'SLURM Number of Nodes: 1' ++ echo 'SLURM Number of Tasks: 1' ++ echo 'SLURM Tasks per Node: 1' ++ echo 'SLURM Local ID: 0' ++ echo 'SLURM Procedure ID: 0' ++ echo 'SLURM Node ID: 0' ++ echo 'MASTER_ADDR: g061' ++ echo 'MASTER_PORT: 10835' ++ echo 'RANK: 0' ++ echo 'WORLD_SIZE: 1' ++ echo 'CUDA_VISIBLE_DEVICES: 0' ++ cd /home/henrycastillo/modded-nanogpt ++ ./run.sh +/tmp/job.16700835/torchinductor_henrycastillo/4p/c4poa4cjpk5umc22d6bca6ko72veelb6kbqznwhzaqd5kvlwop7v.py:538: UserWarning: Logical operators 'and' and 'or' are deprecated for non-scalar tensors; please use '&' or '|' instead + scatter_mask = offs_m2[:, None] < Q_LEN and offs_n2[None, :] < KV_LEN +UserWarning: Enable tracemalloc to get the object allocation traceback +/tmp/job.16700835/torchinductor_henrycastillo/4p/c4poa4cjpk5umc22d6bca6ko72veelb6kbqznwhzaqd5kvlwop7v.py:538: UserWarning: Logical operators 'and' and 'or' are deprecated for non-scalar tensors; please use '&' or '|' instead + scatter_mask = offs_m2[:, None] < Q_LEN and offs_n2[None, :] < KV_LEN +UserWarning: Enable tracemalloc to get the object allocation traceback +/tmp/job.16700835/torchinductor_henrycastillo/4p/c4poa4cjpk5umc22d6bca6ko72veelb6kbqznwhzaqd5kvlwop7v.py:538: UserWarning: Logical operators 'and' and 'or' are deprecated for non-scalar tensors; please use '&' or '|' instead + scatter_mask = offs_m2[:, None] < Q_LEN and offs_n2[None, :] < KV_LEN +UserWarning: Enable tracemalloc to get the object allocation traceback +/tmp/job.16700835/torchinductor_henrycastillo/4p/c4poa4cjpk5umc22d6bca6ko72veelb6kbqznwhzaqd5kvlwop7v.py:538: UserWarning: Logical operators 'and' and 'or' are deprecated for non-scalar tensors; please use '&' or '|' instead + scatter_mask = offs_m2[:, None] < Q_LEN and offs_n2[None, :] < KV_LEN +UserWarning: Enable tracemalloc to get the object allocation traceback + Training: 0%| | 0/57345 [00:00 + main() + File "/scratch/user/henrycastillo/modded-nanogpt/train_gpt_medium.py", line 797, in main + print0( +TypeError: main..print0() got multiple values for argument 'console'