#!/usr/bin/env bash
#
# Build script with strace to debug hanging.
#
# Runs `python -u build.py` under strace so the system-call trace can be used
# to identify where the build hangs.
#
# Environment (each may be overridden by the caller):
#   ROCM_PATH             ROCm install prefix (default: /opt/rocm-7.0.1)
#   ROCM_HOME, HIP_PATH,
#   HIP_HOME              default to $ROCM_PATH
#   TORCH_EXTENSIONS_DIR  default: $PWD/.torch_extensions
#
# Output:      strace.log in the current working directory.
# Exit status: the exit status of strace; 127 if strace is not installed.
set -euo pipefail

echo "=== Build with strace debugging ==="
echo "This will trace system calls to identify where the build hangs"

# Same environment as build-fixed.sh but without MAX_JOBS limit
export ROCM_PATH="${ROCM_PATH:-/opt/rocm-7.0.1}"
export ROCM_HOME="${ROCM_HOME:-$ROCM_PATH}"
export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
export HIP_HOME="${HIP_HOME:-$ROCM_PATH}"
export PATH="$ROCM_HOME/bin:$PATH"

# Fix architecture specifications
export TORCH_HIP_ARCH_LIST="gfx942"
export PYTORCH_ROCM_ARCH="gfx942"

# Remove HSA_OVERRIDE_GFX_VERSION
unset HSA_OVERRIDE_GFX_VERSION

# Remove MAX_JOBS limit to see parallel compilation hang
unset MAX_JOBS

# Enable PyTorch JIT logging
export PYTORCH_JIT_LOG_LEVEL=1
export TORCH_EXTENSIONS_DIR="${TORCH_EXTENSIONS_DIR:-$PWD/.torch_extensions}"

echo "Environment configured for strace:"
echo "ROCM_PATH=$ROCM_PATH"
echo "TORCH_HIP_ARCH_LIST=$TORCH_HIP_ARCH_LIST"
echo "PYTORCH_ROCM_ARCH=$PYTORCH_ROCM_ARCH"
# MAX_JOBS was unset above; the default expansion keeps `set -u` from aborting.
echo "MAX_JOBS=${MAX_JOBS:-unset}"
echo "PYTORCH_JIT_LOG_LEVEL=$PYTORCH_JIT_LOG_LEVEL"
echo

# Fail early with a clear message instead of a bare "command not found".
if ! command -v strace >/dev/null 2>&1; then
  echo "error: strace not found in PATH; install it to use this script" >&2
  exit 127
fi

echo "Starting build with strace..."
echo "Tracing process creation, signals, and file operations..."
echo "Output will be saved to strace.log"

# Use strace to trace the build process
#   -f: follow child processes
#   -e trace=process,signal,file: trace process creation, signals,
#      and file operations
#   -o strace.log: save output to file
#   -T: show time spent in each syscall
strace_args=(
  -f
  -e trace=process,signal,file
  -o strace.log
  -T
)

# Capture the status rather than letting `set -e` abort here, so the closing
# hints below are still printed when the build fails.
status=0
strace "${strace_args[@]}" python -u build.py || status=$?

echo "Build completed or interrupted"
echo "Check strace.log for detailed system call trace"

if ((status != 0)); then
  echo "strace exited with status $status" >&2
fi
exit "$status"