# syntax=docker/dockerfile:1
# (the BuildKit frontend is required for the RUN heredocs used below)
FROM ubuntu:22.04

# Prevent interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

# Install base system dependencies
# (python3.11-distutils is needed so pip can operate under Python 3.11 on jammy)
RUN apt-get update && apt-get install -y \
    wget \
    curl \
    git \
    vim \
    tmux \
    htop \
    build-essential \
    software-properties-common \
    ca-certificates \
    gnupg \
    lsb-release \
    sudo \
    openssh-server \
    nginx \
    supervisor \
    python3.11 \
    python3.11-dev \
    python3.11-distutils \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Add NVIDIA package repositories
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    rm -f cuda-keyring_1.1-1_all.deb && \
    apt-get update

# Install CUDA 12.8
RUN apt-get install -y cuda-toolkit-12-8 && \
    rm -rf /var/lib/apt/lists/*

# Install cuDNN
RUN apt-get update && apt-get install -y \
    libcudnn9-cuda-12 \
    libcudnn9-dev-cuda-12 \
    && rm -rf /var/lib/apt/lists/*
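
# Optional build-time sanity check: the toolkit installs nvcc even though no
# GPU is visible during the build, so this works on a GPU-less builder.
# RUN nvcc --version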

# Set Python 3.11 as default
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 && \
    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1

# Upgrade pip
RUN python -m pip install --upgrade pip setuptools wheel

# Install PyTorch with CUDA 12.8 support
# (no version pin: the cu128 wheel index starts at torch 2.7, so there is no
# 2.5.1+cu128 build, and Blackwell GPUs such as the RTX 5090 need 2.7+ anyway)
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
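
# Optional check that the installed wheel targets CUDA 12.8 (a sketch; note
# torch.cuda.is_available() would report False on a GPU-less build machine):
# RUN python -c "import torch; print(torch.__version__, torch.version.cuda)"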

# Create non-root user for HF Spaces compatibility
RUN useradd -m -u 1000 user && \
    echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Install code-server (VSCode in browser)
RUN curl -fsSL https://code-server.dev/install.sh | sh

# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh
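
# Models can optionally be baked into the image at build time (a sketch: the
# daemon must be up during the RUN step, and the weights inflate the image
# considerably; "llama3.2" is just an example tag):
# RUN ollama serve & sleep 5 && ollama pull llama3.2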

# Install hf_transfer (backs the fast-download path enabled by the
# HF_HUB_ENABLE_HF_TRANSFER env var set above)
RUN pip install hf_transfer

# Install core ML packages
RUN pip install \
    accelerate \
    transformers \
    datasets \
    peft \
    bitsandbytes \
    safetensors \
    sentencepiece \
    protobuf \
    scipy \
    einops \
    wandb \
    tensorboard \
    gradio \
    streamlit

# Install vLLM
RUN pip install vllm
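
# Once a model is launched (see the control panel app below), vLLM exposes an
# OpenAI-compatible API on port 8000, e.g.:
#   curl http://localhost:8000/v1/models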

# Install Flash Attention 2
# (compiles CUDA kernels from source; MAX_JOBS caps parallel compile jobs so
# the build does not exhaust memory)
RUN pip install ninja packaging && \
    MAX_JOBS=4 pip install flash-attn --no-build-isolation
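
# Optional import smoke test (the extension links against the CUDA runtime
# installed above, so the import itself should succeed without a GPU):
# RUN python -c "import flash_attn"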

# Install Triton (needed for RTX 5090 / Blackwell kernel support)
RUN pip install triton

# Clone and install Unsloth from source
RUN git clone https://github.com/unslothai/unsloth.git /tmp/unsloth && \
    cd /tmp/unsloth && \
    pip install -e . && \
    cd / && \
    rm -rf /tmp/unsloth/.git

# Clone and install Axolotl
RUN git clone https://github.com/axolotl-ai-cloud/axolotl /tmp/axolotl && \
    cd /tmp/axolotl && \
    pip install -e . && \
    cd / && \
    rm -rf /tmp/axolotl/.git
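
# Typical Axolotl invocation at runtime, given a training YAML (the path is a
# placeholder; example configs ship in the cloned repo):
#   accelerate launch -m axolotl.cli.train <config.yml>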

# Install Node.js for Open-WebUI
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

# Clone and setup Open-WebUI
RUN git clone https://github.com/open-webui/open-webui.git /opt/open-webui && \
    cd /opt/open-webui && \
    npm install && \
    npm run build
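
# Note: npm run build only produces the static frontend; for a full deployment
# the Python backend must be installed as well (a sketch, assuming the repo
# still ships its requirements under backend/):
# RUN pip install -r /opt/open-webui/backend/requirements.txt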

# Create directories with proper permissions
RUN mkdir -p /home/user/app /home/user/.cache /home/user/.config && \
    chown -R user:user /home/user

# Configure code-server for user (printf, not echo, so the \n escapes expand
# regardless of which shell provides echo)
RUN mkdir -p /home/user/.config/code-server && \
    printf 'bind-addr: 0.0.0.0:8080\nauth: none\ncert: false\n' > /home/user/.config/code-server/config.yaml && \
    chown -R user:user /home/user/.config

# Setup SSH
RUN mkdir -p /var/run/sshd && \
    echo 'user:spaces' | chpasswd && \
    sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin no/' /etc/ssh/sshd_config
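
# With the password set above, the container accepts logins such as (assuming
# port 22 is actually reachable; <host> is a placeholder):
#   ssh -p 22 user@<host>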

# Create supervisor config
RUN mkdir -p /etc/supervisor/conf.d
RUN cat > /etc/supervisor/conf.d/services.conf <<'EOF'
[supervisord]
nodaemon=true
user=root

; run the SSH daemon configured above
[program:sshd]
command=/usr/sbin/sshd -D
autostart=true
autorestart=true

[program:code-server]
command=sudo -u user code-server --bind-addr 0.0.0.0:8080 --auth none
autostart=true
autorestart=true
stderr_logfile=/var/log/code-server.err.log
stdout_logfile=/var/log/code-server.out.log

[program:ollama]
command=ollama serve
autostart=true
autorestart=true
environment=OLLAMA_HOST="0.0.0.0",HOME="/home/user"
stderr_logfile=/var/log/ollama.err.log
stdout_logfile=/var/log/ollama.out.log

[program:open-webui]
; supervisor does not run commands through a shell, so wrap the cd in bash -c
command=bash -c "cd /opt/open-webui && npm start"
autostart=true
autorestart=true
environment=PORT="3000",OLLAMA_BASE_URL="http://localhost:11434"
stderr_logfile=/var/log/open-webui.err.log
stdout_logfile=/var/log/open-webui.out.log
user=user
EOF
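
# Once supervisord is up, individual services can be inspected with, e.g.:
#   sudo supervisorctl -c /etc/supervisor/supervisord.conf status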

# Create Gradio app for HF Spaces
RUN cat > /home/user/app/app.py <<'EOF'
import gradio as gr
import subprocess

def get_services_status():
    services = {
        "VSCode": "http://localhost:8080",
        "Ollama API": "http://localhost:11434",
        "Open-WebUI": "http://localhost:3000",
        "vLLM": "http://localhost:8000"
    }
    status = "# 🚀 ML Stack Services Status\n\n"
    for service, url in services.items():
        status += f"- **{service}**: {url}\n"
    # Get GPU info
    try:
        gpu_info = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
            text=True)
        status += f"\n## 🎮 GPU Status\n```\n{gpu_info}```"
    except Exception:
        status += "\n## ⚠️ No GPUs detected"
    return status

def launch_vllm(model_name):
    try:
        cmd = f"python -m vllm.entrypoints.openai.api_server --model {model_name} --host 0.0.0.0 --port 8000"
        subprocess.Popen(cmd, shell=True)
        return f"✅ Launching vLLM with model: {model_name}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

def pull_ollama_model(model_name):
    try:
        result = subprocess.run(['ollama', 'pull', model_name], capture_output=True, text=True)
        return f"✅ {result.stdout}\n{result.stderr}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="ML Stack Control Panel") as demo:
    gr.Markdown("# 🎮 RunPod ML Stack Control Panel")
    with gr.Tab("Status"):
        status_btn = gr.Button("🔄 Refresh Status")
        status_output = gr.Markdown()
        status_btn.click(get_services_status, outputs=status_output)
    with gr.Tab("vLLM"):
        model_input = gr.Textbox(label="Model Name", value="meta-llama/Llama-2-7b-hf")
        vllm_btn = gr.Button("🚀 Launch vLLM")
        vllm_output = gr.Textbox(label="Output")
        vllm_btn.click(launch_vllm, inputs=model_input, outputs=vllm_output)
    with gr.Tab("Ollama"):
        ollama_model = gr.Textbox(label="Model Name", value="llama3.2")
        ollama_btn = gr.Button("📥 Pull Model")
        ollama_output = gr.Textbox(label="Output")
        ollama_btn.click(pull_ollama_model, inputs=ollama_model, outputs=ollama_output)
    # Load initial status
    demo.load(get_services_status, outputs=status_output)

if __name__ == "__main__":
    # Start supervisor in the background; sudo is needed because the container
    # runs as the non-root "user" while supervisord is configured to run as root
    subprocess.Popen(["sudo", "/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"])
    # Launch Gradio
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
EOF

# Create startup script
RUN cat > /home/user/app/start.sh <<'EOF'
#!/bin/bash
cd /home/user/app
python app.py
EOF
RUN chmod +x /home/user/app/start.sh

# Fix permissions
RUN chown -R user:user /home/user /opt/open-webui

# Expose ports: 22 (SSH), 7860 (Gradio, the HF Spaces default), 8080
# (code-server), 11434 (Ollama), 8000 (vLLM), 3000 (Open-WebUI)
EXPOSE 22 7860 8080 11434 8000 3000
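
# Note: Hugging Face Spaces only routes the single app port (7860 here); the
# other services are reachable from inside the container or via an SSH tunnel,
# e.g. forwarding code-server (<host> is a placeholder):
#   ssh -p 22 -L 8080:localhost:8080 user@<host>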

# Switch to user
USER user
WORKDIR /home/user/app

# Set the entrypoint for HF Spaces
CMD ["python", "app.py"]