reverseforward committed on
Commit 080c7c9 · 1 Parent(s): e54a59b
Files changed (4)
  1. Dockerfile +24 -8
  2. docker-compose.yml +18 -0
  3. requirements.txt +8 -3
  4. src/streamlit_app.py +115 -35
Dockerfile CHANGED
```diff
@@ -1,20 +1,36 @@
-FROM python:3.13.5-slim
+FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
 
+# Set working directory
 WORKDIR /app
 
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
+    python3.10 \
+    python3-pip \
     git \
     && rm -rf /var/lib/apt/lists/*
 
-COPY requirements.txt ./
-COPY src/ ./src/
+# Create symlink for python
+RUN ln -s /usr/bin/python3.10 /usr/bin/python
 
-RUN pip3 install -r requirements.txt
+# Upgrade pip
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Copy requirements
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy app
+COPY app.py .
 
+# Expose streamlit port
 EXPOSE 8501
 
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV CUDA_VISIBLE_DEVICES=0
 
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+# Run streamlit
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
```
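The new image builds on the CUDA 12.1 runtime while requirements.txt (below) pins cu118 wheels of torch; the cu118 wheels bundle their own CUDA libraries, so they only need a sufficiently new host driver. A minimal sanity-check sketch for inside the container (the script name is hypothetical, not part of this commit):

```python
# check_gpu.py -- hypothetical sanity-check script, not part of this commit.
# Verifies that the CUDA base image, the NVIDIA runtime, and the cu118
# torch wheels from requirements.txt all line up inside the container.
import torch

print("torch version:", torch.__version__)          # expected 2.1.2+cu118
print("wheel built for CUDA:", torch.version.cuda)  # "11.8" for cu118 wheels
print("GPU visible:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```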
docker-compose.yml ADDED
```diff
@@ -0,0 +1,18 @@
+version: '3.8'
+
+services:
+  qwen3-app:
+    build: .
+    container_name: qwen3-vl-app
+    runtime: nvidia
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - CUDA_VISIBLE_DEVICES=0
+    ports:
+      - "8501:8501"
+    volumes:
+      - ./models:/root/.cache/huggingface/hub:ro
+    shm_size: '16gb'
+    stdin_open: true
+    tty: true
```
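The volume mounts a host `./models` directory read-only over the Hugging Face cache path, so the weights have to be fetched on the host beforehand. A minimal sketch of pre-populating that directory, assuming the repo id used in the app code below:

```python
# prefetch.py -- hypothetical helper, not part of this commit.
# Downloads the model snapshot into ./models, which the compose file
# mounts read-only at /root/.cache/huggingface/hub in the container.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="reverseforward/qwenmeasurement",  # repo id taken from the app; replace as needed
    cache_dir="./models",  # host side of the compose volume above
)
```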
requirements.txt CHANGED
```diff
@@ -1,3 +1,8 @@
-altair
-pandas
-streamlit
+torch==2.1.2 --index-url https://download.pytorch.org/whl/cu118
+torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu118
+transformers==4.45.0
+pillow==10.2.0
+streamlit==1.40.1
+accelerate==0.27.2
+peft==0.7.1
+unsloth==2024.12.16
```
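Two caveats worth checking at build time: pip normally treats `--index-url` as a file-level option on its own line rather than a per-requirement flag, and this pin set mixes fast-moving packages (transformers, peft, unsloth). A small version-audit sketch, assuming it runs inside the built image (the script name is hypothetical):

```python
# audit_pins.py -- hypothetical helper, not part of this commit.
# Prints the resolved version of each pinned package so mismatches
# against requirements.txt are easy to spot.
from importlib.metadata import version, PackageNotFoundError

PINS = ["torch", "torchvision", "transformers", "pillow",
        "streamlit", "accelerate", "peft", "unsloth"]

for pkg in PINS:
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```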
src/streamlit_app.py CHANGED
```diff
@@ -1,40 +1,120 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
+import torch
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForVision2Seq
+import io
 
-"""
-# Welcome to Streamlit!
+st.set_page_config(page_title="Qwen3-VL-8B Inference", layout="wide")
 
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
+@st.cache_resource
+def load_model(model_id):
+    """Load model and processor with float16 optimization"""
+    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+    model = AutoModelForVision2Seq.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    return model, processor
 
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
+def main():
+    st.title("🖼️ Qwen3-VL-8B Vision-Language Model")
+    st.markdown("Upload an image and ask questions about it or provide instructions!")
+
+    # Model configuration
+    model_id = "reverseforward/qwenmeasurement"  # Replace with your model ID
+
+    try:
+        model, processor = load_model(model_id)
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        st.info("Make sure your model ID is correct and you have internet access to HuggingFace Hub")
+        return
+
+    # Create two columns for layout
+    col1, col2 = st.columns([1, 1])
+
+    with col1:
+        st.subheader("📤 Upload Image")
+        uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png", "webp"])
+
+        if uploaded_file is not None:
+            image = Image.open(uploaded_file)
+            st.image(image, use_column_width=True, caption="Uploaded Image")
+        else:
+            image = None
+            st.info("Please upload an image to continue")
+
+    with col2:
+        st.subheader("💬 Input Text")
+        text_input = st.text_area(
+            "Ask a question or provide instructions about the image:",
+            placeholder="e.g., What objects are in this image? Describe them in detail.",
+            height=150
+        )
+
+    st.divider()
+
+    # Generate response
+    if st.button("🚀 Generate Response", type="primary"):
+        if image is None:
+            st.warning("Please upload an image first!")
+        elif not text_input.strip():
+            st.warning("Please enter a text prompt!")
+        else:
+            with st.spinner("Processing... This may take a moment"):
+                try:
+                    # Prepare inputs
+                    messages = [
+                        {
+                            "role": "user",
+                            "content": [
+                                {"type": "image", "image": image},
+                                {"type": "text", "text": text_input}
+                            ]
+                        }
+                    ]
+
+                    # Process with float16 for efficiency
+                    text = processor.apply_chat_template(
+                        messages,
+                        tokenize=False,
+                        add_generation_prompt=True
+                    )
+
+                    inputs = processor(
+                        text=text,
+                        images=[image],
+                        return_tensors="pt",
+                        padding=True
+                    )
+
+                    # Move to GPU and use float16
+                    inputs = {k: v.to(model.device).to(torch.float16) if v.dtype in [torch.float32, torch.float64] else v.to(model.device) for k, v in inputs.items()}
+
+                    # Generate
+                    with torch.no_grad():
+                        output_ids = model.generate(
+                            **inputs,
+                            max_new_tokens=1024,
+                            temperature=0.7,
+                            top_p=0.95
+                        )
+
+                    # Decode response
+                    response = processor.decode(
+                        output_ids[0][inputs["input_ids"].shape[1]:],
+                        skip_special_tokens=True
+                    )
+
+                    st.success("✅ Generation complete!")
+                    st.subheader("📝 Response")
+                    st.write(response)
+
+                except Exception as e:
+                    st.error(f"Error during generation: {e}")
+                    st.info("Check your model configuration and GPU memory")
 
+if __name__ == "__main__":
+    main()
```
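One detail to be aware of in the generation step: `model.generate` defaults to greedy decoding (`do_sample=False`), in which case `temperature` and `top_p` are ignored. A standalone sketch of the same load-and-generate path outside Streamlit, with sampling enabled explicitly; the repo id comes from the app and the image path is a placeholder:

```python
# smoke_test.py -- hypothetical script, not part of this commit.
# Runs the same inference path as the Streamlit app from the CLI.
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

MODEL_ID = "reverseforward/qwenmeasurement"  # same id as in the app

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

image = Image.open("sample.jpg")  # placeholder test image
messages = [{"role": "user", "content": [
    {"type": "image", "image": image},
    {"type": "text", "text": "Describe this image in detail."},
]}]

prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt")

# Mirror the app's dtype handling: cast float tensors to fp16, move the rest as-is.
inputs = {k: (v.to(model.device, dtype=torch.float16)
              if v.dtype.is_floating_point else v.to(model.device))
          for k, v in inputs.items()}

with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,   # without this, temperature/top_p have no effect
        temperature=0.7,
        top_p=0.95,
    )

print(processor.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```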