Spaces: Running on Zero

rm unused models in app.py (#1)

This commit deletes the commented-out Qwen2.5-VL-7B/3B-Instruct and Qwen3-VL-8B-Thinking loaders and their dead dispatch branches in generate_image and generate_video, leaving only the active Qwen3-VL checkpoints.

- rm unused models in app.py (4deb69a55c5ae5112ce0d1b37b0e8a553c4704f0)

Co-authored-by: Prithiv Sakthi <[email protected]>
app.py CHANGED

@@ -30,28 +30,11 @@ import subprocess
 
 subprocess.run(shlex.split("pip install flash-attn --no-build-isolation"), env=os.environ | {"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, check=True)
 
-
 MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# # Load Qwen2.5-VL-7B-Instruct
-# MODEL_ID_M = "Qwen/Qwen2.5-VL-7B-Instruct"
-# processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
-# model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-#     MODEL_ID_M,
-#     trust_remote_code=True,
-#     torch_dtype=torch.float16).to(device).eval()
-
-# # Load Qwen2.5-VL-3B-Instruct
-# MODEL_ID_X = "Qwen/Qwen2.5-VL-3B-Instruct"
-# processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-# model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-#     MODEL_ID_X,
-#     trust_remote_code=True,
-#     torch_dtype=torch.float16).to(device).eval()
-
 # Load Qwen3-VL-4B-Instruct
 MODEL_ID_Q = "Qwen/Qwen3-VL-4B-Instruct"
 processor_q = AutoProcessor.from_pretrained(MODEL_ID_Q, trust_remote_code=True)

@@ -68,14 +51,6 @@ model_y = Qwen3VLForConditionalGeneration.from_pretrained(
     trust_remote_code=True,
     torch_dtype=torch.bfloat16).to(device).eval()
 
-# # Load Qwen3-VL-8B-Thinking
-# MODEL_ID_Z = "Qwen/Qwen3-VL-8B-Thinking"
-# processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
-# model_z = Qwen3VLForConditionalGeneration.from_pretrained(
-#     MODEL_ID_Z,
-#     trust_remote_code=True,
-#     torch_dtype=torch.bfloat16).to(device).eval()
-
 # Load Qwen3-VL-2B-Instruct
 MODEL_ID_L = "Qwen/Qwen3-VL-2B-Instruct"
 processor_l = AutoProcessor.from_pretrained(MODEL_ID_L, trust_remote_code=True)

@@ -183,16 +158,10 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     """
     Generates responses using the selected model for image input.
     """
-    # if model_name == "Qwen2.5-VL-7B-Instruct":
-    #     processor, model = processor_m, model_m
-    # elif model_name == "Qwen2.5-VL-3B-Instruct":
-    #     processor, model = processor_x, model_x
     if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
-    # elif model_name == "Qwen3-VL-8B-Thinking":
-    #     processor, model = processor_z, model_z
     elif model_name == "Qwen3-VL-4B-Thinking":
         processor, model = processor_t, model_t
     elif model_name == "Qwen3-VL-2B-Instruct":

@@ -229,16 +198,10 @@ def generate_video(model_name: str, text: str, video_path: str,
     """
     Generates responses using the selected model for video input.
     """
-    # if model_name == "Qwen2.5-VL-7B-Instruct":
-    #     processor, model = processor_m, model_m
-    # elif model_name == "Qwen2.5-VL-3B-Instruct":
-    #     processor, model = processor_x, model_x
     if model_name == "Qwen3-VL-4B-Instruct":
         processor, model = processor_q, model_q
     elif model_name == "Qwen3-VL-8B-Instruct":
         processor, model = processor_y, model_y
-    # elif model_name == "Qwen3-VL-8B-Thinking":
-    #     processor, model = processor_z, model_z
     elif model_name == "Qwen3-VL-4B-Thinking":
         processor, model = processor_t, model_t
     elif model_name == "Qwen3-VL-2B-Instruct":

@@ -419,7 +382,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     ])
 
     model_choice = gr.Radio(
-        choices=["Qwen3-VL-4B-Instruct", "Qwen3-VL-8B-Instruct", "Qwen3-VL-2B-Instruct", "Qwen3-VL-2B-Thinking", "Qwen3-VL-4B-Thinking"],
+        choices=["Qwen3-VL-4B-Instruct", "Qwen3-VL-8B-Instruct", "Qwen3-VL-2B-Instruct", "Qwen3-VL-2B-Thinking", "Qwen3-VL-4B-Thinking"],
         label="Select Model",
         value="Qwen3-VL-4B-Instruct"
     )
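One note on the result: after this removal, the same four-branch if/elif ladder survives in both generate_image and generate_video. A possible follow-up, sketched below, is a single lookup table keyed by the gr.Radio choices. This is a hypothetical refactor, not part of the commit: MODELS and resolve_model are invented names, and model_l is assumed to exist by analogy with the other loader pairs.

```python
# Hypothetical registry: radio choice -> (processor, model), built from the
# loader pairs that survive this commit.
MODELS = {
    "Qwen3-VL-4B-Instruct": (processor_q, model_q),
    "Qwen3-VL-8B-Instruct": (processor_y, model_y),
    "Qwen3-VL-4B-Thinking": (processor_t, model_t),
    "Qwen3-VL-2B-Instruct": (processor_l, model_l),
    # Remaining choices (e.g. "Qwen3-VL-2B-Thinking") register the same way.
}

def resolve_model(model_name: str):
    """Map a radio choice to its (processor, model) pair, failing loudly."""
    try:
        return MODELS[model_name]
    except KeyError:
        raise ValueError(f"Unknown model choice: {model_name!r}")
```

Both generate functions would then open with processor, model = resolve_model(model_name), so adding or dropping a checkpoint means editing one table rather than two ladders plus the Radio choices list.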
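For reference, the loaders this commit keeps follow the standard transformers vision-language flow. Below is a minimal single-image sketch under stated assumptions: it presumes a transformers build that ships Qwen3VLForConditionalGeneration, example.jpg and the prompt are placeholders, and the chat-template handling shown is the common Qwen-VL pattern rather than code taken from this Space.

```python
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen3VLForConditionalGeneration

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
MODEL_ID = "Qwen/Qwen3-VL-4B-Instruct"  # one of the checkpoints kept above

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen3VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16).to(device).eval()

image = Image.open("example.jpg")  # placeholder input
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image."},
    ],
}]

# Render the chat template to a prompt string, then tokenize text + image together.
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(device)

with torch.inference_mode():
    output_ids = model.generate(**inputs, max_new_tokens=1024)  # DEFAULT_MAX_NEW_TOKENS

# Decode only the newly generated tokens, not the echoed prompt.
reply = processor.batch_decode(
    output_ids[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
print(reply)
```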