Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -56,7 +56,6 @@ moondream = AutoModelForCausalLM.from_pretrained(
|
|
| 56 |
torch_dtype=torch.float16,
|
| 57 |
device_map={"": "cuda"},
|
| 58 |
attn_implementation="flash_attention_2",
|
| 59 |
-
revision="a23030ab157f2d0f5bb2df6c6d43623904727ec2",
|
| 60 |
)
|
| 61 |
|
| 62 |
# CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
|
|
@@ -78,7 +77,8 @@ def convert_to_entities(text, coords):
|
|
| 78 |
Converts a string with special markers into an entity representation.
|
| 79 |
Markers:
|
| 80 |
- <|coord|> pairs indicate coordinate markers
|
| 81 |
-
- <|start_ground|> indicates the start of a ground term
|
|
|
|
| 82 |
- <|end_ground|> indicates the end of a ground term
|
| 83 |
|
| 84 |
Returns:
|
|
@@ -103,10 +103,15 @@ def convert_to_entities(text, coords):
|
|
| 103 |
entity.append(coords.pop(0))
|
| 104 |
continue
|
| 105 |
|
| 106 |
-
elif text[i : i + 16] == "<|start_ground|>":
|
| 107 |
in_entity = True
|
| 108 |
entity_start = current_pos
|
| 109 |
-
i += 16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
continue
|
| 111 |
|
| 112 |
elif text[i : i + 14] == "<|end_ground|>":
|
|
@@ -484,18 +489,21 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
|
|
| 484 |
w, h = img.size
|
| 485 |
|
| 486 |
coords = json.loads(evt.value[1])
|
| 487 |
-
if len(coords) != 2:
|
| 488 |
raise ValueError("Only points supported right now.")
|
| 489 |
-
coords[0] = int(coords[0] * w)
|
| 490 |
-
coords[1] = int(coords[1] * h)
|
| 491 |
|
| 492 |
img_clone = img.copy()
|
| 493 |
draw = ImageDraw.Draw(img_clone)
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
|
| 500 |
return gr.update(visible=True, value=img_clone)
|
| 501 |
|
|
|
|
| 56 |
torch_dtype=torch.float16,
|
| 57 |
device_map={"": "cuda"},
|
| 58 |
attn_implementation="flash_attention_2",
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
# CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
|
|
|
|
| 77 |
Converts a string with special markers into an entity representation.
|
| 78 |
Markers:
|
| 79 |
- <|coord|> pairs indicate coordinate markers
|
| 80 |
+
- <|start_ground_points|> indicates the start of grounding
|
| 81 |
+
- <|start_ground_text|> indicates the start of a ground term
|
| 82 |
- <|end_ground|> indicates the end of a ground term
|
| 83 |
|
| 84 |
Returns:
|
|
|
|
| 103 |
entity.append(coords.pop(0))
|
| 104 |
continue
|
| 105 |
|
| 106 |
+
elif text[i : i + 23] == "<|start_ground_points|>":
|
| 107 |
in_entity = True
|
| 108 |
entity_start = current_pos
|
| 109 |
+
i += 23
|
| 110 |
+
continue
|
| 111 |
+
|
| 112 |
+
elif text[i : i + 21] == "<|start_ground_text|>":
|
| 113 |
+
entity_start = current_pos
|
| 114 |
+
i += 21
|
| 115 |
continue
|
| 116 |
|
| 117 |
elif text[i : i + 14] == "<|end_ground|>":
|
|
|
|
| 489 |
w, h = img.size
|
| 490 |
|
| 491 |
coords = json.loads(evt.value[1])
|
| 492 |
+
if len(coords) % 2 != 0:
|
| 493 |
raise ValueError("Only points supported right now.")
|
|
|
|
|
|
|
| 494 |
|
| 495 |
img_clone = img.copy()
|
| 496 |
draw = ImageDraw.Draw(img_clone)
|
| 497 |
+
|
| 498 |
+
for i in range(0, len(coords), 2): # Step by 2 to handle x,y pairs
|
| 499 |
+
x = int(coords[i] * w)
|
| 500 |
+
y = int(coords[i + 1] * h)
|
| 501 |
+
draw.ellipse(
|
| 502 |
+
(x - 3, y - 3, x + 3, y + 3),
|
| 503 |
+
fill="red",
|
| 504 |
+
outline="red",
|
| 505 |
+
)
|
| 506 |
+
|
| 507 |
|
| 508 |
return gr.update(visible=True, value=img_clone)
|
| 509 |
|