Upload folder using huggingface_hub
Browse files
- README.md +6 -4
- chat_template.jinja +3 -3
- tokenizer_config.json +1 -1
README.md
CHANGED
|
@@ -86,8 +86,10 @@ Reconstructs pixel-accurate HTML/CSS from UI screenshots and supports natural-la
|
|
| 86 |
For `SGLang`:
|
| 87 |
|
| 88 |
```bash
|
| 89 |
-
pip install sglang>=0.5.
|
| 90 |
-
pip install
|
|
|
|
|
|
|
| 91 |
```
|
| 92 |
|
| 93 |
For `vLLM`:
|
|
@@ -100,7 +102,7 @@ pip install transformers>=5.0.0rc0
|
|
| 100 |
### Quick Start with Transformers
|
| 101 |
|
| 102 |
```python
|
| 103 |
-
from transformers import AutoProcessor,
|
| 104 |
import torch
|
| 105 |
|
| 106 |
MODEL_PATH = "zai-org/GLM-4.6V-Flash"
|
|
@@ -120,7 +122,7 @@ messages = [
|
|
| 120 |
}
|
| 121 |
]
|
| 122 |
processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
| 123 |
-
model =
|
| 124 |
pretrained_model_name_or_path=MODEL_PATH,
|
| 125 |
torch_dtype="auto",
|
| 126 |
device_map="auto",
|
|
|
|
| 86 |
For `SGLang`:
|
| 87 |
|
| 88 |
```bash
|
| 89 |
+
pip install "sglang>=0.5.6.post1"
|
| 90 |
+
pip install nvidia-cudnn-cu12==9.16.0.29
|
| 91 |
+
sudo apt update
|
| 92 |
+
sudo apt install ffmpeg
|
| 93 |
```
|
| 94 |
|
| 95 |
For `vLLM`:
|
|
|
|
| 102 |
### Quick Start with Transformers
|
| 103 |
|
| 104 |
```python
|
| 105 |
+
from transformers import AutoProcessor, Glm4vForConditionalGeneration
|
| 106 |
import torch
|
| 107 |
|
| 108 |
MODEL_PATH = "zai-org/GLM-4.6V-Flash"
|
|
|
|
| 122 |
}
|
| 123 |
]
|
| 124 |
processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
| 125 |
+
model = Glm4vForConditionalGeneration.from_pretrained(
|
| 126 |
pretrained_model_name_or_path=MODEL_PATH,
|
| 127 |
torch_dtype="auto",
|
| 128 |
device_map="auto",
|
chat_template.jinja
CHANGED
|
@@ -86,11 +86,11 @@ For each function call, output the function name and arguments within the follow
|
|
| 86 |
{%- set tc = tc.function %}
|
| 87 |
{%- endif %}
|
| 88 |
{{ '\n<tool_call>' + tc.name }}
|
| 89 |
-
{% set _args = tc.arguments %}
|
| 90 |
-
{
|
| 91 |
<arg_key>{{ k }}</arg_key>
|
| 92 |
<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>
|
| 93 |
-
{% endfor %}{
|
| 94 |
</tool_call>{% endfor %}
|
| 95 |
{% endif %}
|
| 96 |
{%- elif m.role == 'tool' -%}
|
|
|
|
| 86 |
{%- set tc = tc.function %}
|
| 87 |
{%- endif %}
|
| 88 |
{{ '\n<tool_call>' + tc.name }}
|
| 89 |
+
{% set _args = tc.arguments %}{% if _args is mapping %}
|
| 90 |
+
{% for k, v in _args|items %}
|
| 91 |
<arg_key>{{ k }}</arg_key>
|
| 92 |
<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>
|
| 93 |
+
{% endfor %}{%- endif %}
|
| 94 |
</tool_call>{% endfor %}
|
| 95 |
{% endif %}
|
| 96 |
{%- elif m.role == 'tool' -%}
|
tokenizer_config.json
CHANGED
|
@@ -326,5 +326,5 @@
|
|
| 326 |
"remove_space": false,
|
| 327 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 328 |
"unk_token": null,
|
| 329 |
-
"chat_template": "{# Unsloth template fixes #}\n[gMASK]<sop>\n{%- if tools -%}\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{% for tool in tools %}\n{{ tool | tojson|string }}\n{% endfor %}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call>{%- endif -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{- content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}\n <|begin_of_image|><|image|><|end_of_image|>\n {%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}\n <|begin_of_video|><|video|><|end_of_video|>\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{% for m in messages %}\n{%- if m.role == 'user' -%}<|user|>\n{% if m.content is string %}\n{{ m.content }}\n{%- else %}\n{%- for item in m.content %}\n{% if item.type == 'video' or 'video' in item %}\n<|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}\n<|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}\n{{ item.text }}\n{%- endif %}\n{%- endfor %}\n{%- endif %}\n{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not 
visible_text(m.content).endswith(\"/nothink\")) else '' -}}\n{%- elif m.role == 'assistant' -%}\n<|assistant|>\n{%- set reasoning_content = '' %}\n{%- set content = visible_text(m.content) %}\n{%- if m.reasoning_content is string %}\n {%- set reasoning_content = m.reasoning_content %}\n{%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = ((content.split('</think>')|first).rstrip('\\n').split('<think>')|last).lstrip('\\n') %}\n {%- set content = (content.split('</think>')|last).lstrip('\\n') %}\n {%- endif %}\n{%- endif %}\n{%- if loop.index0 > ns.last_user_index and reasoning_content -%}\n{{ '\\n<think>' + reasoning_content.strip() + '</think>'}}\n{%- else -%}\n{{ '\\n<think></think>' }}\n{%- endif -%}\n{%- if content.strip() -%}\n{{ '\\n' + content.strip() }}\n{%- endif -%}\n{% if m.tool_calls %}\n{% for tc in m.tool_calls %}\n{%- if tc.function %}\n {%- set tc = tc.function %}\n{%- endif %}\n{{ '\\n<tool_call>' + tc.name }}\n{% set _args = tc.arguments %}
|
| 330 |
}
|
|
|
|
| 326 |
"remove_space": false,
|
| 327 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 328 |
"unk_token": null,
|
| 329 |
+
"chat_template": "{# Unsloth template fixes #}\n[gMASK]<sop>\n{%- if tools -%}\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{% for tool in tools %}\n{{ tool | tojson|string }}\n{% endfor %}\n</tools>\n\nFor each function call, output the function name and arguments within the following XML format:\n<tool_call>{function-name}\n<arg_key>{arg-key-1}</arg_key>\n<arg_value>{arg-value-1}</arg_value>\n<arg_key>{arg-key-2}</arg_key>\n<arg_value>{arg-value-2}</arg_value>\n...\n</tool_call>{%- endif -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{- content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}\n <|begin_of_image|><|image|><|end_of_image|>\n {%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}\n <|begin_of_video|><|video|><|end_of_video|>\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{% for m in messages %}\n{%- if m.role == 'user' -%}<|user|>\n{% if m.content is string %}\n{{ m.content }}\n{%- else %}\n{%- for item in m.content %}\n{% if item.type == 'video' or 'video' in item %}\n<|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}\n<|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}\n{{ item.text }}\n{%- endif %}\n{%- endfor %}\n{%- endif %}\n{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not 
visible_text(m.content).endswith(\"/nothink\")) else '' -}}\n{%- elif m.role == 'assistant' -%}\n<|assistant|>\n{%- set reasoning_content = '' %}\n{%- set content = visible_text(m.content) %}\n{%- if m.reasoning_content is string %}\n {%- set reasoning_content = m.reasoning_content %}\n{%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = ((content.split('</think>')|first).rstrip('\\n').split('<think>')|last).lstrip('\\n') %}\n {%- set content = (content.split('</think>')|last).lstrip('\\n') %}\n {%- endif %}\n{%- endif %}\n{%- if loop.index0 > ns.last_user_index and reasoning_content -%}\n{{ '\\n<think>' + reasoning_content.strip() + '</think>'}}\n{%- else -%}\n{{ '\\n<think></think>' }}\n{%- endif -%}\n{%- if content.strip() -%}\n{{ '\\n' + content.strip() }}\n{%- endif -%}\n{% if m.tool_calls %}\n{% for tc in m.tool_calls %}\n{%- if tc.function %}\n {%- set tc = tc.function %}\n{%- endif %}\n{{ '\\n<tool_call>' + tc.name }}\n{% set _args = tc.arguments %}{% if _args is mapping %}\n{% for k, v in _args|items %}\n<arg_key>{{ k }}</arg_key>\n<arg_value>{{ v | tojson|string if v is not string else v }}</arg_value>\n{% endfor %}{%- endif %}\n</tool_call>{% endfor %}\n{% endif %}\n{%- elif m.role == 'tool' -%}\n{%- if m.content is string -%}\n{%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|observation|>' }}\n{%- endif %}\n{{- '\\n<tool_response>\\n' }}\n{{- m.content }}\n{{- '\\n</tool_response>' }}\n{% elif m.content is iterable and m.content is not mapping %}\n{%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n{{- '<|observation|>' }}\n{%- endif %}\n{{- '\\n<tool_response>\\n' }}\n{%- for tr in m.content -%}\n {%- if tr is mapping and tr.type is defined -%}\n {%- set t = tr.type | lower -%}\n {%- if t == 'text' and tr.text is defined -%}\n{{ tr.text }}\n {%- elif t in ['image', 'image_url'] -%}\n<|begin_of_image|><|image|><|end_of_image|>\n {%- elif t in ['video', 'video_url'] 
-%}\n<|begin_of_video|><|video|><|end_of_video|>\n {%- else -%}\n{{ tr | tojson|string }}\n {%- endif -%}\n {%- else -%}\n{{ tr.output if tr.output is defined else tr }}\n {%- endif -%}\n{%- endfor -%}\n{{- '\\n</tool_response>' }}\n{%- else -%}\n<|observation|>{% for tr in m.content %}\n\n<tool_response>\n{{ tr.output if tr.output is defined else tr }}\n</tool_response>{% endfor -%}\n{% endif -%}\n{%- elif m.role == 'system' -%}\n<|system|>\n{{ visible_text(m.content) }}\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n<|assistant|>\n{{'<think></think>\\n' if (enable_thinking is defined and not enable_thinking) else ''}}\n{%- endif -%}\n{# Copyright 2025-present Unsloth. Apache 2.0 License. #}"
|
| 330 |
}
|