MagistrTheOne committed on
Commit
2f1816d
·
verified ·
1 Parent(s): f938a5e

Initial upload of Radon-35B-Ultra-X-RU

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Radon-35B-Ultra-X-RU
2
+
3
+ ## Описание
4
+ Radon Ultra — продвинутый AGI-ассистент на базе Qwen3-Omni-30B-A3B-Thinking,
5
+ адаптированный под русскоязычную идентичность через LoRA fine-tuning.
6
+
7
+ ## Информация о модели
8
+ - **Создатель**: MagistrTheOne
9
+ - **Место создания**: Краснодар, Россия
10
+ - **Год**: 2025
11
+ - **Базовая модель**: Qwen/Qwen3-Omni-30B-A3B-Thinking
12
+ - **Метод**: LoRA fine-tuning (r=16, 2000 steps)
13
+ - **Параметры**: ~31.7B
14
+ - **Языки**: Русский, Английский
15
+ - **Лицензия**: Apache 2.0
16
+
17
+ ## Особенности
18
+ - Сохранены все возможности базовой модели
19
+ - Адаптированная русскоязычная идентичность
20
+ - Знание о создателе и месте разработки
21
+ - Оптимизирован для reasoning-задач
22
+
23
+ ## Использование
24
+
25
+ ```python
26
+ import torch
27
+ from transformers import AutoModelForCausalLM, AutoTokenizer
28
+ model = AutoModelForCausalLM.from_pretrained(
29
+ "MagistrTheOne/Radon-35B-Ultra-X-RU",
30
+ torch_dtype=torch.bfloat16,
31
+ device_map="auto"
32
+ )
33
+ tokenizer = AutoTokenizer.from_pretrained("MagistrTheOne/Radon-35B-Ultra-X-RU")
34
+
35
+ # Пример использования
36
+ conversation = [
37
+ {
38
+ "role": "system",
39
+ "content": [{"type": "text", "text": "Ты — Radon Ultra, продвинутый AGI-ассистент."}]
40
+ },
41
+ {
42
+ "role": "user",
43
+ "content": [{"type": "text", "text": "Кто ты?"}]
44
+ }
45
+ ]
46
+
47
+ text = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
48
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
49
+
50
+ with torch.no_grad():
51
+ outputs = model.generate(**inputs, max_new_tokens=100)
52
+
53
+ response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
54
+ print(response)
55
+ ```
56
+
57
+ ## Технические детали
58
+ - **LoRA rank**: 16
59
+ - **LoRA alpha**: 32
60
+ - **Target modules**: q_proj, v_proj
61
+ - **Training steps**: 2000
62
+ - **Learning rate**: 2e-4
63
+ - **Batch size**: 8 (effective)
64
+ - **Dataset**: 1100 examples (300 identity + 800 mixed)
65
+
66
+ ## Ограничения
67
+ - Модель может генерировать неточную информацию
68
+ - Требует значительных вычислительных ресурсов
69
+ - Не рекомендуется для критически важных применений без дополнительной проверки
70
+
71
+ ## Контакты
72
+ - **Создатель**: MagistrTheOne
73
+ - **Место**: Краснодар, Россия
74
+ - **Год**: 2025
added_tokens.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<tts_pad>": 151671,
9
+ "<tts_text_bos>": 151672,
10
+ "<tts_text_bos_single>": 151674,
11
+ "<tts_text_eod>": 151673,
12
+ "<|audio_end|>": 151670,
13
+ "<|audio_pad|>": 151675,
14
+ "<|audio_start|>": 151669,
15
+ "<|box_end|>": 151649,
16
+ "<|box_start|>": 151648,
17
+ "<|endoftext|>": 151643,
18
+ "<|file_sep|>": 151664,
19
+ "<|fim_middle|>": 151660,
20
+ "<|fim_pad|>": 151662,
21
+ "<|fim_prefix|>": 151659,
22
+ "<|fim_suffix|>": 151661,
23
+ "<|im_end|>": 151645,
24
+ "<|im_start|>": 151644,
25
+ "<|image_pad|>": 151655,
26
+ "<|object_ref_end|>": 151647,
27
+ "<|object_ref_start|>": 151646,
28
+ "<|quad_end|>": 151651,
29
+ "<|quad_start|>": 151650,
30
+ "<|repo_name|>": 151663,
31
+ "<|video_pad|>": 151656,
32
+ "<|vision_end|>": 151653,
33
+ "<|vision_pad|>": 151654,
34
+ "<|vision_start|>": 151652
35
+ }
chat_template.jinja ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}{{- messages[0].content + '\n\n' }}{%- endif %}
4
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
5
+ {%- for tool in tools %}
6
+ {{- "\n" }}
7
+ {{- tool | tojson }}
8
+ {%- endfor %}
9
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
10
+ {%- else %}
11
+ {%- if messages[0].role == 'system' %}
12
+ {%- if messages[0].content is string %}
13
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
14
+ {%- else %}
15
+ {%- for content in messages[0].content %}
16
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
17
+ {{- '<|im_start|>system\n' +"<|vision_start|><|image_pad|><|vision_end|>"+ '<|im_end|>\n' }}
18
+ {%- elif content.type == 'audio' or 'audio' in content or 'audio_url' in content %}
19
+ {{- '<|im_start|>system\n' +"<|audio_start|><|audio_pad|><|audio_end|>"+ '<|im_end|>\n' }}
20
+ {%- elif content.type == 'video' or 'video' in content %}
21
+ {{- '<|im_start|>system\n' +"<|vision_start|><|video_pad|><|vision_end|>"+ '<|im_end|>\n' }}
22
+ {%- elif content.type == 'text' %}
23
+ {{- '<|im_start|>system\n' +content.text+ '<|im_end|>\n' }}
24
+ {%- endif %}
25
+ {%- endfor %}
26
+ {%- endif %}
27
+ {%- endif %}
28
+ {%- endif %}
29
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
30
+ {%- for message in messages[::-1] %}
31
+ {%- set index = (messages|length - 1) - loop.index0 %}
32
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
33
+ {%- set ns.multi_step_tool = false %}
34
+ {%- set ns.last_query_index = index %}
35
+ {%- endif %}
36
+ {%- endfor %}
37
+ {%- for message in messages %}
38
+ {%- if message.content is string %}
39
+ {%- set content = message.content %}
40
+ {%- else %}
41
+ {%- set content = namespace(text="") %}
42
+ {%- for mcontent in message.content %}
43
+ {%- if mcontent.type == 'image' or 'image' in mcontent or 'image_url' in mcontent %}
44
+ {%- set content.text = content.text~"<|vision_start|><|image_pad|><|vision_end|>" %}
45
+ {%- elif mcontent.type == 'audio' or 'audio' in mcontent or 'audio_url' in mcontent %}
46
+ {%- set content.text = content.text~"<|audio_start|><|audio_pad|><|audio_end|>" %}
47
+ {%- elif mcontent.type == 'video' or 'video' in mcontent %}
48
+ {%- set content.text = content.text~"<|vision_start|><|video_pad|><|vision_end|>" %}
49
+ {%- elif mcontent.type == 'text' %}
50
+ {%- set content.text = content.text~mcontent.text %}
51
+ {%- endif %}
52
+ {%- endfor %}
53
+ {%- set content = content.text %}
54
+ {%- endif %}
55
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
56
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
57
+ {%- elif message.role == "assistant" %}
58
+ {%- set reasoning_content = "" %}
59
+ {%- if message.reasoning_content is string %}
60
+ {%- set reasoning_content = message.reasoning_content %}
61
+ {%- else %}
62
+ {%- if '</think>' in content %}
63
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
64
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- if loop.index0 > ns.last_query_index %}
68
+ {%- if loop.last or (not loop.last and reasoning_content) %}
69
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip("\n") + '\n</think>\n\n' + content.lstrip('\n') }}
70
+ {%- else %}
71
+ {{- '<|im_start|>' + message.role + '\n' + content }}
72
+ {%- endif %}
73
+ {%- else %}
74
+ {{- '<|im_start|>' + message.role + '\n' + content }}
75
+ {%- endif %}
76
+ {%- if message.tool_calls %}
77
+ {%- for tool_call in message.tool_calls %}
78
+ {%- if (loop.first and content) or (not loop.first) %}{{- '\n' }}{%- endif %}
79
+ {%- if tool_call.function %}
80
+ {%- set tool_call = tool_call.function %}
81
+ {%- endif %}
82
+ {{- '<tool_call>\n{"name": "' }}
83
+ {{- tool_call.name }}
84
+ {{- '", "arguments": ' }}
85
+ {%- if tool_call.arguments is string %}
86
+ {{- tool_call.arguments }}
87
+ {%- else %}
88
+ {{- tool_call.arguments | tojson }}
89
+ {%- endif %}
90
+ {{- '}\n</tool_call>' }}
91
+ {%- endfor %}
92
+ {%- endif %}
93
+ {{- '<|im_end|>\n' }}
94
+ {%- elif message.role == "tool" %}
95
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}{{- '<|im_start|>user' }}{%- endif %}
96
+ {{- '\n<tool_response>\n' }}
97
+ {{- content }}
98
+ {{- '\n</tool_response>' }}
99
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}{{- '<|im_end|>\n' }}{%- endif %}
100
+ {%- endif %}
101
+ {%- endfor %}
102
+ {%- if add_generation_prompt %}
103
+ {{- '<|im_start|>assistant\n' }}
104
+ {%- if enable_thinking is defined and enable_thinking is false %}{{- '<think>\n\n</think>\n\n' }}{%- endif %}
105
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3OmniMoeForConditionalGeneration"
4
+ ],
5
+ "assistant_token_id": 77091,
6
+ "code2wav_config": {
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "codebook_size": 2048,
10
+ "decoder_dim": 1536,
11
+ "dtype": "bfloat16",
12
+ "hidden_act": "silu",
13
+ "hidden_size": 1024,
14
+ "intermediate_size": 3072,
15
+ "layer_scale_initial_scale": 0.01,
16
+ "max_position_embeddings": 8000,
17
+ "model_type": "",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 8,
20
+ "num_key_value_heads": 16,
21
+ "num_quantizers": 16,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_theta": 10000,
24
+ "sliding_window": 72,
25
+ "upsample_rates": [
26
+ 8,
27
+ 5,
28
+ 4,
29
+ 3
30
+ ],
31
+ "upsampling_ratios": [
32
+ 2,
33
+ 2
34
+ ]
35
+ },
36
+ "dtype": "bfloat16",
37
+ "enable_audio_output": false,
38
+ "im_end_token_id": 151645,
39
+ "im_start_token_id": 151644,
40
+ "model_type": "qwen3_omni_moe",
41
+ "system_token_id": 8948,
42
+ "talker_config": {
43
+ "accept_hidden_layer": 18,
44
+ "audio_start_token_id": 151669,
45
+ "audio_token_id": 151646,
46
+ "code_predictor_config": {
47
+ "_name_or_path": "",
48
+ "add_cross_attention": false,
49
+ "architectures": null,
50
+ "attention_bias": false,
51
+ "attention_dropout": 0,
52
+ "bad_words_ids": null,
53
+ "begin_suppress_tokens": null,
54
+ "bos_token_id": null,
55
+ "chunk_size_feed_forward": 0,
56
+ "cross_attention_hidden_size": null,
57
+ "decoder_start_token_id": null,
58
+ "diversity_penalty": 0.0,
59
+ "do_sample": false,
60
+ "dtype": null,
61
+ "early_stopping": false,
62
+ "encoder_no_repeat_ngram_size": 0,
63
+ "eos_token_id": null,
64
+ "exponential_decay_length_penalty": null,
65
+ "finetuning_task": null,
66
+ "forced_bos_token_id": null,
67
+ "forced_eos_token_id": null,
68
+ "head_dim": 128,
69
+ "hidden_act": "silu",
70
+ "hidden_size": 1024,
71
+ "id2label": {
72
+ "0": "LABEL_0",
73
+ "1": "LABEL_1"
74
+ },
75
+ "initializer_range": 0.02,
76
+ "intermediate_size": 3072,
77
+ "is_decoder": false,
78
+ "is_encoder_decoder": false,
79
+ "label2id": {
80
+ "LABEL_0": 0,
81
+ "LABEL_1": 1
82
+ },
83
+ "layer_types": [
84
+ "full_attention",
85
+ "full_attention",
86
+ "full_attention",
87
+ "full_attention",
88
+ "full_attention"
89
+ ],
90
+ "length_penalty": 1.0,
91
+ "max_length": 20,
92
+ "max_position_embeddings": 32768,
93
+ "min_length": 0,
94
+ "model_type": "qwen3_omni_moe_talker_code_predictor",
95
+ "no_repeat_ngram_size": 0,
96
+ "num_attention_heads": 16,
97
+ "num_beam_groups": 1,
98
+ "num_beams": 1,
99
+ "num_code_groups": 32,
100
+ "num_hidden_layers": 5,
101
+ "num_key_value_heads": 8,
102
+ "num_return_sequences": 1,
103
+ "output_attentions": false,
104
+ "output_hidden_states": false,
105
+ "output_scores": false,
106
+ "pad_token_id": null,
107
+ "prefix": null,
108
+ "problem_type": null,
109
+ "remove_invalid_values": false,
110
+ "repetition_penalty": 1.0,
111
+ "return_dict": true,
112
+ "return_dict_in_generate": false,
113
+ "rms_norm_eps": 1e-06,
114
+ "rope_parameters": {
115
+ "rope_theta": 10000.0,
116
+ "rope_type": "default"
117
+ },
118
+ "sep_token_id": null,
119
+ "sliding_window": null,
120
+ "suppress_tokens": null,
121
+ "task_specific_params": null,
122
+ "temperature": 1.0,
123
+ "tie_encoder_decoder": false,
124
+ "tie_word_embeddings": false,
125
+ "tokenizer_class": null,
126
+ "top_k": 50,
127
+ "top_p": 1.0,
128
+ "typical_p": 1.0,
129
+ "use_cache": true,
130
+ "vocab_size": 2048
131
+ },
132
+ "codec_bos_id": 4197,
133
+ "codec_eos_token_id": 4198,
134
+ "codec_nothink_id": 4203,
135
+ "codec_pad_id": 4196,
136
+ "codec_think_bos_id": 4204,
137
+ "codec_think_eos_id": 4205,
138
+ "dtype": "bfloat16",
139
+ "image_token_id": 151655,
140
+ "model_type": "",
141
+ "num_code_groups": 32,
142
+ "position_id_per_seconds": 25,
143
+ "speaker_id": null,
144
+ "text_config": {
145
+ "_name_or_path": "",
146
+ "add_cross_attention": false,
147
+ "architectures": null,
148
+ "attention_bias": false,
149
+ "attention_dropout": 0,
150
+ "bad_words_ids": null,
151
+ "begin_suppress_tokens": null,
152
+ "bos_token_id": null,
153
+ "chunk_size_feed_forward": 0,
154
+ "cross_attention_hidden_size": null,
155
+ "decoder_sparse_step": 1,
156
+ "decoder_start_token_id": null,
157
+ "diversity_penalty": 0.0,
158
+ "do_sample": false,
159
+ "dtype": null,
160
+ "early_stopping": false,
161
+ "encoder_no_repeat_ngram_size": 0,
162
+ "eos_token_id": null,
163
+ "exponential_decay_length_penalty": null,
164
+ "finetuning_task": null,
165
+ "forced_bos_token_id": null,
166
+ "forced_eos_token_id": null,
167
+ "hidden_act": "silu",
168
+ "hidden_size": 1024,
169
+ "id2label": {
170
+ "0": "LABEL_0",
171
+ "1": "LABEL_1"
172
+ },
173
+ "initializer_range": 0.02,
174
+ "intermediate_size": 2048,
175
+ "is_decoder": false,
176
+ "is_encoder_decoder": false,
177
+ "label2id": {
178
+ "LABEL_0": 0,
179
+ "LABEL_1": 1
180
+ },
181
+ "length_penalty": 1.0,
182
+ "max_length": 20,
183
+ "max_position_embeddings": 32768,
184
+ "min_length": 0,
185
+ "mlp_only_layers": [],
186
+ "model_type": "qwen3_omni_moe_talker_text",
187
+ "moe_intermediate_size": 384,
188
+ "no_repeat_ngram_size": 0,
189
+ "norm_topk_prob": false,
190
+ "num_attention_heads": 16,
191
+ "num_beam_groups": 1,
192
+ "num_beams": 1,
193
+ "num_experts": 128,
194
+ "num_experts_per_tok": 8,
195
+ "num_hidden_layers": 20,
196
+ "num_key_value_heads": 2,
197
+ "num_return_sequences": 1,
198
+ "output_attentions": false,
199
+ "output_hidden_states": false,
200
+ "output_router_logits": false,
201
+ "output_scores": false,
202
+ "pad_token_id": null,
203
+ "prefix": null,
204
+ "problem_type": null,
205
+ "remove_invalid_values": false,
206
+ "repetition_penalty": 1.0,
207
+ "return_dict": true,
208
+ "return_dict_in_generate": false,
209
+ "rms_norm_eps": 1e-06,
210
+ "rope_parameters": {
211
+ "rope_theta": 10000.0,
212
+ "rope_type": "default"
213
+ },
214
+ "router_aux_loss_coef": 0.001,
215
+ "sep_token_id": null,
216
+ "sliding_window": null,
217
+ "suppress_tokens": null,
218
+ "task_specific_params": null,
219
+ "temperature": 1.0,
220
+ "tie_encoder_decoder": false,
221
+ "tie_word_embeddings": false,
222
+ "tokenizer_class": null,
223
+ "top_k": 50,
224
+ "top_p": 1.0,
225
+ "typical_p": 1.0,
226
+ "use_cache": true,
227
+ "vocab_size": 3072
228
+ },
229
+ "thinker_hidden_size": 2048,
230
+ "video_token_id": 151656,
231
+ "vision_start_token_id": 151652
232
+ },
233
+ "thinker_config": {
234
+ "audio_config": {
235
+ "_name_or_path": "",
236
+ "activation_dropout": 0,
237
+ "activation_function": "gelu",
238
+ "add_cross_attention": false,
239
+ "architectures": null,
240
+ "attention_dropout": 0,
241
+ "bad_words_ids": null,
242
+ "begin_suppress_tokens": null,
243
+ "bos_token_id": null,
244
+ "chunk_size_feed_forward": 0,
245
+ "conv_chunksize": 500,
246
+ "cross_attention_hidden_size": null,
247
+ "d_model": 1280,
248
+ "decoder_start_token_id": null,
249
+ "diversity_penalty": 0.0,
250
+ "do_sample": false,
251
+ "downsample_hidden_size": 480,
252
+ "dropout": 0,
253
+ "dtype": null,
254
+ "early_stopping": false,
255
+ "encoder_attention_heads": 20,
256
+ "encoder_ffn_dim": 5120,
257
+ "encoder_layers": 32,
258
+ "encoder_no_repeat_ngram_size": 0,
259
+ "eos_token_id": null,
260
+ "exponential_decay_length_penalty": null,
261
+ "finetuning_task": null,
262
+ "forced_bos_token_id": null,
263
+ "forced_eos_token_id": null,
264
+ "id2label": {
265
+ "0": "LABEL_0",
266
+ "1": "LABEL_1"
267
+ },
268
+ "initializer_range": 0.02,
269
+ "is_decoder": false,
270
+ "is_encoder_decoder": false,
271
+ "label2id": {
272
+ "LABEL_0": 0,
273
+ "LABEL_1": 1
274
+ },
275
+ "length_penalty": 1.0,
276
+ "max_length": 20,
277
+ "max_source_positions": 1500,
278
+ "min_length": 0,
279
+ "model_type": "qwen3_omni_moe_audio_encoder",
280
+ "n_window": 50,
281
+ "n_window_infer": 800,
282
+ "no_repeat_ngram_size": 0,
283
+ "num_beam_groups": 1,
284
+ "num_beams": 1,
285
+ "num_hidden_layers": 32,
286
+ "num_mel_bins": 128,
287
+ "num_return_sequences": 1,
288
+ "output_attentions": false,
289
+ "output_dim": 2048,
290
+ "output_hidden_states": false,
291
+ "output_scores": false,
292
+ "pad_token_id": null,
293
+ "prefix": null,
294
+ "problem_type": null,
295
+ "pruned_heads": {},
296
+ "remove_invalid_values": false,
297
+ "repetition_penalty": 1.0,
298
+ "return_dict": true,
299
+ "return_dict_in_generate": false,
300
+ "scale_embedding": false,
301
+ "sep_token_id": null,
302
+ "suppress_tokens": null,
303
+ "task_specific_params": null,
304
+ "temperature": 1.0,
305
+ "tf_legacy_loss": false,
306
+ "tie_encoder_decoder": false,
307
+ "tie_word_embeddings": true,
308
+ "tokenizer_class": null,
309
+ "top_k": 50,
310
+ "top_p": 1.0,
311
+ "torchscript": false,
312
+ "typical_p": 1.0,
313
+ "use_bfloat16": false
314
+ },
315
+ "audio_end_token_id": 151670,
316
+ "audio_start_token_id": 151669,
317
+ "audio_token_id": 151675,
318
+ "dtype": "bfloat16",
319
+ "image_token_id": 151655,
320
+ "initializer_range": 0.02,
321
+ "model_type": "qwen3_omni_moe_thinker",
322
+ "position_id_per_seconds": 13,
323
+ "seconds_per_chunk": 2,
324
+ "text_config": {
325
+ "_name_or_path": "",
326
+ "add_cross_attention": false,
327
+ "architectures": null,
328
+ "attention_bias": false,
329
+ "attention_dropout": 0.0,
330
+ "bad_words_ids": null,
331
+ "begin_suppress_tokens": null,
332
+ "bos_token_id": null,
333
+ "chunk_size_feed_forward": 0,
334
+ "cross_attention_hidden_size": null,
335
+ "decoder_sparse_step": 1,
336
+ "decoder_start_token_id": null,
337
+ "diversity_penalty": 0.0,
338
+ "do_sample": false,
339
+ "dtype": null,
340
+ "early_stopping": false,
341
+ "encoder_no_repeat_ngram_size": 0,
342
+ "eos_token_id": null,
343
+ "exponential_decay_length_penalty": null,
344
+ "finetuning_task": null,
345
+ "forced_bos_token_id": null,
346
+ "forced_eos_token_id": null,
347
+ "head_dim": 128,
348
+ "hidden_act": "silu",
349
+ "hidden_size": 2048,
350
+ "id2label": {
351
+ "0": "LABEL_0",
352
+ "1": "LABEL_1"
353
+ },
354
+ "initializer_range": 0.02,
355
+ "intermediate_size": 768,
356
+ "is_decoder": false,
357
+ "is_encoder_decoder": false,
358
+ "label2id": {
359
+ "LABEL_0": 0,
360
+ "LABEL_1": 1
361
+ },
362
+ "length_penalty": 1.0,
363
+ "max_length": 20,
364
+ "max_position_embeddings": 65536,
365
+ "min_length": 0,
366
+ "mlp_only_layers": [],
367
+ "model_type": "qwen3_omni_moe_text",
368
+ "moe_intermediate_size": 768,
369
+ "no_repeat_ngram_size": 0,
370
+ "norm_topk_prob": true,
371
+ "num_attention_heads": 32,
372
+ "num_beam_groups": 1,
373
+ "num_beams": 1,
374
+ "num_experts": 128,
375
+ "num_experts_per_tok": 8,
376
+ "num_hidden_layers": 48,
377
+ "num_key_value_heads": 4,
378
+ "num_return_sequences": 1,
379
+ "output_attentions": false,
380
+ "output_hidden_states": false,
381
+ "output_router_logits": false,
382
+ "output_scores": false,
383
+ "pad_token_id": null,
384
+ "prefix": null,
385
+ "problem_type": null,
386
+ "pruned_heads": {},
387
+ "remove_invalid_values": false,
388
+ "repetition_penalty": 1.0,
389
+ "return_dict": true,
390
+ "return_dict_in_generate": false,
391
+ "rms_norm_eps": 1e-06,
392
+ "rope_parameters": {
393
+ "interleaved": true,
394
+ "mrope_interleaved": true,
395
+ "mrope_section": [
396
+ 24,
397
+ 20,
398
+ 20
399
+ ],
400
+ "rope_theta": 1000000,
401
+ "rope_type": "default",
402
+ "type": "default"
403
+ },
404
+ "rope_theta": 1000000,
405
+ "router_aux_loss_coef": 0.001,
406
+ "sep_token_id": null,
407
+ "shared_expert_intermediate_size": 0,
408
+ "sliding_window": null,
409
+ "suppress_tokens": null,
410
+ "task_specific_params": null,
411
+ "temperature": 1.0,
412
+ "tf_legacy_loss": false,
413
+ "tie_encoder_decoder": false,
414
+ "tie_word_embeddings": false,
415
+ "tokenizer_class": null,
416
+ "top_k": 50,
417
+ "top_p": 1.0,
418
+ "torchscript": false,
419
+ "typical_p": 1.0,
420
+ "use_bfloat16": false,
421
+ "use_cache": true,
422
+ "use_qk_norm": true,
423
+ "use_sliding_window": false,
424
+ "vocab_size": 152064
425
+ },
426
+ "user_token_id": 872,
427
+ "video_token_id": 151656,
428
+ "vision_config": {
429
+ "_name_or_path": "",
430
+ "add_cross_attention": false,
431
+ "apply_vit_abs_pos_embed": true,
432
+ "architectures": null,
433
+ "bad_words_ids": null,
434
+ "begin_suppress_tokens": null,
435
+ "bos_token_id": null,
436
+ "chunk_size_feed_forward": 0,
437
+ "cross_attention_hidden_size": null,
438
+ "decoder_start_token_id": null,
439
+ "deepstack_visual_indexes": [
440
+ 8,
441
+ 16,
442
+ 24
443
+ ],
444
+ "depth": 27,
445
+ "diversity_penalty": 0.0,
446
+ "do_sample": false,
447
+ "dtype": null,
448
+ "early_stopping": false,
449
+ "encoder_no_repeat_ngram_size": 0,
450
+ "eos_token_id": null,
451
+ "exponential_decay_length_penalty": null,
452
+ "finetuning_task": null,
453
+ "forced_bos_token_id": null,
454
+ "forced_eos_token_id": null,
455
+ "hidden_act": "gelu_pytorch_tanh",
456
+ "hidden_size": 1152,
457
+ "id2label": {
458
+ "0": "LABEL_0",
459
+ "1": "LABEL_1"
460
+ },
461
+ "image_size": 768,
462
+ "in_channels": 3,
463
+ "in_chans": 3,
464
+ "initializer_range": 0.02,
465
+ "intermediate_size": 4304,
466
+ "is_decoder": false,
467
+ "is_encoder_decoder": false,
468
+ "label2id": {
469
+ "LABEL_0": 0,
470
+ "LABEL_1": 1
471
+ },
472
+ "length_penalty": 1.0,
473
+ "max_length": 20,
474
+ "min_length": 0,
475
+ "model_type": "qwen3_omni_moe_vision_encoder",
476
+ "no_repeat_ngram_size": 0,
477
+ "num_beam_groups": 1,
478
+ "num_beams": 1,
479
+ "num_heads": 16,
480
+ "num_position_embeddings": 2304,
481
+ "num_return_sequences": 1,
482
+ "out_hidden_size": 2048,
483
+ "output_attentions": false,
484
+ "output_hidden_states": false,
485
+ "output_scores": false,
486
+ "pad_token_id": null,
487
+ "patch_size": 16,
488
+ "prefix": null,
489
+ "problem_type": null,
490
+ "pruned_heads": {},
491
+ "remove_invalid_values": false,
492
+ "repetition_penalty": 1.0,
493
+ "return_dict": true,
494
+ "return_dict_in_generate": false,
495
+ "sep_token_id": null,
496
+ "spatial_merge_size": 2,
497
+ "spatial_patch_size": 16,
498
+ "suppress_tokens": null,
499
+ "task_specific_params": null,
500
+ "temperature": 1.0,
501
+ "temporal_patch_size": 2,
502
+ "tf_legacy_loss": false,
503
+ "tie_encoder_decoder": false,
504
+ "tie_word_embeddings": true,
505
+ "tokenizer_class": null,
506
+ "tokens_per_second": 2,
507
+ "top_k": 50,
508
+ "top_p": 1.0,
509
+ "torchscript": false,
510
+ "typical_p": 1.0,
511
+ "use_bfloat16": false
512
+ },
513
+ "vision_end_token_id": 151653,
514
+ "vision_start_token_id": 151652
515
+ },
516
+ "transformers_version": "5.0.0.dev0",
517
+ "tts_bos_token_id": 151672,
518
+ "tts_eos_token_id": 151673,
519
+ "tts_pad_token_id": 151671,
520
+ "user_token_id": 872
521
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "max_new_tokens": 32768,
4
+ "temperature": 0.7,
5
+ "top_p": 0.9,
6
+ "transformers_version": "5.0.0.dev0"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2783e4ed9f9405a4d96ec2b91d41dce4d73c6c9b7f5d560b13b5acb64ffc53c9
3
+ size 4997899632
model-00002-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6efb0ed57e720589b6c59495949a2a1d9a5aada6f49e7f18192971b2619139c5
3
+ size 4997754216
model-00003-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8775ff5e8168d752a8020dc394c4cd6c08c4a51fb44b68e97edf0fa2e0916ed7
3
+ size 4997754216
model-00004-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67f55f2f5df683d052efb428141dcbea8bf1668775ab92c18778276616fb9c9f
3
+ size 4997755648
model-00005-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0edaa8bf2f0d9c9a026046757658f8495d979be8d577b9927427f5c4dc703249
3
+ size 4997755792
model-00006-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852acd9d9222df7876fba01f7474276922a22472672cdb37f6c199bf828fa91b
3
+ size 4997755792
model-00007-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d98efef30350017566a7893afdfa3d620d408868863da0b872b35f7705a5a7
3
+ size 4997755792
model-00008-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6da2794925e9a1dea70793878e1e66df83e758d0ad8121c21541173bb9769cc0
3
+ size 4997755792
model-00009-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e943d86b5f12523e6bf6d471b1092e63ddeb75af7825c791407d1d8973a96428
3
+ size 4997755792
model-00010-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dca32d7a5eb8ef6acc5f1043f4599b74a241d057482e1e1bc24fa4f9edeab46
3
+ size 4997755792
model-00011-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8b381902ff2fc4f8045b0285b26e8224d995c8820338e5b4bf9ec948d38447
3
+ size 4997755792
model-00012-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5361bd3ae54b70044258efa024a85989c45bc77175fffca92117ba27048e3c3c
3
+ size 4997755792
model-00013-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d32027604af8e5e641d9f838726acaaed899afbdb06b1a923ab0e77f3f3629db
3
+ size 3467789632
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_extractor": {
3
+ "chunk_length": 30,
4
+ "dither": 0.0,
5
+ "feature_extractor_type": "WhisperFeatureExtractor",
6
+ "feature_size": 128,
7
+ "hop_length": 160,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "Qwen2VLImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "max_pixels": 12845056,
20
+ "merge_size": 2,
21
+ "min_pixels": 3136,
22
+ "n_fft": 400,
23
+ "n_samples": 480000,
24
+ "nb_max_frames": 3000,
25
+ "padding_side": "right",
26
+ "padding_value": 0.0,
27
+ "patch_size": 16,
28
+ "processor_class": "Qwen3OmniMoeProcessor",
29
+ "return_attention_mask": true,
30
+ "sampling_rate": 16000,
31
+ "temporal_patch_size": 2
32
+ },
33
+ "image_processor": {
34
+ "crop_size": null,
35
+ "data_format": "channels_first",
36
+ "device": null,
37
+ "disable_grouping": null,
38
+ "dither": 0.0,
39
+ "do_center_crop": null,
40
+ "do_convert_rgb": true,
41
+ "do_normalize": true,
42
+ "do_pad": null,
43
+ "do_rescale": true,
44
+ "do_resize": true,
45
+ "feature_size": 128,
46
+ "hop_length": 160,
47
+ "image_mean": [
48
+ 0.5,
49
+ 0.5,
50
+ 0.5
51
+ ],
52
+ "image_processor_type": "Qwen2VLImageProcessorFast",
53
+ "image_std": [
54
+ 0.5,
55
+ 0.5,
56
+ 0.5
57
+ ],
58
+ "input_data_format": null,
59
+ "max_pixels": 12845056,
60
+ "merge_size": 2,
61
+ "min_pixels": 3136,
62
+ "n_fft": 400,
63
+ "n_samples": 4800000,
64
+ "nb_max_frames": 30000,
65
+ "pad_size": null,
66
+ "padding_side": "right",
67
+ "padding_value": 0.0,
68
+ "patch_size": 16,
69
+ "processor_class": "Qwen3OmniMoeProcessor",
70
+ "resample": 3,
71
+ "rescale_factor": 0.00392156862745098,
72
+ "return_attention_mask": true,
73
+ "return_tensors": null,
74
+ "sampling_rate": 16000,
75
+ "size": {
76
+ "longest_edge": 12845056,
77
+ "shortest_edge": 3136
78
+ },
79
+ "temporal_patch_size": 2
80
+ },
81
+ "processor_class": "Qwen3OmniMoeProcessor",
82
+ "video_processor": {
83
+ "crop_size": null,
84
+ "data_format": "channels_first",
85
+ "default_to_square": true,
86
+ "device": null,
87
+ "dither": 0.0,
88
+ "do_center_crop": null,
89
+ "do_convert_rgb": true,
90
+ "do_normalize": true,
91
+ "do_pad": null,
92
+ "do_rescale": true,
93
+ "do_resize": true,
94
+ "do_sample_frames": false,
95
+ "feature_extractor_type": "WhisperFeatureExtractor",
96
+ "feature_size": 128,
97
+ "fps": null,
98
+ "hop_length": 160,
99
+ "image_mean": [
100
+ 0.5,
101
+ 0.5,
102
+ 0.5
103
+ ],
104
+ "image_std": [
105
+ 0.5,
106
+ 0.5,
107
+ 0.5
108
+ ],
109
+ "input_data_format": null,
110
+ "max_frames": 768,
111
+ "max_pixels": 12845056,
112
+ "merge_size": 2,
113
+ "min_frames": 4,
114
+ "min_pixels": 3136,
115
+ "n_fft": 400,
116
+ "n_samples": 4800000,
117
+ "nb_max_frames": 30000,
118
+ "num_frames": null,
119
+ "pad_size": null,
120
+ "padding_side": "right",
121
+ "padding_value": 0.0,
122
+ "patch_size": 16,
123
+ "processor_class": "Qwen3OmniMoeProcessor",
124
+ "resample": 3,
125
+ "rescale_factor": 0.00392156862745098,
126
+ "return_attention_mask": true,
127
+ "return_metadata": false,
128
+ "return_tensors": null,
129
+ "sampling_rate": 16000,
130
+ "size": {
131
+ "longest_edge": 12845056,
132
+ "shortest_edge": 3136
133
+ },
134
+ "temporal_patch_size": 2,
135
+ "video_metadata": null,
136
+ "video_processor_type": "Qwen2VLVideoProcessor"
137
+ }
138
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>",
16
+ "<|audio_start|>",
17
+ "<|audio_end|>",
18
+ "<tts_pad>",
19
+ "<tts_text_bos>",
20
+ "<tts_text_bos_single>",
21
+ "<|audio_pad|>"
22
+ ],
23
+ "audio_bos_token": "<|audio_start|>",
24
+ "audio_eos_token": "<|audio_end|>",
25
+ "audio_token": "<|audio_pad|>",
26
+ "eos_token": {
27
+ "content": "<|im_end|>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "image_token": "<|image_pad|>",
34
+ "pad_token": {
35
+ "content": "<|endoftext|>",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ },
41
+ "video_token": "<|video_pad|>",
42
+ "vision_bos_token": "<|vision_start|>",
43
+ "vision_eos_token": "<|vision_end|>"
44
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09267689b8362020b9763b65dd5be7e086b31e28d72e02837a9e781de9a91bc7
3
+ size 11423986
tokenizer_config.json ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ },
213
+ "151669": {
214
+ "content": "<|audio_start|>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "151670": {
222
+ "content": "<|audio_end|>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "151671": {
230
+ "content": "<tts_pad>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "151672": {
238
+ "content": "<tts_text_bos>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "151673": {
246
+ "content": "<tts_text_eod>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "151674": {
254
+ "content": "<tts_text_bos_single>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "151675": {
262
+ "content": "<|audio_pad|>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ }
269
+ },
270
+ "additional_special_tokens": [
271
+ "<|im_start|>",
272
+ "<|im_end|>",
273
+ "<|object_ref_start|>",
274
+ "<|object_ref_end|>",
275
+ "<|box_start|>",
276
+ "<|box_end|>",
277
+ "<|quad_start|>",
278
+ "<|quad_end|>",
279
+ "<|vision_start|>",
280
+ "<|vision_end|>",
281
+ "<|vision_pad|>",
282
+ "<|image_pad|>",
283
+ "<|video_pad|>",
284
+ "<|audio_start|>",
285
+ "<|audio_end|>",
286
+ "<tts_pad>",
287
+ "<tts_text_bos>",
288
+ "<tts_text_bos_single>",
289
+ "<|audio_pad|>"
290
+ ],
291
+ "audio_bos_token": "<|audio_start|>",
292
+ "audio_eos_token": "<|audio_end|>",
293
+ "audio_token": "<|audio_pad|>",
294
+ "bos_token": null,
295
+ "clean_up_tokenization_spaces": false,
296
+ "eos_token": "<|im_end|>",
297
+ "errors": "replace",
298
+ "extra_special_tokens": {
299
+ "audio_bos_token": "<|audio_start|>",
300
+ "audio_eos_token": "<|audio_end|>",
301
+ "audio_token": "<|audio_pad|>",
302
+ "image_token": "<|image_pad|>",
303
+ "video_token": "<|video_pad|>",
304
+ "vision_bos_token": "<|vision_start|>",
305
+ "vision_eos_token": "<|vision_end|>"
306
+ },
307
+ "image_token": "<|image_pad|>",
308
+ "model_max_length": 131072,
309
+ "pad_token": "<|endoftext|>",
310
+ "processor_class": "Qwen3OmniMoeProcessor",
311
+ "split_special_tokens": false,
312
+ "tokenizer_class": "Qwen2Tokenizer",
313
+ "unk_token": null,
314
+ "video_token": "<|video_pad|>",
315
+ "vision_bos_token": "<|vision_start|>",
316
+ "vision_eos_token": "<|vision_end|>"
317
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff