czagnos committed on
Commit
79cdadd
·
verified ·
1 Parent(s): 5768b3e

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

config.json ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3nForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "conf_attention_chunk_size": 12,
7
+ "conf_attention_context_left": 13,
8
+ "conf_attention_context_right": 0,
9
+ "conf_attention_logit_cap": 50.0,
10
+ "conf_conv_kernel_size": 5,
11
+ "conf_num_attention_heads": 8,
12
+ "conf_num_hidden_layers": 12,
13
+ "conf_reduction_factor": 4,
14
+ "conf_residual_weight": 0.5,
15
+ "gradient_clipping": 10000000000.0,
16
+ "hidden_size": 1536,
17
+ "input_feat_size": 128,
18
+ "model_type": "gemma3n_audio",
19
+ "rms_norm_eps": 1e-06,
20
+ "sscp_conv_channel_size": [
21
+ 128,
22
+ 32
23
+ ],
24
+ "sscp_conv_group_norm_eps": 0.001,
25
+ "sscp_conv_kernel_size": [
26
+ [
27
+ 3,
28
+ 3
29
+ ],
30
+ [
31
+ 3,
32
+ 3
33
+ ]
34
+ ],
35
+ "sscp_conv_stride_size": [
36
+ [
37
+ 2,
38
+ 2
39
+ ],
40
+ [
41
+ 2,
42
+ 2
43
+ ]
44
+ ],
45
+ "torch_dtype": "float16",
46
+ "vocab_offset": 262272,
47
+ "vocab_size": 128
48
+ },
49
+ "audio_soft_tokens_per_image": 188,
50
+ "audio_token_id": 262273,
51
+ "boa_token_id": 256000,
52
+ "boi_token_id": 255999,
53
+ "bos_token_id": 2,
54
+ "eoa_token_id": 262272,
55
+ "eoi_token_id": 262144,
56
+ "eos_token_id": 106,
57
+ "image_token_id": 262145,
58
+ "initializer_range": 0.02,
59
+ "model_type": "gemma3n",
60
+ "pad_token_id": 0,
61
+ "quantization_config": {
62
+ "bnb_4bit_compute_dtype": "float16",
63
+ "bnb_4bit_quant_type": "nf4",
64
+ "bnb_4bit_use_double_quant": true,
65
+ "llm_int8_enable_fp32_cpu_offload": false,
66
+ "llm_int8_has_fp16_weight": false,
67
+ "llm_int8_skip_modules": null,
68
+ "llm_int8_threshold": 6.0,
69
+ "load_in_4bit": true,
70
+ "load_in_8bit": false,
71
+ "quant_method": "bitsandbytes"
72
+ },
73
+ "text_config": {
74
+ "activation_sparsity_pattern": [
75
+ 0.95,
76
+ 0.95,
77
+ 0.95,
78
+ 0.95,
79
+ 0.95,
80
+ 0.95,
81
+ 0.95,
82
+ 0.95,
83
+ 0.95,
84
+ 0.95,
85
+ 0.0,
86
+ 0.0,
87
+ 0.0,
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0,
95
+ 0.0,
96
+ 0.0,
97
+ 0.0,
98
+ 0.0,
99
+ 0.0,
100
+ 0.0,
101
+ 0.0,
102
+ 0.0,
103
+ 0.0,
104
+ 0.0
105
+ ],
106
+ "altup_active_idx": 0,
107
+ "altup_coef_clip": 120.0,
108
+ "altup_correct_scale": true,
109
+ "altup_num_inputs": 4,
110
+ "attention_bias": false,
111
+ "attention_dropout": 0.0,
112
+ "final_logit_softcapping": 30.0,
113
+ "head_dim": 256,
114
+ "hidden_activation": "gelu_pytorch_tanh",
115
+ "hidden_size": 2048,
116
+ "hidden_size_per_layer_input": 256,
117
+ "initializer_range": 0.02,
118
+ "intermediate_size": [
119
+ 8192,
120
+ 8192,
121
+ 8192,
122
+ 8192,
123
+ 8192,
124
+ 8192,
125
+ 8192,
126
+ 8192,
127
+ 8192,
128
+ 8192,
129
+ 8192,
130
+ 8192,
131
+ 8192,
132
+ 8192,
133
+ 8192,
134
+ 8192,
135
+ 8192,
136
+ 8192,
137
+ 8192,
138
+ 8192,
139
+ 8192,
140
+ 8192,
141
+ 8192,
142
+ 8192,
143
+ 8192,
144
+ 8192,
145
+ 8192,
146
+ 8192,
147
+ 8192,
148
+ 8192
149
+ ],
150
+ "laurel_rank": 64,
151
+ "layer_types": [
152
+ "sliding_attention",
153
+ "sliding_attention",
154
+ "sliding_attention",
155
+ "sliding_attention",
156
+ "full_attention",
157
+ "sliding_attention",
158
+ "sliding_attention",
159
+ "sliding_attention",
160
+ "sliding_attention",
161
+ "full_attention",
162
+ "sliding_attention",
163
+ "sliding_attention",
164
+ "sliding_attention",
165
+ "sliding_attention",
166
+ "full_attention",
167
+ "sliding_attention",
168
+ "sliding_attention",
169
+ "sliding_attention",
170
+ "sliding_attention",
171
+ "full_attention",
172
+ "sliding_attention",
173
+ "sliding_attention",
174
+ "sliding_attention",
175
+ "sliding_attention",
176
+ "full_attention",
177
+ "sliding_attention",
178
+ "sliding_attention",
179
+ "sliding_attention",
180
+ "sliding_attention",
181
+ "full_attention"
182
+ ],
183
+ "max_position_embeddings": 32768,
184
+ "model_type": "gemma3n_text",
185
+ "num_attention_heads": 8,
186
+ "num_hidden_layers": 30,
187
+ "num_key_value_heads": 2,
188
+ "num_kv_shared_layers": 10,
189
+ "rms_norm_eps": 1e-06,
190
+ "rope_local_base_freq": 10000.0,
191
+ "rope_scaling": null,
192
+ "rope_theta": 1000000.0,
193
+ "sliding_window": 512,
194
+ "torch_dtype": "float16",
195
+ "use_cache": true,
196
+ "vocab_size": 262400,
197
+ "vocab_size_per_layer_input": 262144
198
+ },
199
+ "torch_dtype": "float16",
200
+ "transformers_version": "4.55.0",
201
+ "unsloth_fixed": true,
202
+ "unsloth_version": "2025.8.1",
203
+ "vision_config": {
204
+ "architecture": "mobilenetv5_300m_enc",
205
+ "do_pooling": false,
206
+ "hidden_size": 2048,
207
+ "initializer_range": 0.02,
208
+ "label_names": [
209
+ "LABEL_0",
210
+ "LABEL_1"
211
+ ],
212
+ "model_args": null,
213
+ "model_type": "gemma3n_vision",
214
+ "num_classes": 2,
215
+ "rms_norm_eps": 1e-06,
216
+ "torch_dtype": "float16",
217
+ "vocab_offset": 262144,
218
+ "vocab_size": 128
219
+ },
220
+ "vision_soft_tokens_per_image": 256
221
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "cache_implementation": "hybrid",
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 1,
7
+ 106
8
+ ],
9
+ "pad_token_id": 0,
10
+ "top_k": 64,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.55.0"
13
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1adb650b1415935eb76472787515f004da44ab2c6149d38aee8005554ba9ac2
3
+ size 2650151956
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e899f4791c7152c59c11a62c14bde1d5b805e2e00541dae5b825b5337800b735
3
+ size 4994547379
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0946f7ba1c8cdaf009013c7f8fe2de410e42f86277cb23083c9b76343c5438af
3
+ size 468779864
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff