metascroy committed on
Commit
c5f16da
·
verified ·
1 Parent(s): 120618b

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4) hide show
  1. README.md +1 -0
  2. config.json +161 -0
  3. generation_config.json +9 -0
  4. pytorch_model.bin +3 -0
README.md CHANGED
@@ -6,6 +6,7 @@ tags:
6
  - unsloth
7
  - mistral3
8
  - trl
 
9
  license: apache-2.0
10
  language:
11
  - en
 
6
  - unsloth
7
  - mistral3
8
  - trl
9
+ - sft
10
  license: apache-2.0
11
  language:
12
  - en
config.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Mistral3ForConditionalGeneration"
4
+ ],
5
+ "bos_token_id": 1,
6
+ "dtype": "bfloat16",
7
+ "eos_token_id": 2,
8
+ "image_token_index": 10,
9
+ "model_type": "mistral3",
10
+ "multimodal_projector_bias": false,
11
+ "pad_token_id": 11,
12
+ "projector_hidden_act": "gelu",
13
+ "quantization_config": {
14
+ "include_input_output_embeddings": true,
15
+ "modules_to_not_convert": [],
16
+ "quant_method": "torchao",
17
+ "quant_type": {
18
+ "default": {
19
+ "_data": {
20
+ "module_fqn_to_config": {
21
+ "_default": {
22
+ "_data": {
23
+ "act_mapping_type": {
24
+ "_data": "ASYMMETRIC",
25
+ "_type": "MappingType"
26
+ },
27
+ "intx_choose_qparams_algorithm": {
28
+ "_data": "AFFINE",
29
+ "_type": "IntxChooseQParamsAlgorithm"
30
+ },
31
+ "intx_packing_format": {
32
+ "_data": "UNPACKED_TO_INT8",
33
+ "_type": "IntxPackingFormat"
34
+ },
35
+ "layout": {
36
+ "_data": {},
37
+ "_type": "QDQLayout",
38
+ "_version": 1
39
+ },
40
+ "weight_dtype": {
41
+ "_data": "int4",
42
+ "_type": "torch.dtype"
43
+ },
44
+ "weight_granularity": {
45
+ "_data": {
46
+ "group_size": 32
47
+ },
48
+ "_type": "PerGroup",
49
+ "_version": 1
50
+ },
51
+ "weight_mapping_type": {
52
+ "_data": "SYMMETRIC",
53
+ "_type": "MappingType"
54
+ },
55
+ "weight_scale_dtype": null
56
+ },
57
+ "_type": "Int8DynamicActivationIntxWeightConfig",
58
+ "_version": 2
59
+ },
60
+ "model.language_model.embed_tokens": {
61
+ "_data": {
62
+ "granularity": {
63
+ "_data": {
64
+ "axis": 0
65
+ },
66
+ "_type": "PerAxis",
67
+ "_version": 1
68
+ },
69
+ "intx_choose_qparams_algorithm": {
70
+ "_data": "AFFINE",
71
+ "_type": "IntxChooseQParamsAlgorithm"
72
+ },
73
+ "intx_packing_format": {
74
+ "_data": "UNPACKED_TO_INT8",
75
+ "_type": "IntxPackingFormat"
76
+ },
77
+ "layout": {
78
+ "_data": {},
79
+ "_type": "QDQLayout",
80
+ "_version": 1
81
+ },
82
+ "mapping_type": {
83
+ "_data": "SYMMETRIC",
84
+ "_type": "MappingType"
85
+ },
86
+ "scale_dtype": null,
87
+ "weight_dtype": {
88
+ "_data": "int8",
89
+ "_type": "torch.dtype"
90
+ }
91
+ },
92
+ "_type": "IntxWeightOnlyConfig",
93
+ "_version": 2
94
+ }
95
+ }
96
+ },
97
+ "_type": "ModuleFqnToConfig",
98
+ "_version": 1
99
+ }
100
+ },
101
+ "quant_type_kwargs": {},
102
+ "untie_embedding_weights": false
103
+ },
104
+ "spatial_merge_size": 2,
105
+ "text_config": {
106
+ "attention_dropout": 0.0,
107
+ "dtype": "bfloat16",
108
+ "head_dim": 128,
109
+ "hidden_act": "silu",
110
+ "hidden_size": 3072,
111
+ "initializer_range": 0.02,
112
+ "intermediate_size": 9216,
113
+ "max_position_embeddings": 262144,
114
+ "model_type": "ministral3",
115
+ "num_attention_heads": 32,
116
+ "num_hidden_layers": 26,
117
+ "num_key_value_heads": 8,
118
+ "rms_norm_eps": 1e-05,
119
+ "rope_parameters": {
120
+ "beta_fast": 32.0,
121
+ "beta_slow": 1.0,
122
+ "factor": 16.0,
123
+ "llama_4_scaling_beta": 0.1,
124
+ "mscale": 1.0,
125
+ "mscale_all_dim": 1.0,
126
+ "original_max_position_embeddings": 16384,
127
+ "rope_theta": 1000000.0,
128
+ "rope_type": "yarn",
129
+ "type": "yarn"
130
+ },
131
+ "sliding_window": null,
132
+ "tie_word_embeddings": true,
133
+ "use_cache": true,
134
+ "vocab_size": 131072
135
+ },
136
+ "tie_word_embeddings": false,
137
+ "transformers_version": "5.0.0.dev0",
138
+ "unsloth_fixed": true,
139
+ "unsloth_version": "2025.11.6",
140
+ "use_cache": false,
141
+ "vision_config": {
142
+ "attention_dropout": 0.0,
143
+ "dtype": "bfloat16",
144
+ "head_dim": 64,
145
+ "hidden_act": "silu",
146
+ "hidden_size": 1024,
147
+ "image_size": 1540,
148
+ "initializer_range": 0.02,
149
+ "intermediate_size": 4096,
150
+ "model_type": "pixtral",
151
+ "num_attention_heads": 16,
152
+ "num_channels": 3,
153
+ "num_hidden_layers": 24,
154
+ "patch_size": 14,
155
+ "rope_parameters": {
156
+ "rope_theta": 10000.0,
157
+ "rope_type": "default"
158
+ }
159
+ },
160
+ "vision_feature_layer": -1
161
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": [
4
+ 2
5
+ ],
6
+ "max_length": 262144,
7
+ "pad_token_id": 11,
8
+ "transformers_version": "5.0.0.dev0"
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c722db6ea52767d6cf86bd06eda3f195409245b8f23b992fe8d377d85413a29
3
+ size 4614146316