hermeschen-ezcon committed
Commit 65ba22d · verified · 1 Parent(s): 97f2757

Upload folder using huggingface_hub

openvino_config.json CHANGED
@@ -1,29 +0,0 @@
-{
-  "dtype": "int8",
-  "input_info": null,
-  "optimum_version": "1.27.0",
-  "output_attentions": false,
-  "quantization_config": {
-    "all_layers": null,
-    "backup_precision": null,
-    "bits": 8,
-    "dataset": null,
-    "dtype": "int8",
-    "gptq": null,
-    "group_size": -1,
-    "ignored_scope": null,
-    "lora_correction": null,
-    "num_samples": null,
-    "processor": null,
-    "quant_method": "default",
-    "ratio": 1.0,
-    "scale_estimation": null,
-    "sensitivity_metric": null,
-    "statistics_path": null,
-    "sym": true,
-    "tokenizer": null,
-    "trust_remote_code": true
-  },
-  "save_onnx_model": false,
-  "transformers_version": "4.53.3"
-}
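For reference, a weight-compression setup matching the removed config (8-bit symmetric, ratio 1.0, per-channel scales, no calibration dataset) can be reproduced with optimum-intel. The snippet below is a minimal sketch, not the exact command used for this repository: the source model id and output directory are placeholders, and OVModelForVisualCausalLM is assumed only because the repo ships separate language / text-embeddings / vision-embeddings submodels.

from optimum.intel import OVModelForVisualCausalLM, OVWeightQuantizationConfig

# Mirrors the removed openvino_config.json: int8 symmetric weight-only
# quantization of all compressible layers (ratio=1.0), per-channel scales.
q_config = OVWeightQuantizationConfig(bits=8, sym=True, ratio=1.0, group_size=-1)

model = OVModelForVisualCausalLM.from_pretrained(
    "google/gemma-3-4b-it",        # placeholder source model id
    export=True,                   # convert from PyTorch to OpenVINO IR on load
    quantization_config=q_config,
    trust_remote_code=True,
)
model.save_pretrained("ov-int8")   # writes openvino_*_model.xml/.bin and openvino_config.json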
openvino_language_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dcfecc97a22841bad03a5322e5af7f67e596a882c35262256b7561b5f4e3f48
-size 3883912598
+oid sha256:0a252f7d49b9bf50af2241860f3586d6cdac4d1b735a33742d7cbe5f7b04f2e6
+size 3785170944
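The LFS pointer above records only the blob's SHA-256 and byte size, so a downloaded openvino_language_model.bin can be checked against it locally. A minimal sketch, assuming the file sits in the current directory:

import hashlib
import os

path = "openvino_language_model.bin"  # hypothetical local path
expected_oid = "0a252f7d49b9bf50af2241860f3586d6cdac4d1b735a33742d7cbe5f7b04f2e6"
expected_size = 3785170944

# Stream the file in 1 MiB chunks to avoid loading ~3.8 GB into memory.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"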
openvino_language_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
openvino_text_embeddings_model.bin CHANGED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e553b311e37513a98686050205168a18d7d0cd5345614f970bd05845e914a520
-size 671776904
openvino_text_embeddings_model.xml CHANGED
@@ -1,210 +0,0 @@
-<?xml version="1.0"?>
-<net name="Model3" version="11">
-  <layers>
-    <layer id="0" name="input" type="Parameter" version="opset1">
-      <data shape="?,?" element_type="i64" />
-      <output>
-        <port id="0" precision="I64" names="input">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="1" name="self.weight" type="Const" version="opset1">
-      <data element_type="i8" shape="262208, 2560" offset="0" size="671252480" />
-      <output>
-        <port id="0" precision="I8">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="2" name="Convert_1127436" type="Convert" version="opset1">
-      <data destination_type="f16" />
-      <input>
-        <port id="0" precision="I8">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="FP16">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="3" name="self.weight/scale" type="Const" version="opset1">
-      <data element_type="f16" shape="262208, 1" offset="671252480" size="524416" />
-      <output>
-        <port id="0" precision="FP16">
-          <dim>262208</dim>
-          <dim>1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="4" name="self.weight/fq_weights_0" type="Multiply" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="FP16">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-        <port id="1" precision="FP16">
-          <dim>262208</dim>
-          <dim>1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="2" precision="FP16">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="5" name="self.weight/fq_weights_0/convert" type="Convert" version="opset1">
-      <data destination_type="f32" />
-      <input>
-        <port id="0" precision="FP16">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="FP32">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="6" name="aten::embedding/Convert" type="Convert" version="opset1">
-      <data destination_type="i32" />
-      <input>
-        <port id="0" precision="I64">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="7" name="aten::embedding/Constant" type="Const" version="opset1">
-      <data element_type="i32" shape="" offset="671776896" size="4" />
-      <output>
-        <port id="0" precision="I32" />
-      </output>
-    </layer>
-    <layer id="8" name="aten::embedding/Gather" type="Gather" version="opset8">
-      <data batch_dims="0" />
-      <input>
-        <port id="0" precision="FP32">
-          <dim>262208</dim>
-          <dim>2560</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32" />
-      </input>
-      <output>
-        <port id="3" precision="FP32" names="7">
-          <dim>-1</dim>
-          <dim>-1</dim>
-          <dim>2560</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="9" name="Constant_21226" type="Const" version="opset1">
-      <data element_type="f32" shape="1, 1, 1" offset="671776900" size="4" />
-      <output>
-        <port id="0" precision="FP32">
-          <dim>1</dim>
-          <dim>1</dim>
-          <dim>1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="10" name="aten::mul/Multiply" type="Multiply" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="FP32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-          <dim>2560</dim>
-        </port>
-        <port id="1" precision="FP32">
-          <dim>1</dim>
-          <dim>1</dim>
-          <dim>1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="2" precision="FP32" names="inputs_embeds">
-          <dim>-1</dim>
-          <dim>-1</dim>
-          <dim>2560</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="11" name="Result_19299" type="Result" version="opset1" output_names="inputs_embeds">
-      <input>
-        <port id="0" precision="FP32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-          <dim>2560</dim>
-        </port>
-      </input>
-    </layer>
-  </layers>
-  <edges>
-    <edge from-layer="0" from-port="0" to-layer="6" to-port="0" />
-    <edge from-layer="1" from-port="0" to-layer="2" to-port="0" />
-    <edge from-layer="2" from-port="1" to-layer="4" to-port="0" />
-    <edge from-layer="3" from-port="0" to-layer="4" to-port="1" />
-    <edge from-layer="4" from-port="2" to-layer="5" to-port="0" />
-    <edge from-layer="5" from-port="1" to-layer="8" to-port="0" />
-    <edge from-layer="6" from-port="1" to-layer="8" to-port="1" />
-    <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
-    <edge from-layer="8" from-port="3" to-layer="10" to-port="0" />
-    <edge from-layer="9" from-port="0" to-layer="10" to-port="1" />
-    <edge from-layer="10" from-port="2" to-layer="11" to-port="0" />
-  </edges>
-  <rt_info>
-    <Runtime_version value="2025.3.0-19807-44526285f24-releases/2025/3" />
-    <conversion_parameters>
-      <framework value="pytorch" />
-      <is_python_object value="True" />
-    </conversion_parameters>
-    <nncf>
-      <friendly_names_were_updated value="True" />
-      <version value="2.18.0" />
-      <weight_compression>
-        <advanced_parameters value="{'statistics_path': None, 'lora_adapter_rank': 256, 'group_size_fallback_mode': 'ignore', 'min_adjusted_group_size': 16, 'awq_params': {'subset_size': 32, 'percent_to_apply': 0.002, 'alpha_min': 0.0, 'alpha_max': 1.0, 'steps': 100, 'prefer_data_aware_scaling': True}, 'scale_estimation_params': {'subset_size': 64, 'initial_steps': 5, 'scale_steps': 5, 'weight_penalty': -1.0}, 'gptq_params': {'damp_percent': 0.1, 'block_size': 128, 'subset_size': 128}, 'lora_correction_params': {'adapter_rank': 8, 'num_iterations': 3, 'apply_regularization': True, 'subset_size': 128, 'use_int8_adapters': True}, 'backend_params': {}, 'codebook': None}" />
-        <all_layers value="False" />
-        <awq value="False" />
-        <backup_mode value="int8_asym" />
-        <compression_format value="dequantize" />
-        <gptq value="False" />
-        <group_size value="-1" />
-        <ignored_scope value="[]" />
-        <lora_correction value="False" />
-        <mode value="int8_sym" />
-        <ratio value="1.0" />
-        <scale_estimation value="False" />
-        <sensitivity_metric value="weight_quantization_error" />
-      </weight_compression>
-    </nncf>
-    <optimum>
-      <nncf_version value="2.18.0" />
-      <optimum_intel_version value="1.25.2" />
-      <optimum_version value="1.27.0" />
-      <pytorch_version value="2.8.0" />
-      <transformers_version value="4.53.3" />
-    </optimum>
-  </rt_info>
-</net>
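As context for the removed IR above: the graph stores the 262208x2560 embedding table as int8 with a per-row f16 scale (Const -> Convert -> Multiply -> Convert), gathers rows by token id, and applies a scalar multiply before emitting FP32 "inputs_embeds". A rough sketch of running such an embeddings submodel standalone with the OpenVINO runtime follows; the file path is hypothetical and the token ids are arbitrary example values:

import numpy as np
import openvino as ov

core = ov.Core()
# Hypothetical local path to the IR shown in the diff above.
model = core.read_model("openvino_text_embeddings_model.xml")
compiled = core.compile_model(model, "CPU")

# The Parameter expects int64 token ids of shape (batch, sequence).
token_ids = np.array([[2, 2364, 1161]], dtype=np.int64)
inputs_embeds = compiled([token_ids])[compiled.output("inputs_embeds")]
print(inputs_embeds.shape)  # (1, 3, 2560)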
openvino_vision_embeddings_model.bin CHANGED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:116da4ca12e5c4cf0f89c35bc54f7e95435145941d826bfc029082266dc3d4cc
-size 425761156
openvino_vision_embeddings_model.xml CHANGED
The diff for this file is too large to render. See raw diff