Mungert committed on
Commit a7b1c2d · verified · 0 Parent(s)

Super-squash history to reclaim storage
.gitattributes ADDED
@@ -0,0 +1,70 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ bf16-q8-0/Hermes-4-70B-bf16_q8_0-bf16_q8_0-00001-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
+ bf16-q8-0/Hermes-4-70B-bf16_q8_0-bf16_q8_0-00002-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
+ bf16-q8-0/Hermes-4-70B-bf16_q8_0-bf16_q8_0-00003-of-00003.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q2_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q2_k_s.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q3_k_s.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+ q6-k-m/Hermes-4-70B-q6_k_m-q6_k_m-00001-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
+ q6-k-m/Hermes-4-70B-q6_k_m-q6_k_m-00002-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q4_k_s.gguf filter=lfs diff=lfs merge=lfs -text
+ q8-0/Hermes-4-70B-q8_0-q8_0-00001-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
+ q8-0/Hermes-4-70B-q8_0-q8_0-00002-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q4_0.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q4_1.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-q5_0.gguf filter=lfs diff=lfs merge=lfs -text
+ q5-1/Hermes-4-70B-q5_1-q5_1-00001-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
+ q5-1/Hermes-4-70B-q5_1-q5_1-00002-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq1_s.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq1_m.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq2_xs.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq2_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq2_s.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq2_m.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq3_xs.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq3_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq3_m.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq4_xs.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-iq4_nl.gguf filter=lfs diff=lfs merge=lfs -text
+ Hermes-4-70B-imatrix.gguf filter=lfs diff=lfs merge=lfs -text
+ bf16/Hermes-4-70B-bf16-00001-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
+ bf16/Hermes-4-70B-bf16-00002-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
+ bf16/Hermes-4-70B-bf16-00003-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
+ bf16/Hermes-4-70B-bf16-00004-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
Hermes-4-70B-imatrix.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f4bbca8872aea2ccd774d0dce14cec96ff8c8d7f9b5701beeec42a3d1518b227
+ size 24989760
Hermes-4-70B-iq1_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01437a85e1e79d5d9f21599ca503d2b7ab3d43cc7e24f3c9d069bcf908cb4d45
+ size 20656196288
Hermes-4-70B-iq1_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb26d540103de5db746f4cf0f42f19b47a85c3cea951edc11aedf79cb9311554
+ size 18748312256
Hermes-4-70B-iq2_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e4530fbc3af980673c71d1cce6683049e93be37661954dff28569f70f2d4f8a
+ size 25433508544
Hermes-4-70B-iq2_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c984c4ccc224f8ba6628591cb26435f608afcd691ff1bb17637d20912943163b
+ size 24145857216
Hermes-4-70B-iq2_xs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b993a250478cc531ccb3e1f2242a91142dd2967f15a2c8231680ad1fc88b56fc
+ size 23298607808
Hermes-4-70B-iq2_xxs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8af2e3ba3e3d5a4adde7737f696f43fdb83d8f3edb2ecf29a3f6446862dff78
+ size 21359790784
Hermes-4-70B-iq3_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcc6ec32b755254af8017a4661472418ebaa6ccfb7eb04d606588d3be7bb4c95
+ size 33356712640
Hermes-4-70B-iq3_xs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71371acfa88b910d7aef1541f5ebf57fcdbf724ba144f1093c298428aa30793c
+ size 29940452032
Hermes-4-70B-iq3_xxs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:39b234b0d43cc038bb72ac8f930887a9868f6849d7e570df4737fff3561b5e96
+ size 29122562752
Hermes-4-70B-iq4_nl.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d90023871432a0fab488596fd173d9999c86d6e6f4d53c27d74bda6f46aac67a
+ size 39782746816
Hermes-4-70B-iq4_xs.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f343ed04490686dc4b4a39c3abe414efb17f65a2b890d8154e489083f4e7da99
+ size 38206376640
Hermes-4-70B-q2_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9115c7883680d8fafbbb502d4561d27ea18f00d8ad73075ec8363b11e554aa4d
+ size 26238814912
Hermes-4-70B-q2_k_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdf16cb378b0582a108d745740317c2aa3d1a88259188878d75c8a94320461e4
+ size 25976146624
Hermes-4-70B-q3_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88d50cea9aabb5c47fe62e79e36523399ac5fdc3565dea67f97ffe8559397b31
+ size 34313013952
Hermes-4-70B-q3_k_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db1cd313d6a5c59bb9c4115e2edf66ecffcf70ea9b26af1fcce7b4934454e6e0
+ size 34033928896
Hermes-4-70B-q4_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71fc779feea2ae8cbd9ceb76982eb8f8c0a14894260d44ed4d561eba1269e5c4
+ size 40749533888
Hermes-4-70B-q4_1.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdafa7a136cdb103261d397c77035ec35ed285f095caac051ff02578a07c1b42
+ size 44239719104
Hermes-4-70B-q4_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e735d408fb57fa778ea46b7231587654a79133d9493556dd890c02821ad37915
+ size 42917464768
Hermes-4-70B-q4_k_s.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a97d223782af1d005a26d0358fe32914a48932073227fe08ba0a79cc7360c0c9
+ size 40987200192
Hermes-4-70B-q5_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77fab5d8a743f1eb41b2199be6ea7929f44643b8f14080e9c27a29d1bf4015e5
+ size 49305914048
Hermes-4-70B-q5_k_m.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d22a79d7c517e36602b43f128665cee065b04f24b1d394b3ad24cb5cb39b349
+ size 50740890304
README.md ADDED
@@ -0,0 +1,320 @@
+ ---
+ base_model: meta-llama/Meta-Llama-3.1-70B
+ language:
+ - en
+ library_name: transformers
+ license: llama3
+ pipeline_tag: text-generation
+ tags:
+ - Llama-3.1
+ - instruct
+ - finetune
+ - reasoning
+ - hybrid-mode
+ - chatml
+ - function calling
+ - tool use
+ - json mode
+ - structured outputs
+ - atropos
+ - dataforge
+ - long context
+ - roleplaying
+ - chat
+ widget:
+ - example_title: Hermes 4
+   messages:
+   - role: system
+     content: You are Hermes 4, a capable, neutrally-aligned assistant. Prefer concise,
+       correct answers.
+   - role: user
+     content: Explain the difference between BFS and DFS to a new CS student.
+ model-index:
+ - name: Hermes-4-Llama-3.1-70B
+   results: []
+ ---
+ 
+ # <span style="color: #7FFF7F;">Hermes-4-70B GGUF Models</span>
+ 
+ ## <span style="color: #7F7FFF;">Model Generation Details</span>
+ 
+ This model was generated using [llama.cpp](https://github.com/ggerganov/llama.cpp) at commit [`408ff524`](https://github.com/ggerganov/llama.cpp/commit/408ff524b40baf4f51a81d42a9828200dd4fcb6b).
+ 
+ ---
+ 
+ ## <span style="color: #7FFF7F;">Quantization Beyond the IMatrix</span>
+ 
+ I've been experimenting with a new quantization approach that selectively elevates the precision of key layers beyond what the default IMatrix configuration provides.
+ 
+ In my testing, standard IMatrix quantization underperforms at lower bit depths, especially with Mixture of Experts (MoE) models. To address this, I'm using the `--tensor-type` option in `llama.cpp` to manually "bump" important layers to higher precision. You can see the implementation here:
+ 👉 [Layer bumping with llama.cpp](https://github.com/Mungert69/GGUFModelBuilder/blob/main/model-converter/tensor_list_builder.py)
+ 
+ While this does increase model file size, it significantly improves precision for a given quantization level.
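+ 
+ As a concrete illustration, here is a minimal sketch of a layer bump, assuming a llama.cpp build whose `llama-quantize` tool supports `--tensor-type` overrides. The tensor patterns, types, and file paths below are illustrative assumptions, not the exact recipe used for these files (see the linked `tensor_list_builder.py` for the real logic):
+ 
+ ```python
+ import subprocess
+ 
+ # Hypothetical example: quantize to Q4_K_M overall, but keep attention-value
+ # and attention-output tensors at a higher-precision type.
+ overrides = ["attn_v=q6_k", "attn_output=q6_k"]
+ 
+ cmd = ["./llama-quantize"]
+ for spec in overrides:
+     cmd += ["--tensor-type", spec]        # bump tensors matching each pattern
+ cmd += ["Hermes-4-70B-bf16.gguf",         # input GGUF (assumed local path)
+         "Hermes-4-70B-q4_k_m.gguf",       # output GGUF
+         "Q4_K_M"]                         # base quantization type
+ subprocess.run(cmd, check=True)
+ ```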
+ 
+ ### **I'd love your feedback—have you tried this? How does it perform for you?**
+ 
+ ---
+ 
+ <a href="https://readyforquantum.com/huggingface_gguf_selection_guide.html" style="color: #7FFF7F;">
+ Click here to get info on choosing the right GGUF model format
+ </a>
+ 
+ ---
+ 
+ <!--Begin Original Model Card-->
+ 
+ # Hermes 4 — Llama-3.1 70B
+ 
+ ![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/roT9o5bMYBtQziRMlaSDf.jpeg)
+ 
+ ## Model Description
+ 
+ Hermes 4 70B is a frontier, hybrid-mode **reasoning** model based on Llama-3.1-70B by Nous Research that is aligned to **you**.
+ 
+ Read the Hermes 4 technical report here: <a href="https://arxiv.org/abs/2508.18255">Hermes 4 Technical Report</a>
+ 
+ Chat with Hermes in Nous Chat: https://chat.nousresearch.com
+ 
+ Training highlights include a newly synthesized post-training corpus emphasizing verified reasoning traces; massive improvements in math, code, STEM, logic, creativity, and format-faithful outputs; and preserved general assistant quality with broadly neutral alignment.
+ 
+ ## What’s new vs Hermes 3
+ 
+ - **Post-training corpus**: massively increased dataset size, from 1M samples and 1.2B tokens to **~5M samples / ~60B tokens**, blended across reasoning and non-reasoning data.
+ - **Hybrid reasoning mode**: explicit `<think>…</think>` segments when the model decides to deliberate, with the option to skip deliberation when you want faster responses.
+ - **Top-quality, expressive reasoning** that improves math, code, STEM, logic, and even creative writing and subjective responses.
+ - **Schema adherence & structured outputs**: trained to produce valid JSON for given schemas and to repair malformed objects.
+ - **Much easier to steer and align**: major improvements in steerability, especially reduced refusal rates.
+ 
+ ## Our Mission: Frontier Capabilities Aligned to You
+ 
+ In pursuit of our mission to produce models that are open, steerable, capable of the full range of human expression, and alignable to your values, we created a new benchmark, RefusalBench, which tests a model's willingness to be helpful in a variety of scenarios commonly disallowed by closed and open models.
+ 
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/t_HvRYPEHV0pc8iS2zHHn.png)
+ 
+ Hermes 4 achieves SOTA on RefusalBench among all popular closed and open models, being helpful and conforming to your values without censorship.
+ 
+ ## Benchmarks (Hermes 4 70B)
+ 
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/Sa-X7ErRF0ej20P8qBv9i.png)
+ 
+ > Full tables, settings, and comparisons are in the technical report.
+ 
+ ## Prompt Format
+ 
+ Hermes 4 uses the Llama-3 chat format with role headers and special tags.
+ 
+ **Basic chat:**
+ ```
+ <|start_header_id|>system<|end_header_id|>
+ 
+ You are Hermes 4. Be concise and helpful.<|eot_id|>
+ <|start_header_id|>user<|end_header_id|>
+ 
+ Explain the photoelectric effect simply.<|eot_id|>
+ <|start_header_id|>assistant<|end_header_id|>
+ ```
+ 
+ ### Reasoning mode
+ 
+ Reasoning mode can be activated with the chat template via the flag `thinking=True`, or by using the following system prompt:
+ 
+ ```
+ You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.
+ ```
+ 
+ Note that you can add any additional system instructions before or after this system message, and they will adjust the model's policies, style, and effort of thinking, as well as its post-thinking style, format, identity, and more. You may also interleave the tool-definition system message with the reasoning one.
+ 
+ When the model chooses to deliberate, it emits:
+ 
+ ```
+ <|start_header_id|>assistant<|end_header_id|>
+ <think>
+ …model’s internal reasoning may appear here…
+ </think>
+ Final response starts here…<|eot_id|>
+ ```
+ 
+ Additionally, we provide a flag to keep the content in between the `<think> … </think>` tags, which you can experiment with by setting `keep_cots=True`.
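+ 
+ With the `transformers` chat template, activating reasoning mode can look like the following minimal sketch (`thinking` is the template flag described above; extra keyword arguments to `apply_chat_template` are forwarded to the template):
+ 
+ ```python
+ from transformers import AutoTokenizer
+ 
+ tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-4-Llama-3.1-70B")
+ 
+ messages = [{"role": "user", "content": "What is 17 * 24?"}]
+ 
+ # thinking=True injects the deep-thinking system prompt so the model
+ # deliberates inside <think>...</think> before answering.
+ prompt = tokenizer.apply_chat_template(
+     messages,
+     add_generation_prompt=True,
+     tokenize=False,
+     thinking=True,
+ )
+ print(prompt)
+ ```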
+ 
+ ## Function Calling & Tool Use
+ 
+ Hermes 4 supports function/tool calls *within* a single assistant turn, produced after its reasoning:
+ 
+ **System message (example):**
+ 
+ ```
+ <|start_header_id|>system<|end_header_id|>
+ You are a function-calling AI. Tools are provided inside <tools>…</tools>.
+ When appropriate, call a tool by emitting a <tool_call>{...}</tool_call> object.
+ After a tool responds (as <tool_response>), continue reasoning inside <think> and produce the final answer.
+ <tools>
+ {"type":"function","function":{"name":"get_weather","description":"Get weather by city","parameters":{"type":"object","properties":{"city":{"type":"string"}},"required":["city"]}}}
+ </tools><|eot_id|>
+ ```
+ 
+ Note that you may also simply place tool definitions into the `tools` field of your messages, and the chat template will parse them and create the system prompt for you. This also works with reasoning mode, for improved accuracy of tool use.
+ 
+ The model will then generate tool calls within `<tool_call> {tool_call} </tool_call>` tags, for easy parsing. The tool_call tags are also added tokens, which makes them easy to parse while streaming. There are also automatic tool parsers built into vLLM and SGLang for Hermes: just set the tool parser in vLLM to `hermes` and in SGLang to `qwen25`.
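+ 
+ A minimal sketch of the template-driven path, assuming the standard `transformers` `tools=` argument (the `get_weather` schema is the example from above):
+ 
+ ```python
+ from transformers import AutoTokenizer
+ 
+ tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-4-Llama-3.1-70B")
+ 
+ tools = [{
+     "type": "function",
+     "function": {
+         "name": "get_weather",
+         "description": "Get weather by city",
+         "parameters": {
+             "type": "object",
+             "properties": {"city": {"type": "string"}},
+             "required": ["city"],
+         },
+     },
+ }]
+ 
+ messages = [{"role": "user", "content": "What's the weather in Paris?"}]
+ 
+ # The chat template builds the <tools>...</tools> system prompt for you;
+ # the model then replies with a <tool_call>{...}</tool_call> block to parse.
+ prompt = tokenizer.apply_chat_template(
+     messages, tools=tools, add_generation_prompt=True, tokenize=False
+ )
+ print(prompt)
+ ```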
+ 
+ ## Inference Notes
+ 
+ - **Sampling defaults that work well:** `temperature=0.6, top_p=0.95, top_k=20`.
+ - **Template:** Use the Llama chat format for Hermes 4 70B and 405B as shown above, or set `add_generation_prompt=True` when using `tokenizer.apply_chat_template(...)`.
+ 
+ ### Transformers example
+ 
+ ```python
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ 
+ model_id = "NousResearch/Hermes-4-Llama-3.1-70B"
+ 
+ tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     torch_dtype=torch.float16,
+     device_map="auto"
+ )
+ 
+ messages = [
+     {"role": "system", "content": "You are Hermes 4. Be concise."},
+     {"role": "user", "content": "Summarize CRISPR in 3 sentences."}
+ ]
+ 
+ # return_dict=True yields input_ids plus attention_mask so that
+ # model.generate(**inputs, ...) unpacks correctly below.
+ inputs = tokenizer.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
+ ).to(model.device)
+ 
+ outputs = model.generate(
+     **inputs, max_new_tokens=400, temperature=0.6, top_p=0.95, top_k=20, do_sample=True
+ )
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
+ 
+ For production serving on multi-GPU nodes, consider tensor-parallel inference engines (e.g., SGLang/vLLM backends) with prefix caching.
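+ 
+ As a rough starting point, here is a minimal vLLM offline-inference sketch using the sampling defaults above; the tensor-parallel degree and prefix-caching setting are assumptions to adapt to your hardware:
+ 
+ ```python
+ from vllm import LLM, SamplingParams
+ 
+ # Shard the model across 4 GPUs and reuse cached prefixes across requests.
+ llm = LLM(
+     model="NousResearch/Hermes-4-Llama-3.1-70B",
+     tensor_parallel_size=4,
+     enable_prefix_caching=True,
+ )
+ 
+ params = SamplingParams(temperature=0.6, top_p=0.95, top_k=20, max_tokens=400)
+ outputs = llm.generate(["Explain the photoelectric effect simply."], params)
+ print(outputs[0].outputs[0].text)
+ ```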
+ 
+ ## Inference Providers:
+ 
+ ### Nous Portal:
+ 
+ <a href="https://portal.nousresearch.com"><img width=256 alt="nous portal logo" src="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/6YytY7N0mjCnBQvWo3qtv.png"></a>
+ 
+ ### Chutes:
+ 
+ <a href="https://chutes.ai/app"><img width=256 alt="chutes logo" src="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/l14AWPv6cSvaprpwK_IWY.png"></a>
+ 
+ ### Nebius:
+ 
+ <a href="https://nebius.com/services/studio-inference-service">
+   <picture>
+     <source media="(prefers-color-scheme: dark)" srcset="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vhL0oAomFa_awBdt2KF_x.png">
+     <source media="(prefers-color-scheme: light)" srcset="https://cdn-uploads.huggingface.co/production/uploads/64b21cbb2fc8324fcb1dac03/LjAfeFfAz8ac5rV-iiwj5.png">
+     <img width=256 alt="nebius.com logo" src="https://cdn-uploads.huggingface.co/production/uploads/64b21cbb2fc8324fcb1dac03/LjAfeFfAz8ac5rV-iiwj5.png">
+   </picture>
+ </a>
+ 
+ ### Luminal:
+ 
+ <a href="https://luminalai.com/">
+   <img width=256 alt="luminal logo" src="https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/FIHsRdjMMP0HUjebiuJyH.png">
+ </a>
+ 
+ # Quantized / Smaller Variants
+ 
+ Hermes 4 is available as original BF16 weights, as well as FP8 variants and GGUF variants by LM Studio.
+ 
+ FP8: https://huggingface.co/NousResearch/Hermes-4-70B-FP8
+ 
+ GGUF (courtesy of the LM Studio team!):
+ https://huggingface.co/lmstudio-community/Hermes-4-70B-GGUF
+ 
+ Hermes 4 is also available in other sizes with similar prompt formats.
+ 
+ See the Hermes 4 collection to explore them all:
+ https://huggingface.co/collections/NousResearch/hermes-4-collection-68a731bfd452e20816725728
+ 
+ # How to cite
+ 
+ ```bibtex
+ @misc{teknium2025hermes4technicalreport,
+       title={Hermes 4 Technical Report},
+       author={Ryan Teknium and Roger Jin and Jai Suphavadeeprasit and Dakota Mahan and Jeffrey Quesnelle and Joe Li and Chen Guang and Shannon Sands and Karan Malhotra},
+       year={2025},
+       eprint={2508.18255},
+       archivePrefix={arXiv},
+       primaryClass={cs.AI},
+       url={https://arxiv.org/abs/2508.18255},
+ }
+ ```
+ 
+ <!--End Original Model Card-->
+ 
+ ---
+ 
+ # <span id="testllm" style="color: #7F7FFF;">🚀 If you find these models useful</span>
+ 
+ Help me test my **AI-Powered Quantum Network Monitor Assistant** with **quantum-ready security checks**:
+ 
+ 👉 [Quantum Network Monitor](https://readyforquantum.com/?assistant=open&utm_source=huggingface&utm_medium=referral&utm_campaign=huggingface_repo_readme)
+ 
+ The full open-source code for the Quantum Network Monitor Service is available in my GitHub repos (repos with NetworkMonitor in the name): [Source Code Quantum Network Monitor](https://github.com/Mungert69). You will also find the code I use to quantize the models, if you want to do it yourself, in [GGUFModelBuilder](https://github.com/Mungert69/GGUFModelBuilder).
+ 
+ 💬 **How to test**:
+ Choose an **AI assistant type**:
+ - `TurboLLM` (GPT-4.1-mini)
+ - `HugLLM` (Hugging Face open-source models)
+ - `TestLLM` (Experimental CPU-only)
+ 
+ ### **What I’m Testing**
+ I’m pushing the limits of **small open-source models for AI network monitoring**, specifically:
+ - **Function calling** against live network services
+ - **How small can a model go** while still handling:
+   - Automated **Nmap security scans**
+   - **Quantum-readiness checks**
+   - **Network Monitoring tasks**
+ 
+ 🟡 **TestLLM** – Current experimental model (llama.cpp on 2 CPU threads on a Hugging Face Docker space):
+ - ✅ **Zero-configuration setup**
+ - ⏳ 30s load time (slow inference but **no API costs**). No token limit, as the cost is low.
+ - 🔧 **Help wanted!** If you’re into **edge-device AI**, let’s collaborate!
+ 
+ ### **Other Assistants**
+ 🟢 **TurboLLM** – Uses **gpt-4.1-mini**:
+ - **It performs very well, but unfortunately OpenAI charges per token, so token usage is limited.**
+ - **Create custom cmd processors to run .net code on Quantum Network Monitor Agents**
+ - **Real-time network diagnostics and monitoring**
+ - **Security Audits**
+ - **Penetration testing** (Nmap/Metasploit)
+ 
+ 🔵 **HugLLM** – Latest open-source models:
+ - 🌐 Runs on the Hugging Face Inference API. Performs pretty well using the latest models hosted on Novita.
+ 
+ ### 💡 **Example commands you could test**:
+ 1. `"Give me info on my website's SSL certificate"`
+ 2. `"Check if my server is using quantum safe encryption for communication"`
+ 3. `"Run a comprehensive security audit on my server"`
+ 4. `"Create a cmd processor to .. (whatever you want)"` (note: you need to install a [Quantum Network Monitor Agent](https://readyforquantum.com/Download/?utm_source=huggingface&utm_medium=referral&utm_campaign=huggingface_repo_readme) to run the .net code on. This is a very flexible and powerful feature. Use with caution!)
+ 
+ ### Final Word
+ 
+ I fund the servers used to create these model files, run the Quantum Network Monitor service, and pay for inference from Novita and OpenAI—all out of my own pocket. All the code behind the model creation and the Quantum Network Monitor project is [open source](https://github.com/Mungert69). Feel free to use whatever you find helpful.
+ 
+ If you appreciate the work, please consider [buying me a coffee](https://www.buymeacoffee.com/mahadeva) ☕. Your support helps cover service costs and allows me to raise token limits for everyone.
+ 
+ I'm also open to job opportunities or sponsorship.
+ 
+ Thank you! 😊
bf16-q8-0/Hermes-4-70B-bf16_q8_0-bf16_q8_0-00001-of-00003.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b29a78a23a2ee464f80f4456a19c12007ba9d7e04313fc2eff07549790038e8
+ size 45902462976
bf16-q8-0/Hermes-4-70B-bf16_q8_0-bf16_q8_0-00002-of-00003.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:978648284c92ff06d04d869d1b53f9e6914909932fe48e56b09ad82b92eef717
+ size 45902462976
bf16-q8-0/Hermes-4-70B-bf16_q8_0-bf16_q8_0-00003-of-00003.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58654fa4e10b53ae369b9ccc180576a69bdb0621fb5c526f063312a5f99e5d01
+ size 8418527648
bf16/Hermes-4-70B-bf16-00001-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4920dbb1f9994400fc03d0efb9b152f45e6940442532336f574cf38c098a4e40
+ size 45902462976
bf16/Hermes-4-70B-bf16-00002-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb2ecf44d0f082fc087403adaa5890bbe9fe04925ad9b4c9c69de722e5bdba53
+ size 45902462976
bf16/Hermes-4-70B-bf16-00003-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81316714e983bc7b269816679c4671572ef967d2ef1c97e705d1e6505ed400c9
+ size 45902462976
bf16/Hermes-4-70B-bf16-00004-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c38f56b8f7a1374b0a8a9cc274a3f2212b4161d3c217d0b57f6a16063a8e4ef9
+ size 3410528672
q5-1/Hermes-4-70B-q5_1-q5_1-00001-of-00002.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57ab34064bf1f73749b6a57a05fca2a2e1079cb335889b15919cf98a2ca3dc7f
+ size 45902462976
q5-1/Hermes-4-70B-q5_1-q5_1-00002-of-00002.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4a0dec2fe98dc00042ba5f9b86f0994770de399878ea7f5f80d5812746eb992
+ size 7681641152
q6-k-m/Hermes-4-70B-q6_k_m-q6_k_m-00001-of-00002.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed4dc3d9627e2ca5b7f3b1db39868ac9214a03b33e386060e89d80007eb9e6ef
+ size 45902462976
q6-k-m/Hermes-4-70B-q6_k_m-q6_k_m-00002-of-00002.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5b0adb32a93628bf2e429c116adae87ad81e5f827bca80c514eeb8176a42f56
+ size 12494604992
q8-0/Hermes-4-70B-q8_0-q8_0-00001-of-00002.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d42c2b308a06935bab53cf4166bfbb1101368fc65b900ea11d93f6e70f7058d
+ size 45902462976
q8-0/Hermes-4-70B-q8_0-q8_0-00002-of-00002.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8cdb8a0ceb1d335257076602daa62676a5a86f66ec4b165218da5e7ab2f343d6
+ size 29072591264