tclf90 committed
Commit · c66e31f
Parent(s): e624633
"优化模型量化损失" ("Optimize model quantization loss")
Browse files
- README.md +5 -6
- config.json +1 -1
- generation_config.json +0 -1
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- modeling_chatglm.py +1 -1
README.md
CHANGED
@@ -16,7 +16,7 @@ tags:
 
 
 ### 【Model Update Date】
-``` 2024-06-06 00:20 ```
+``` 2024-06-18 ```
 
 ### 【Model Size】
 `6.9GB`
@@ -24,15 +24,14 @@ tags:
 ### 【06-06 Status Notice】
 
 1. For now, the model has to be launched via the vllm entrypoint.
-2. …
-   The reason is that the original author used a rather bold `layernorm_epsilon: 1.5625e-07`.
-   A model trained with this value is extremely hard to keep accurate in `fp16/half`.
-3. The model is now mostly calibrated; for the time being, the `2024-06-06 00:20` version is the one to use.
-4. For those entering competitions, I recommend the int8 model [GLM-4-9B-Chat-GPTQ-Int8-量化修复](https://www.modelscope.cn/models/tclf90/glm-4-9b-chat-GPTQ-Int8) instead; it is more robust.
+2. For those entering competitions, I recommend the int8 model [GLM-4-9B-Chat-GPTQ-Int8-量化修复](https://www.modelscope.cn/models/tclf90/glm-4-9b-chat-GPTQ-Int8) instead; it is more robust.
 
 ### 【Changelog】
 
 ```
+2024-06-18
+1. Optimized model quantization loss
+
 2024-06-06 00:20
 1. Recalibrated the model
 2. Fixed the incorrect layernorm_epsilon value
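Item 1 of the notice above says the model has to be started through the vllm entrypoint. A minimal sketch of doing that from Python, assuming vllm is installed; the model ID and port are placeholders rather than values taken from this repo, and additional flags may be needed depending on the vllm version:

```python
# Sketch: launch the OpenAI-compatible vllm server for this model.
import subprocess

subprocess.run([
    "python", "-m", "vllm.entrypoints.openai.api_server",
    "--model", "tclf90/glm-4-9b-chat-GPTQ-Int4",  # placeholder model ID
    "--trust-remote-code",  # the chatglm modeling code is custom, so this is required
    "--port", "8000",       # placeholder port
])
```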
config.json
CHANGED
@@ -28,7 +28,7 @@
     "hidden_dropout": 0.0,
     "hidden_size": 4096,
     "kv_channels": 128,
-    "layernorm_epsilon":
+    "layernorm_epsilon": 1e-06,
     "model_type": "chatglm",
     "multi_query_attention": true,
     "multi_query_group_num": 2,
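The value being changed here is the one the README discusses: `1.5625e-07` lies in fp16's subnormal range, so it cannot even be represented accurately in half precision, and it vanishes entirely when added to a variance of ordinary magnitude. A minimal NumPy sketch of the effect, with illustrative numbers only:

```python
# Sketch: how a tiny layernorm epsilon degrades in half precision.
import numpy as np

print(np.float16(1.5625e-07))  # ~1.788e-07: roughly 14% relative error (subnormal)
print(np.float16(1e-06))       # ~1.013e-06: roughly 1.3% error for the new value

# Both values are lost outright when added to a variance near 1.0 in fp16,
# because the spacing between fp16 numbers at that magnitude is about 9.8e-04.
var = np.float16(1.0)
print(var + np.float16(1.5625e-07) == var)  # True: the epsilon contributes nothing
```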
generation_config.json
CHANGED
@@ -5,6 +5,5 @@
     151336,
     151338
   ],
-  "pad_token_id": 151329,
   "transformers_version": "4.40.2"
 }
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e09f83f719809cb5eb4b33e6a9a4ffa978d4e3c595f54e99ee90d6698b57852f
 size 4995499776
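The pointer file stores only the shard's sha256 and size. A quick integrity check for a downloaded shard, sketched in Python; the path assumes the file sits in the current working directory:

```python
# Sketch: verify a downloaded shard against the oid in the LFS pointer.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest()

expected = "e09f83f719809cb5eb4b33e6a9a4ffa978d4e3c595f54e99ee90d6698b57852f"
print(sha256_of("model-00001-of-00002.safetensors") == expected)
```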
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:022c89d2200edfe4f4e3ea2a4c0386579d36bbd23f0861cb5194e871540ea8c0
 size 1893310824
modeling_chatglm.py
CHANGED
@@ -324,7 +324,7 @@ class SelfAttention(torch.nn.Module):
         )
 
     def forward(
-            self, hidden_states, attention_mask, rotary_pos_emb, kv_cache=None, use_cache=True
+            self, hidden_states, attention_mask, rotary_pos_emb=None, kv_cache=None, use_cache=True
     ):
         # hidden_states: [b, sq, h]
 
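The one-line change gives `rotary_pos_emb` a default of `None`, so call sites that omit the argument no longer raise a `TypeError`. A hypothetical stand-in module (not the repo's actual `SelfAttention`) showing the pattern:

```python
# Sketch: an optional rotary_pos_emb keeps call sites that omit it working.
import torch

class Attn(torch.nn.Module):  # hypothetical stand-in class
    def forward(self, hidden_states, attention_mask, rotary_pos_emb=None,
                kv_cache=None, use_cache=True):
        if rotary_pos_emb is not None:
            pass  # the rotary position embedding would be applied here
        return hidden_states

# Works without rotary_pos_emb only because of the default:
out = Attn()(torch.zeros(1, 4, 8), attention_mask=None)
```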