init
Browse files
- config.json +1 -0
- configuration_minicpm.py +3 -0
config.json
CHANGED
|
@@ -36,5 +36,6 @@
|
|
| 36 |
"rope_theta": 10000.0,
|
| 37 |
"scale_emb": 12,
|
| 38 |
"scale_depth": 1.4,
|
|
|
|
| 39 |
"tie_word_embeddings": false
|
| 40 |
}
|
|
|
|
| 36 |
"rope_theta": 10000.0,
|
| 37 |
"scale_emb": 12,
|
| 38 |
"scale_depth": 1.4,
|
| 39 |
+
"mup_denominator": 32,
|
| 40 |
"tie_word_embeddings": false
|
| 41 |
}
|
configuration_minicpm.py
CHANGED
|
@@ -137,6 +137,7 @@ class MiniCPMConfig(PretrainedConfig):
|
|
| 137 |
scale_emb=1,
|
| 138 |
dim_model_base=1,
|
| 139 |
scale_depth=1,
|
|
|
|
| 140 |
sparse_config=None,
|
| 141 |
**kwargs):
|
| 142 |
|
|
@@ -165,6 +166,8 @@ class MiniCPMConfig(PretrainedConfig):
|
|
| 165 |
self.scale_emb = scale_emb
|
| 166 |
self.dim_model_base = dim_model_base
|
| 167 |
self.scale_depth = scale_depth
|
|
|
|
|
|
|
| 168 |
|
| 169 |
# sparse config
|
| 170 |
self.sparse_config = sparse_config
|
|
|
|
| 137 |
scale_emb=1,
|
| 138 |
dim_model_base=1,
|
| 139 |
scale_depth=1,
|
| 140 |
+
mup_denominator=32,
|
| 141 |
sparse_config=None,
|
| 142 |
**kwargs):
|
| 143 |
|
|
|
|
| 166 |
self.scale_emb = scale_emb
|
| 167 |
self.dim_model_base = dim_model_base
|
| 168 |
self.scale_depth = scale_depth
|
| 169 |
+
# only used for Eagle Head
|
| 170 |
+
self.mup_denominator = mup_denominator
|
| 171 |
|
| 172 |
# sparse config
|
| 173 |
self.sparse_config = sparse_config
|