Upload folder using huggingface_hub
- README.md +82 -0
- config.json +23 -0
- optimizer.bin +3 -0
- pytorch_model.bin +3 -0
- pytorch_model_1.bin +3 -0
- scheduler.bin +3 -0

README.md
ADDED
@@ -0,0 +1,82 @@
# vicuna-eagle-13B-v1.3

## Model Configuration

base_model: lmsys/vicuna-13b-v1.3

EAGLE draft model (printed module structure):

```text
Model(
  (embed_tokens): Embedding(32000, 4096, padding_idx=0)
  (layers): ModuleList(
    (0): LlamaDecoderLayer(
      (self_attn): LlamaAttention(
        (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (rotary_emb): LlamaRotaryEmbedding()
      )
      (mlp): LlamaMLP(
        (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
        (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
        (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
        (act_fn): SiLU()
      )
      (post_attention_layernorm): LlamaRMSNorm()
    )
  )
  (fc): Linear(in_features=8192, out_features=4096, bias=True)
  (act): SiLU()
)
```
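
A draft model with this structure is normally used together with its Vicuna base model through the EAGLE code base. A minimal loading sketch, assuming the upstream EAGLE repo's `EaModel` API; the draft-model repo id is a placeholder, not the actual upload target:

```python
# Minimal loading sketch. Assumptions: the upstream EAGLE repo's EaModel API,
# and "your-username/vicuna-eagle-13B-v1.3" as a placeholder repo id.
import torch
from eagle.model.ea_model import EaModel

model = EaModel.from_pretrained(
    base_model_path="lmsys/vicuna-13b-v1.3",
    ea_model_path="your-username/vicuna-eagle-13B-v1.3",  # placeholder
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
model.eval()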

vicuna-13B-config.json (passed to training below via `--configpath`):

```json
{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13824,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 40,
  "num_hidden_layers": 1,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.28.1",
  "use_cache": true,
  "vocab_size": 32000
}
```

## Model Training

### Data Generation

```bash
python -m eagle.ge_data.allocation --outdir ../data
```
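
A quick way to sanity-check the generated data is to load one sample; the file layout below (torch-serialized `.ckpt` samples under the output directory) is an assumption about the EAGLE data pipeline, not a documented contract:

```python
# Hypothetical sanity check; the .ckpt naming/layout under ../data is an
# assumption about what eagle.ge_data writes.
import glob
import torch

files = sorted(glob.glob("../data/**/*.ckpt", recursive=True))
print(f"{len(files)} generated samples")
sample = torch.load(files[0])
print(sample.keys() if isinstance(sample, dict) else type(sample))
```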

### Training

```bash
accelerate launch -m --mixed_precision=bf16 eagle.train.main --tmpdir eagle/data/sharegpt_0_67999_mufp16 --cpdir eagle/checkpoint --configpath eagle/train/vicuna_13B_config.json
```
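
The optimizer.bin, scheduler.bin, pytorch_model.bin, and pytorch_model_1.bin files in this commit match the layout that `accelerate` writes via `Accelerator.save_state`, so resuming from the uploaded folder would presumably look like the sketch below (assuming the checkpoint was indeed saved with `save_state`, and that model, optimizer, and scheduler are rebuilt and prepared exactly as during training):

```python
# Resume sketch. Assumption: the uploaded *.bin files are an `accelerate`
# checkpoint produced by Accelerator.save_state during training.
from accelerate import Accelerator

accelerator = Accelerator(mixed_precision="bf16")
# Rebuild model, optimizer, and scheduler as in training, then register them:
# model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler)
accelerator.load_state("eagle/checkpoint")
```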

### Model Upload

A hypothetical upload command via the huggingface_hub CLI; the repo id below is a placeholder:

```bash
# Hypothetical sketch: upload the checkpoint folder with huggingface_hub's CLI.
# "your-username/vicuna-eagle-13B-v1.3" is a placeholder repo id.
huggingface-cli upload your-username/vicuna-eagle-13B-v1.3 eagle/checkpoint .
```

config.json
ADDED
@@ -0,0 +1,23 @@
{
  "_name_or_path": "/home/ubuntu/model_weights/vicuna-13b-v1.3",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13824,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 40,
  "num_hidden_layers": 1,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.28.1",
  "use_cache": true,
  "vocab_size": 32000
}
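
This is a standard transformers Llama config; note `num_hidden_layers: 1`, matching the single decoder layer of the draft head printed above. It can be inspected with the usual transformers API (the local path is a placeholder):

```python
# Inspect the draft-model config with transformers (path is a placeholder).
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("./vicuna-eagle-13B-v1.3")
assert cfg.num_hidden_layers == 1   # single-layer EAGLE draft head
print(cfg.hidden_size, cfg.num_attention_heads)  # 5120, 40
```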

optimizer.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a06d8a6feee96fad1b7e433a874019d86505d2edaec302d981fd29574a9d5554
size 2957074763

pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c234b006a3db7eb99c7709a1dba523c5077d9663d3b14b206338bab8c602d657
size 2133897490

pytorch_model_1.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9099e02b7f770d85d9fb6b0a9601cca95027307b0b64f0155dda81b2bf8c290b
size 655360835

scheduler.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5fc70d1c5ed8c42b291152ed291ff8e3585fc6c574dea395d981c5a23fd21aa6
size 563
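
The four binary files above are Git LFS pointers rather than the payloads themselves; after downloading, a file can be verified against the sha256 recorded in its pointer:

```python
# Verify a downloaded LFS payload against the sha256 from its pointer file.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# Digest copied from the scheduler.bin pointer above.
assert sha256_of("scheduler.bin") == "5fc70d1c5ed8c42b291152ed291ff8e3585fc6c574dea395d981c5a23fd21aa6"
```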