Gavin1104 committed · Commit b4ee5df · verified · 1 Parent(s): de50e5d

Upload folder using huggingface_hub

Files changed (6)
  1. README.md +82 -0
  2. config.json +23 -0
  3. optimizer.bin +3 -0
  4. pytorch_model.bin +3 -0
  5. pytorch_model_1.bin +3 -0
  6. scheduler.bin +3 -0
README.md ADDED
@@ -0,0 +1,82 @@
+ # vicuna-eagle-13B-v1.3
+
+ ## Model Configuration
+ base_model: lmsys/vicuna-13b-v1.3
+
+ EAGLE model structure:
+
+ ```text
+ Model(
+   (embed_tokens): Embedding(32000, 4096, padding_idx=0)
+   (layers): ModuleList(
+     (0): LlamaDecoderLayer(
+       (self_attn): LlamaAttention(
+         (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
+         (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
+         (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
+         (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
+         (rotary_emb): LlamaRotaryEmbedding()
+       )
+       (mlp): LlamaMLP(
+         (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
+         (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
+         (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
+         (act_fn): SiLU()
+       )
+       (post_attention_layernorm): LlamaRMSNorm()
+     )
+   )
+   (fc): Linear(in_features=8192, out_features=4096, bias=True)
+   (act): SiLU()
+ )
+ ```
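+
+ For orientation only, here is a minimal PyTorch sketch of how a draft head with the shapes printed above could fuse the base model's hidden states with the token embeddings (the `fc` layer maps 2 × 4096 → 4096). This is an illustrative assumption, not the code shipped in this repository; the actual EAGLE forward pass may differ in detail.
+
+ ```python
+ import torch
+ import torch.nn as nn
+
+ class DraftHeadSketch(nn.Module):
+     """Hypothetical fusion step matching the printed module shapes."""
+     def __init__(self, vocab_size=32000, hidden_size=4096):
+         super().__init__()
+         self.embed_tokens = nn.Embedding(vocab_size, hidden_size, padding_idx=0)
+         # fc fuses [token embedding ; base-model hidden state] back to hidden_size
+         self.fc = nn.Linear(2 * hidden_size, hidden_size, bias=True)
+         self.act = nn.SiLU()
+         self.decoder_layer = nn.Identity()  # stand-in for the single LlamaDecoderLayer
+
+     def forward(self, input_ids, base_hidden_states):
+         # input_ids: (batch, seq); base_hidden_states: (batch, seq, hidden_size)
+         emb = self.embed_tokens(input_ids)
+         fused = self.act(self.fc(torch.cat([emb, base_hidden_states], dim=-1)))
+         return self.decoder_layer(fused)
+ ```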
+
+ vicuna_13B_config.json (referenced by --configpath in the training command below):
+
+ ```json
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 2048,
+   "model_type": "llama",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 1,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-06,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
+ ```
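+
+ As a quick sanity check, the one-layer draft config above can be loaded directly with transformers (a standard call, not something this repository ships; the path is assumed to match the training command below):
+
+ ```python
+ from transformers import LlamaConfig
+
+ # Assumed location: eagle/train/vicuna_13B_config.json, as passed to --configpath below.
+ config = LlamaConfig.from_json_file("eagle/train/vicuna_13B_config.json")
+ print(config.hidden_size, config.num_hidden_layers)  # 5120 1
+ ```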
+ ## Model Training
+
+ ### Data Generation
+ ```bash
+ python -m eagle.ge_data.allocation --outdir ../data
+ ```
+
+ ### Training
+
+ ```bash
+ accelerate launch -m --mixed_precision=bf16 eagle.train.main --tmpdir eagle/data/sharegpt_0_67999_mufp16 --cpdir eagle/checkpoint --configpath eagle/train/vicuna_13B_config.json
+ ```
+
+ ### Model Upload
+
+ ```bash
+
+ ```
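+
+ A hedged sketch of one way to perform the upload (the commit message says the folder was uploaded with huggingface_hub; the repo id and local path below are placeholders, not taken from the repository):
+
+ ```python
+ from huggingface_hub import HfApi
+
+ api = HfApi()
+ api.upload_folder(
+     folder_path="eagle/checkpoint",             # assumed: the --cpdir used in training
+     repo_id="Gavin1104/vicuna-eagle-13B-v1.3",  # placeholder repo id
+     repo_type="model",
+ )
+ ```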
config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "_name_or_path": "/home/ubuntu/model_weights/vicuna-13b-v1.3",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 2048,
+   "model_type": "llama",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 1,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-06,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a06d8a6feee96fad1b7e433a874019d86505d2edaec302d981fd29574a9d5554
+ size 2957074763
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c234b006a3db7eb99c7709a1dba523c5077d9663d3b14b206338bab8c602d657
+ size 2133897490
pytorch_model_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9099e02b7f770d85d9fb6b0a9601cca95027307b0b64f0155dda81b2bf8c290b
+ size 655360835
scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fc70d1c5ed8c42b291152ed291ff8e3585fc6c574dea395d981c5a23fd21aa6
+ size 563