prajwalJumde committed (verified) · Commit 2ee1237 · 1 Parent(s): fd9a803

prajwalJumde/orpheus_tts_FT_may13

.gitattributes CHANGED
@@ -37,3 +37,4 @@ unsloth.F16.gguf filter=lfs diff=lfs merge=lfs -text
 unsloth.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
 unsloth.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
 unsloth.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
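The added rule tells Git LFS to store tokenizer.json as a pointer file instead of committing the full blob. A minimal sketch of how such rules resolve; the helper below is illustrative, not part of this repo, and fnmatch only approximates Git's wildcard matching:

```python
# Illustrative only: check whether a path matches a "filter=lfs" rule
# in .gitattributes. fnmatch approximates Git's pattern matching.
from fnmatch import fnmatch

def lfs_tracked(path: str, attributes_file: str = ".gitattributes") -> bool:
    with open(attributes_file) as fh:
        for line in fh:
            parts = line.split()
            if len(parts) > 1 and "filter=lfs" in parts[1:]:
                if fnmatch(path, parts[0]):
                    return True
    return False

print(lfs_tracked("tokenizer.json"))  # True after this commit
```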
README.md CHANGED
@@ -1,10 +1,8 @@
 ---
-base_model: unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit
 library_name: peft
-license: llama3.1
+license: apache-2.0
+base_model: unsloth/orpheus-3b-0.1-ft
 tags:
-- trl
-- sft
 - unsloth
 - generated_from_trainer
 model-index:
@@ -17,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # outputs
 
-This model is a fine-tuned version of [unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit) on an unknown dataset.
+This model is a fine-tuned version of [unsloth/orpheus-3b-0.1-ft](https://huggingface.co/unsloth/orpheus-3b-0.1-ft) on an unknown dataset.
 
 ## Model description
 
@@ -36,16 +34,16 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 2
+- learning_rate: 0.0001
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 3407
-- gradient_accumulation_steps: 4
+- gradient_accumulation_steps: 2
 - total_train_batch_size: 8
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 5
-- training_steps: 60
+- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.03
+- training_steps: 500
 
 ### Training results
 
@@ -53,8 +51,8 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- PEFT 0.12.0
-- Transformers 4.44.2
-- Pytorch 2.4.0+cu121
-- Datasets 2.21.0
-- Tokenizers 0.19.1
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.6.0+cu124
+- Datasets 3.5.0
+- Tokenizers 0.21.1
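A sketch of the transformers TrainingArguments implied by the updated card; the output_dir value and any argument not listed above are assumptions, not taken from this commit:

```python
# Hedged sketch: TrainingArguments matching the hyperparameters in the
# updated README (argument names per transformers 4.51). output_dir is
# an assumption based on the "outputs" model name in the card.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=4,   # train_batch_size: 4
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    gradient_accumulation_steps=2,   # total_train_batch_size: 4 * 2 = 8
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    max_steps=500,
    optim="adamw_torch_fused",       # OptimizerNames.ADAMW_TORCH_FUSED
    seed=3407,
)
```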
adapter_config.json CHANGED
@@ -1,8 +1,11 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
+  "base_model_name_or_path": "unsloth/orpheus-3b-0.1-ft",
   "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
@@ -10,25 +13,27 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
-  "lora_dropout": 0,
+  "lora_alpha": 128,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "down_proj",
-    "gate_proj",
-    "k_proj",
-    "q_proj",
     "o_proj",
-    "up_proj"
+    "k_proj",
+    "v_proj",
+    "gate_proj",
+    "up_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
   "use_dora": false,
-  "use_rslora": false
+  "use_rslora": true
 }
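The updated adapter config corresponds to a peft LoraConfig along these lines; a sketch for reference, not the training script used for this commit:

```python
# Hedged sketch: a LoraConfig equivalent to the updated adapter_config.json.
from peft import LoraConfig

config = LoraConfig(
    r=128,                       # rank raised from 16 to 128
    lora_alpha=128,
    lora_dropout=0.05,
    bias="none",
    use_rslora=True,             # rank-stabilized LoRA scaling
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
)
```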
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90f916ae7695de68664c5ac90b080d61fe1e0ebbc6872834236524888109ecda
-size 167832240
+oid sha256:3f9fe50dcaa23a9c0908e0f55beb51dfb70a646c785d72d27d6fe4d50a4abf99
+size 778096664
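The new pointer records a ~778 MB adapter (up from ~168 MB, in line with the rank increase and the new base model). A sketch of loading it, assuming the repo id from the commit header:

```python
# Hedged sketch: attach the adapter in this repo to its new base model.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("unsloth/orpheus-3b-0.1-ft")
model = PeftModel.from_pretrained(base, "prajwalJumde/orpheus_tts_FT_may13")
```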
special_tokens_map.json CHANGED
@@ -1,5 +1,26 @@
 {
-  "bos_token": "<|begin_of_text|>",
-  "eos_token": "<|im_end|>",
-  "pad_token": "<|finetune_right_pad_id|>"
+  "additional_special_tokens": [
+    "<|audio|>"
+  ],
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|eot_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|finetune_right_pad_id|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
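Once loaded, the updated map is reflected directly on the tokenizer object; a sketch, assuming the repo id from the commit header:

```python
# Hedged sketch: the special tokens from the updated map, as seen by
# transformers after loading this repo's tokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("prajwalJumde/orpheus_tts_FT_may13")
print(tok.bos_token)                  # <|begin_of_text|>
print(tok.eos_token)                  # <|eot_id|> (was <|im_end|>)
print(tok.pad_token)                  # <|finetune_right_pad_id|>
print(tok.additional_special_tokens)  # ['<|audio|>']
```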
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cffb4ed4dd7e5a5f95197ec864219e711afec7f455b85a7169aaafc230c84d7
-size 5112
+oid sha256:6a09fba46486dd5351bec3e0b3b1a45731e60675a1a46a12ae2ef70cf91480ac
+size 5240
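training_args.bin is a pickled TrainingArguments object; a sketch of inspecting it locally. On PyTorch 2.6 (the version listed in the card) torch.load defaults to weights_only=True, so unpickling requires opting out:

```python
# Hedged sketch: inspect the serialized training arguments. Only unpickle
# files from sources you trust; weights_only=False runs arbitrary pickle code.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.max_steps, args.lr_scheduler_type)
```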