ErNewdev0 committed on
Commit 7a9a80f · verified · 1 Parent(s): d3f1961

Model save

README.md CHANGED
@@ -1,5 +1,7 @@
 ---
 library_name: transformers
+license: mit
+base_model: gpt2
 tags:
 - generated_from_trainer
 model-index:
@@ -12,9 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # nusa-beta-0001
 
-This model was trained from scratch on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 1.6995
+This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
 
 ## Model description
 
@@ -34,27 +34,24 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 8
-- eval_batch_size: 8
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
-- gradient_accumulation_steps: 2
+- gradient_accumulation_steps: 4
 - total_train_batch_size: 16
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 20
+- num_epochs: 3
 - mixed_precision_training: Native AMP
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 0.1608        | 10.0  | 100  | 1.6995          |
 
 
 ### Framework versions
 
-- Transformers 4.48.3
-- Pytorch 2.5.1+cu124
+- Transformers 4.51.3
+- Pytorch 2.6.0+cu124
 - Datasets 3.5.1
-- Tokenizers 0.21.0
+- Tokenizers 0.21.1
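
For context, a minimal sketch of how the updated hyperparameters map onto the standard `transformers` `TrainingArguments` API; the output path is an assumption, and only the values shown in the diff above come from the commit:

```python
from transformers import TrainingArguments

# Hypothetical reconstruction of the new run's settings.
args = TrainingArguments(
    output_dir="nusa-beta-0001",    # assumed output path
    learning_rate=5e-5,
    per_device_train_batch_size=4,  # was 8 before this commit
    per_device_eval_batch_size=4,   # was 8
    gradient_accumulation_steps=4,  # was 2; 4 x 4 = 16 keeps the
                                    # total_train_batch_size at 16
    num_train_epochs=3,             # was 20
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    seed=42,
    fp16=True,                      # "Native AMP" mixed precision
)
```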
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
 {
-  "[MASK]": 50258,
-  "[PAD]": 50257
+  "<assistant>": 50258,
+  "<user>": 50257
 }
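
The [MASK]/[PAD] slots are repurposed as chat-role markers. A hedged sketch of how entries like these are typically produced (GPT-2's base vocabulary ends at id 50256, so newly added specials land at 50257 and 50258):

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<user>", "<assistant>"]}
)
print(tokenizer.convert_tokens_to_ids(["<user>", "<assistant>"]))
# [50257, 50258]

# The embedding matrix must grow to match the new vocab size,
# otherwise the new ids index out of bounds.
model = AutoModelForCausalLM.from_pretrained("gpt2")
model.resize_token_embeddings(len(tokenizer))
```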
generation_config.json CHANGED
@@ -2,6 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 50256,
   "eos_token_id": 50256,
-  "pad_token_id": 50257,
-  "transformers_version": "4.48.3"
+  "transformers_version": "4.51.3"
 }
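
With the dedicated [PAD] token gone, the generation config no longer pins a pad id. A sketch of the usual workaround when batching generation with a GPT-2-style tokenizer; the repo id is inferred from this commit page and may differ:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "ErNewdev0/nusa-beta-0001"  # assumed repo id
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)

inputs = tok("<user> Hello", return_tensors="pt")
out = model.generate(
    **inputs,
    max_new_tokens=32,
    pad_token_id=tok.eos_token_id,  # <|endoftext|> (50256) doubles as pad
)
print(tok.decode(out[0], skip_special_tokens=True))
```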
special_tokens_map.json CHANGED
@@ -1,37 +1,22 @@
 {
-  "bos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "additional_special_tokens": [
+    {
+      "content": "<user>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<assistant>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
 }
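
A quick illustrative check that the new map loads as expected; the token strings come straight from the diff, while the repo id is assumed as above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ErNewdev0/nusa-beta-0001")  # assumed id
assert tok.bos_token == tok.eos_token == tok.pad_token == "<|endoftext|>"
assert tok.additional_special_tokens == ["<user>", "<assistant>"]
```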
tokenizer.json CHANGED
@@ -2,19 +2,19 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 256,
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 256
+      "Fixed": 512
     },
     "direction": "Right",
     "pad_to_multiple_of": null,
-    "pad_id": 50257,
+    "pad_id": 50256,
     "pad_type_id": 0,
-    "pad_token": "[PAD]"
+    "pad_token": "<|endoftext|>"
   },
   "added_tokens": [
     {
@@ -28,7 +28,7 @@
     },
     {
       "id": 50257,
-      "content": "[PAD]",
+      "content": "<user>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -37,7 +37,7 @@
     },
     {
       "id": 50258,
-      "content": "[MASK]",
+      "content": "<assistant>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
tokenizer_config.json CHANGED
@@ -10,7 +10,7 @@
       "special": true
     },
     "50257": {
-      "content": "[PAD]",
+      "content": "<user>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -18,7 +18,7 @@
       "special": true
     },
     "50258": {
-      "content": "[MASK]",
+      "content": "<assistant>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -26,20 +26,16 @@
       "special": true
     }
   },
+  "additional_special_tokens": [
+    "<user>",
+    "<assistant>"
+  ],
   "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
-  "mask_token": "[MASK]",
-  "max_length": 256,
   "model_max_length": 1024,
-  "pad_to_multiple_of": null,
-  "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
-  "stride": 0,
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "<|endoftext|>"
 }
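
Taken together, the commit converts the tokenizer from a masked-LM-style setup ([MASK]/[PAD]) to a two-role chat format. An illustrative prompt follows; the exact template used in training isn't recorded in this commit:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ErNewdev0/nusa-beta-0001")  # assumed id
prompt = "<user> Hello! <assistant>"
ids = tok(prompt).input_ids
# <user> and <assistant> each map to a single id (50257 / 50258),
# so the role markers survive tokenization intact.
```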