shanndrea committed on
Commit
718f3d5
·
verified ·
1 Parent(s): 3b4aa38

Upload tokenizer

Browse files
Files changed (3) hide show
  1. spiece.model +3 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +32 -0
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d33eab49f262358f962dbf38433dec85c44b71cb05f4b0e23f439c45209218
3
+ size 776904
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -25,6 +25,38 @@
25
  "single_word": false,
26
  "special": true
27
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "32000": {
29
  "content": "<extra_id_99>",
30
  "lstrip": true,
 
25
  "single_word": false,
26
  "special": true
27
  },
28
+ "3": {
29
+ "content": ".",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
+ "4": {
37
+ "content": "(",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": false
43
+ },
44
+ "5": {
45
+ "content": ")",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "6": {
53
+ "content": ",-,–,£,€,#,'",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
  "32000": {
61
  "content": "<extra_id_99>",
62
  "lstrip": true,