Tokenizer can decode tensors (for vLLM test)
#14
by
zhouzaida
- opened
- tokenization_moonshot.py +3 -0
tokenization_moonshot.py
CHANGED
|
@@ -16,6 +16,7 @@ from shutil import copyfile
|
|
| 16 |
from tiktoken.load import load_tiktoken_bpe
|
| 17 |
from tokenizers import AddedToken
|
| 18 |
from transformers.tokenization_utils import PreTrainedTokenizer
|
|
|
|
| 19 |
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
|
| 20 |
|
| 21 |
|
|
@@ -229,6 +230,8 @@ class TikTokenTokenizer(PreTrainedTokenizer):
|
|
| 229 |
if len(kwargs) > 0:
|
| 230 |
return super().decode(token_ids, **kwargs)
|
| 231 |
|
|
|
|
|
|
|
| 232 |
if type(token_ids) is int:
|
| 233 |
token_ids = [token_ids]
|
| 234 |
|
|
|
|
| 16 |
from tiktoken.load import load_tiktoken_bpe
|
| 17 |
from tokenizers import AddedToken
|
| 18 |
from transformers.tokenization_utils import PreTrainedTokenizer
|
| 19 |
+
from transformers.utils import to_py_obj
|
| 20 |
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
|
| 21 |
|
| 22 |
|
|
|
|
| 230 |
if len(kwargs) > 0:
|
| 231 |
return super().decode(token_ids, **kwargs)
|
| 232 |
|
| 233 |
+
token_ids = to_py_obj(token_ids)
|
| 234 |
+
|
| 235 |
if type(token_ids) is int:
|
| 236 |
token_ids = [token_ids]
|
| 237 |
|