fix: tokenizer can be None in preprocessors
processing_aria.py  (+5 -1)
@@ -18,6 +18,7 @@
 # under the License.
 
 import inspect
+import logging
 import re
 from typing import List, Optional, Union
 
@@ -34,6 +35,8 @@ from transformers.tokenization_utils import (
 
 from .vision_processor import AriaVisionProcessor
 
+logger = logging.getLogger(__name__)
+
 
 class AriaProcessor(ProcessorMixin):
     """
@@ -73,7 +76,7 @@ class AriaProcessor(ProcessorMixin):
         else:
             self.tokenizer = tokenizer
 
-        if self.tokenizer.pad_token is None:
+        if self.tokenizer is not None and self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.unk_token
 
         self.image_token = image_token
@@ -229,6 +232,7 @@ class AriaProcessor(ProcessorMixin):
             **cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
         )
         if "use_fast" in kwargs:
+            logger.warning("use_fast is not supported for AriaProcessor. Ignoring...")
             kwargs.pop("use_fast")
         try:
             tokenizer = AutoTokenizer.from_pretrained(