Upload folder using huggingface_hub
Browse files
- example.py +1 -1
- hf_nemotron_parse_processor.py +19 -3
- preprocessor_config.json +2 -3
example.py
CHANGED
|
@@ -14,7 +14,7 @@ AutoImageProcessor.register("nemotron_parse", NemotronParseImageProcessor)
|
|
| 14 |
|
| 15 |
|
| 16 |
# Load model and processor
|
| 17 |
-
model_path = "nvidia/NVIDIA-Nemotron-Parse-v1.1" #Nano-12B-v2-VL-BF16" # Or use a local path
|
| 18 |
device = "cuda:0"
|
| 19 |
|
| 20 |
model = AutoModel.from_pretrained(
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
# Load model and processor
|
| 17 |
+
model_path = "." #nvidia/NVIDIA-Nemotron-Parse-v1.1" #Nano-12B-v2-VL-BF16" # Or use a local path
|
| 18 |
device = "cuda:0"
|
| 19 |
|
| 20 |
model = AutoModel.from_pretrained(
|
hf_nemotron_parse_processor.py
CHANGED
|
@@ -252,7 +252,7 @@ class NemotronParseImageProcessor(BaseImageProcessor, ImageProcessingMixin):
|
|
| 252 |
class NemotronParseProcessor(ProcessorMixin):
|
| 253 |
|
| 254 |
attributes = ["image_processor", "tokenizer"]
|
| 255 |
-
image_processor_class = "
|
| 256 |
tokenizer_class = ("PreTrainedTokenizer", "PreTrainedTokenizerFast")
|
| 257 |
|
| 258 |
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
|
@@ -350,8 +350,24 @@ class NemotronParseProcessor(ProcessorMixin):
|
|
| 350 |
|
| 351 |
This method is compatible with AutoProcessor.from_pretrained().
|
| 352 |
"""
|
| 353 |
-
#
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
def save_pretrained(self, save_directory, **kwargs):
|
| 357 |
"""
|
|
|
|
| 252 |
class NemotronParseProcessor(ProcessorMixin):
|
| 253 |
|
| 254 |
attributes = ["image_processor", "tokenizer"]
|
| 255 |
+
image_processor_class = "AutoImageProcessor"
|
| 256 |
tokenizer_class = ("PreTrainedTokenizer", "PreTrainedTokenizerFast")
|
| 257 |
|
| 258 |
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
|
|
|
| 350 |
|
| 351 |
This method is compatible with AutoProcessor.from_pretrained().
|
| 352 |
"""
|
| 353 |
+
# Explicitly load subcomponents via Auto* to ensure remote auto_map is honored.
|
| 354 |
+
from transformers import AutoImageProcessor, AutoTokenizer
|
| 355 |
+
trust_remote_code = kwargs.get("trust_remote_code", None)
|
| 356 |
+
revision = kwargs.get("revision", None)
|
| 357 |
+
token = kwargs.get("token", None)
|
| 358 |
+
image_processor = AutoImageProcessor.from_pretrained(
|
| 359 |
+
pretrained_model_name_or_path,
|
| 360 |
+
trust_remote_code=trust_remote_code,
|
| 361 |
+
revision=revision,
|
| 362 |
+
token=token,
|
| 363 |
+
)
|
| 364 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
| 365 |
+
pretrained_model_name_or_path,
|
| 366 |
+
trust_remote_code=trust_remote_code,
|
| 367 |
+
revision=revision,
|
| 368 |
+
token=token,
|
| 369 |
+
)
|
| 370 |
+
return cls(image_processor=image_processor, tokenizer=tokenizer)
|
| 371 |
|
| 372 |
def save_pretrained(self, save_directory, **kwargs):
|
| 373 |
"""
|
preprocessor_config.json
CHANGED
|
@@ -3,10 +3,9 @@
|
|
| 3 |
"image_processor_type": "NemotronParseImageProcessor",
|
| 4 |
"processor_class": "NemotronParseProcessor",
|
| 5 |
"auto_map": {
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
},
|
| 9 |
-
|
| 10 |
"do_normalize": false,
|
| 11 |
"do_rescale": true,
|
| 12 |
"rescale_factor": 0.00392156862745098,
|
|
|
|
| 3 |
"image_processor_type": "NemotronParseImageProcessor",
|
| 4 |
"processor_class": "NemotronParseProcessor",
|
| 5 |
"auto_map": {
|
| 6 |
+
"AutoImageProcessor": "hf_nemotron_parse_processor.NemotronParseImageProcessor",
|
| 7 |
+
"AutoProcessor": "hf_nemotron_parse_processor.NemotronParseProcessor"
|
| 8 |
},
|
|
|
|
| 9 |
"do_normalize": false,
|
| 10 |
"do_rescale": true,
|
| 11 |
"rescale_factor": 0.00392156862745098,
|