katerynaCh committed on
Commit
ff28759
·
verified ·
1 Parent(s): c1b6e1f

Upload folder using huggingface_hub

Browse files
__init__.py ADDED
File without changes
__pycache__/hf_nemotron_parse_config.cpython-310.pyc ADDED
Binary file (4.17 kB). View file
 
__pycache__/latex2html.cpython-310.pyc ADDED
Binary file (11.9 kB). View file
 
__pycache__/postprocessing.cpython-310.pyc ADDED
Binary file (2.78 kB). View file
 
example.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image, ImageDraw
3
+ from transformers import AutoModel, AutoProcessor, AutoTokenizer, AutoConfig, AutoImageProcessor, GenerationConfig
4
+ from postprocessing import extract_classes_bboxes, transform_bbox_to_original, postprocess_text
5
+
6
+ #from hf_nemotron_parse_config import NemotronParseConfig
7
+ #from hf_nemotron_parse_modeling import NemotronParseForConditionalGeneration
8
+ #from hf_nemotron_parse_processor import NemotronParseProcessor, NemotronParseImageProcessor
9
+
10
+ #AutoConfig.register("nemotron_parse", NemotronParseConfig)
11
+ #AutoModel.register(NemotronParseConfig, NemotronParseForConditionalGeneration)
12
+ #AutoProcessor.register("nemotron_parse", NemotronParseProcessor)
13
+ #AutoImageProcessor.register("nemotron_parse", NemotronParseImageProcessor)
14
+
15
+
16
+ # Load model and processor
17
+ model_path = "nvidia/NVIDIA-Nemotron-Parse-v1.1" #Nano-12B-v2-VL-BF16" # Or use a local path
18
+ device = "cuda:0"
19
+
20
+ model = AutoModel.from_pretrained(
21
+ model_path,
22
+ trust_remote_code=True,
23
+ torch_dtype=torch.bfloat16
24
+ ).to(device).eval()
25
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
26
+ processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
27
+
28
+ # Load image
29
+ image = Image.open("../soa_1.png") #path/to/your/image.jpg")
30
+ task_prompt = "</s><s><predict_bbox><predict_classes><output_markdown>"
31
+
32
+ # Process image
33
+ inputs = processor(images=[image], text=task_prompt, return_tensors="pt").to(device)
34
+ prompt_ids = processor.tokenizer.encode(task_prompt, return_tensors="pt", add_special_tokens=False).cuda()
35
+
36
+
37
+ generation_config = GenerationConfig.from_pretrained(model_path, trust_remote_code=True)
38
+ # Generate text
39
+ outputs = model.generate(**inputs, generation_config=generation_config)
40
+
41
+ # Decode the generated text
42
+ generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
43
+ classes, bboxes, texts = extract_classes_bboxes(generated_text)
44
+ bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]
45
+
46
+ # Specify output formats for postprocessing
47
+ table_format = 'latex' # latex | HTML | markdown
48
+ text_format = 'markdown' # markdown | plain
49
+ blank_text_in_figures = False # remove text inside 'Picture' class
50
+ texts = [postprocess_text(text, cls = cls, table_format=table_format, text_format=text_format, blank_text_in_figures=blank_text_in_figures) for text, cls in zip(texts, classes)]
51
+
52
+ for cl, bb, txt in zip(classes, bboxes, texts):
53
+ print(cl, ': ', txt)
54
+
55
+ # OPTIONAL - Draw bounding boxes
56
+ draw = ImageDraw.Draw(image)
57
+ for bbox in bboxes:
58
+ draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline="red")
59
+