prithivMLmods committed on
Commit
cedcb38
·
verified ·
1 Parent(s): b7d04bf

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -213
app.py DELETED
@@ -1,213 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- import os
4
- import sys
5
- from PIL import Image, ImageDraw
6
- from transformers import AutoModel, AutoProcessor, AutoTokenizer, GenerationConfig
7
- from huggingface_hub import snapshot_download
8
- import spaces
9
- from typing import Optional, Tuple, Dict, Any, Iterable
10
- from gradio.themes import Soft
11
- from gradio.themes.utils import colors, fonts, sizes
12
-
13
# Pull the whole model repository locally before anything else; the
# postprocessing module imported below is expected to live inside it.
print("Downloading model snapshot to ensure all scripts are present...")
model_dir = snapshot_download(repo_id="nvidia/NVIDIA-Nemotron-Parse-v1.1")
print(f"Model downloaded to: {model_dir}")

# Expose the snapshot directory to the import system.
sys.path.append(model_dir)

try:
    from postprocessing import (
        extract_classes_bboxes,
        transform_bbox_to_original,
        postprocess_text,
    )
except ImportError as exc:
    # Report the failure, then abort start-up: the app cannot work without
    # these helpers.
    print(f" Error importing postprocessing: {exc}")
    raise exc
else:
    print("Successfully imported postprocessing functions.")

# Prefer the GPU when torch can see one, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
28
-
29
# Shades of the custom "steel_blue" palette, lightest (c50) to darkest (c950).
_STEEL_BLUE_SHADES = {
    "c50": "#EBF3F8",
    "c100": "#D3E5F0",
    "c200": "#A8CCE1",
    "c300": "#7DB3D2",
    "c400": "#529AC3",
    "c500": "#4682B4",
    "c600": "#3E72A0",
    "c700": "#36638C",
    "c800": "#2E5378",
    "c900": "#264364",
    "c950": "#1E3450",
}

# Attach the palette to gradio's ``colors`` module so it can be referenced
# as ``colors.steel_blue`` just like the built-in hues.
colors.steel_blue = colors.Color(name="steel_blue", **_STEEL_BLUE_SHADES)
43
-
44
class SteelBlueTheme(Soft):
    """Gradio ``Soft`` theme variant with gray surfaces and steel-blue accents.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``; a fixed set of style overrides is applied afterwards.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.steel_blue,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        """Build the base theme, then layer the steel-blue styling on top."""
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Style variables overridden on top of the Soft defaults. Values
        # starting with "*" refer to other theme variables (e.g. a hue shade).
        overrides = {
            # Page and block backgrounds.
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
            # Primary button: white text on a steel-blue gradient.
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_hover_dark": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            # Sliders.
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_600",
            # Block chrome, shadows and spacing.
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
            "button_large_padding": "11px",
            "color_accent_soft": "*primary_100",
            "block_label_background_fill": "*primary_200",
        }
        super().set(**overrides)
88
-
89
# Theme instance and extra CSS handed to the Blocks UI further down.
steel_blue_theme = SteelBlueTheme()
css = """
#main-title h1 { font-size: 2.3em !important; }
#output-title h2 { font-size: 2.1em !important; }
"""

print("Loading Model components...")

# Both the processor and the model are loaded from the local snapshot;
# trust_remote_code=True lets transformers run the code bundled with it.
processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)

# bfloat16 weights when CUDA is available, float32 otherwise.
_model_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
model = AutoModel.from_pretrained(
    model_dir,
    trust_remote_code=True,
    torch_dtype=_model_dtype,
)
model = model.to(device).eval()

# Best effort: use the generation settings stored with the model and fall
# back to a plain config with a 4096-token budget if they cannot be loaded.
try:
    generation_config = GenerationConfig.from_pretrained(model_dir, trust_remote_code=True)
except Exception as exc:
    print(f"Warning: Could not load GenerationConfig: {exc}. Using default.")
    generation_config = GenerationConfig(max_new_tokens=4096)

print("Model loaded successfully.")
111
-
112
# Outline colour used for each detected layout class.
_LAYOUT_COLORS = {
    "Table": "red",
    "Figure": "blue",
    "Text": "green",
    "Title": "purple",
}
# Colour used for any class that has no entry in _LAYOUT_COLORS.
_DEFAULT_LAYOUT_COLOR = "red"


def _ordered_box(bbox):
    """Return the first four values of ``bbox`` as ``[left, top, right, bottom]``.

    ``ImageDraw.rectangle`` requires ``x1 >= x0`` and ``y1 >= y0``; recent
    Pillow releases raise ``ValueError`` otherwise. Ordering the corners here
    keeps one inverted box from failing the whole request.
    """
    x0, y0, x1, y1 = bbox[0], bbox[1], bbox[2], bbox[3]
    return [min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)]


def _format_region(cls, txt):
    """Return the output-text fragment for one detected region."""
    if cls == "Table":
        return f"\n\n--- [Table] ---\n{txt}\n-----------------\n"
    if cls == "Figure":
        # Figures are only announced; their text content is not emitted.
        return "\n\n--- [Figure] ---\n(Figure Detected)\n-----------------\n"
    return f"{txt}\n"


@spaces.GPU
def process_ocr_task(image):
    """
    Parse a document image with NVIDIA-Nemotron-Parse-v1.1.

    Runs the model on the image, extracts the detected regions (class,
    bounding box, text), and returns the assembled text together with a copy
    of the image on which every region is outlined.

    Args:
        image: The document image as a PIL image, or None if nothing was
            uploaded.

    Returns:
        A ``(text, image)`` tuple:
        - no image given: a prompt asking for an upload, and ``None``;
        - region extraction fails: the raw generated text and the untouched
          input image;
        - otherwise: the post-processed text and the annotated image copy. If
          the post-processed text is blank, the raw generated text is
          returned instead.
    """
    if image is None:
        return "Please upload an image first.", None

    task_prompt = "</s><s><predict_bbox><predict_classes><output_markdown>"

    inputs = processor(images=[image], text=task_prompt, return_tensors="pt").to(device)

    if device.type == 'cuda':
        # Cast float32 inputs to bfloat16 to match the dtype the module
        # selects for the model weights on CUDA.
        inputs = {
            key: (value.to(torch.bfloat16) if value.dtype == torch.float32 else value)
            for key, value in inputs.items()
        }

    print("Running inference...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            generation_config=generation_config
        )

    generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]

    try:
        classes, bboxes, texts = extract_classes_bboxes(generated_text)
    except Exception as e:
        # Best effort: if the output cannot be parsed into regions, still
        # show the raw model text next to the original image.
        print(f"Error extracting boxes: {e}")
        return generated_text, image

    # Express the boxes in the pixel space of the uploaded image.
    bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]

    processed_texts = [
        postprocess_text(
            text,
            cls=cls,
            table_format='latex',
            text_format='markdown',
            blank_text_in_figures=False
        )
        for text, cls in zip(texts, classes)
    ]

    # Draw on a copy so the caller's image is never modified.
    result_image = image.copy()
    draw = ImageDraw.Draw(result_image)

    fragments = []
    for cls, bbox, txt in zip(classes, bboxes, processed_texts):
        outline = _LAYOUT_COLORS.get(cls, _DEFAULT_LAYOUT_COLOR)
        draw.rectangle(_ordered_box(bbox), outline=outline, width=3)
        fragments.append(_format_region(cls, txt))

    final_output_text = "".join(fragments)

    # Nothing usable after post-processing: fall back to the raw model text.
    if not final_output_text.strip() and generated_text:
        final_output_text = generated_text

    return final_output_text, result_image
186
-
187
# Two-column UI: image upload, button and examples on the left; parsed text
# and the annotated image on the right.
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
    # Page header.
    gr.Markdown(
        "# **NVIDIA Nemotron Parse v1.1 [OCR/Parsing]**",
        elem_id="main-title",
    )
    gr.Markdown(
        "Upload a document image to extract text, tables, and layout structures "
        "using NVIDIA's state-of-the-art Parse model."
    )

    with gr.Row():
        # Left column: input side.
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="Upload Image",
                sources=["upload", "clipboard"],
            )
            submit_btn = gr.Button("Process Document", variant="primary")

            examples = gr.Examples(
                examples=["examples/1.jpg"],
                inputs=image_input,
                label="Examples",
            )

        # Right column: results.
        with gr.Column(scale=2):
            output_text = gr.Textbox(
                label="Parsed Content (Markdown/LaTeX)",
                lines=8,
                show_copy_button=True,
            )
            output_image = gr.Image(
                label="Detected Layout & Bounding Boxes",
                type="pil",
            )

    # Clicking the button runs the parser and fills both result components.
    submit_btn.click(
        fn=process_ocr_task,
        inputs=[image_input],
        outputs=[output_text, output_image],
    )
211
-
212
if __name__ == "__main__":
    # Enable the request queue with max_size=20, then start the app with a
    # share link, the MCP server enabled and server-side rendering disabled.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch(share=True, mcp_server=True, ssr_mode=False)