prithivMLmods committed on
Commit
301b940
·
verified ·
1 Parent(s): 8967bbb

update app

Browse files
Files changed (1) hide show
  1. app.py +9 -24
app.py CHANGED
@@ -10,27 +10,22 @@ from typing import Optional, Tuple, Dict, Any, Iterable
10
  from gradio.themes import Soft
11
  from gradio.themes.utils import colors, fonts, sizes
12
 
13
- # --- Model & Script Download ---
14
  print("Downloading model snapshot to ensure all scripts are present...")
15
- # Download the full model repo to ensure postprocessing.py is available locally
16
  model_dir = snapshot_download(repo_id="nvidia/NVIDIA-Nemotron-Parse-v1.1")
17
  print(f"Model downloaded to: {model_dir}")
18
 
19
- # Add the model directory to sys.path so we can import postprocessing
20
  sys.path.append(model_dir)
21
 
22
  try:
23
  from postprocessing import extract_classes_bboxes, transform_bbox_to_original, postprocess_text
24
- print("Successfully imported postprocessing functions.")
25
  except ImportError as e:
26
- print(f"Error importing postprocessing: {e}")
27
  raise e
28
 
29
- # --- Device Setup ---
30
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
  print(f"Using device: {device}")
32
 
33
- # --- Theme Definition ---
34
  colors.steel_blue = colors.Color(
35
  name="steel_blue",
36
  c50="#EBF3F8",
@@ -97,7 +92,6 @@ css = """
97
  #output-title h2 { font-size: 2.1em !important; }
98
  """
99
 
100
- # --- Model Loading ---
101
  print("Loading Model components...")
102
 
103
  processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
@@ -113,7 +107,7 @@ except Exception as e:
113
  print(f"Warning: Could not load GenerationConfig: {e}. Using default.")
114
  generation_config = GenerationConfig(max_new_tokens=4096)
115
 
116
- print("Model loaded successfully.")
117
 
118
  @spaces.GPU
119
  def process_ocr_task(image):
@@ -130,7 +124,7 @@ def process_ocr_task(image):
130
  if device.type == 'cuda':
131
  inputs = {k: v.to(torch.bfloat16) if v.dtype == torch.float32 else v for k, v in inputs.items()}
132
 
133
- print("🏃 Running inference...")
134
  with torch.no_grad():
135
  outputs = model.generate(
136
  **inputs,
@@ -145,7 +139,6 @@ def process_ocr_task(image):
145
  print(f"Error extracting boxes: {e}")
146
  return generated_text, image
147
 
148
- # Transform boxes to original image size
149
  bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]
150
 
151
  table_format = 'latex'
@@ -198,10 +191,9 @@ def process_ocr_task(image):
198
 
199
  return final_output_text, result_image
200
 
201
- # --- Gradio Interface ---
202
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
203
- gr.Markdown("# **NVIDIA Nemotron Parse v1.1 [OCR/Parsing]**", elem_id="main-title")
204
- gr.Markdown("Upload a document image to extract text, tables, and layout structures using NVIDIA's state-of-the-art Parse model.")
205
 
206
  with gr.Row():
207
  with gr.Column(scale=1):
@@ -209,22 +201,15 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
209
  submit_btn = gr.Button("Process Document", variant="primary")
210
 
211
  examples = gr.Examples(
212
- examples=["examples/1.jpg"],
213
  inputs=image_input,
214
  label="Examples"
215
  )
216
 
217
  with gr.Column(scale=2):
218
- output_text = gr.Textbox(label="Parsed Content (Markdown/LaTeX)", lines=20, show_copy_button=True)
219
  output_image = gr.Image(label="Detected Layout & Bounding Boxes", type="pil")
220
 
221
- with gr.Accordion("Technical Details", open=False):
222
- gr.Markdown("""
223
- **Model:** [nvidia/NVIDIA-Nemotron-Parse-v1.1](https://huggingface.co/nvidia/NVIDIA-Nemotron-Parse-v1.1)
224
- **Architecture:** Llama-3-Vila based.
225
- **Capabilities:** High-accuracy OCR, Table extraction (to LaTeX/HTML), Figure detection.
226
- """)
227
-
228
  submit_btn.click(
229
  fn=process_ocr_task,
230
  inputs=[image_input],
@@ -232,4 +217,4 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
232
  )
233
 
234
  if __name__ == "__main__":
235
- demo.queue(max_size=20).launch(share=True, mcp_server=True, ssr_mode=False)
 
10
  from gradio.themes import Soft
11
  from gradio.themes.utils import colors, fonts, sizes
12
 
 
13
  print("Downloading model snapshot to ensure all scripts are present...")
 
14
  model_dir = snapshot_download(repo_id="nvidia/NVIDIA-Nemotron-Parse-v1.1")
15
  print(f"Model downloaded to: {model_dir}")
16
 
 
17
  sys.path.append(model_dir)
18
 
19
  try:
20
  from postprocessing import extract_classes_bboxes, transform_bbox_to_original, postprocess_text
21
+ print("Successfully imported postprocessing functions.")
22
  except ImportError as e:
23
+ print(f"Error importing postprocessing: {e}")
24
  raise e
25
 
 
26
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
  print(f"Using device: {device}")
28
 
 
29
  colors.steel_blue = colors.Color(
30
  name="steel_blue",
31
  c50="#EBF3F8",
 
92
  #output-title h2 { font-size: 2.1em !important; }
93
  """
94
 
 
95
  print("Loading Model components...")
96
 
97
  processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
 
107
  print(f"Warning: Could not load GenerationConfig: {e}. Using default.")
108
  generation_config = GenerationConfig(max_new_tokens=4096)
109
 
110
+ print("Model loaded successfully.")
111
 
112
  @spaces.GPU
113
  def process_ocr_task(image):
 
124
  if device.type == 'cuda':
125
  inputs = {k: v.to(torch.bfloat16) if v.dtype == torch.float32 else v for k, v in inputs.items()}
126
 
127
+ print("👊 Running inference...")
128
  with torch.no_grad():
129
  outputs = model.generate(
130
  **inputs,
 
139
  print(f"Error extracting boxes: {e}")
140
  return generated_text, image
141
 
 
142
  bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]
143
 
144
  table_format = 'latex'
 
191
 
192
  return final_output_text, result_image
193
 
 
194
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
195
+ gr.Markdown("# **NVIDIA Nemotron Parse v1.1**", elem_id="main-title")
196
+ gr.Markdown("Upload a document image to extract text, tables, and layout structures using NVIDIA's Nemotron Parse model.")
197
 
198
  with gr.Row():
199
  with gr.Column(scale=1):
 
201
  submit_btn = gr.Button("Process Document", variant="primary")
202
 
203
  examples = gr.Examples(
204
+ examples=["examples/1.jpg", "examples/2.jpg", "examples/3.jpg"],
205
  inputs=image_input,
206
  label="Examples"
207
  )
208
 
209
  with gr.Column(scale=2):
210
+ output_text = gr.Textbox(label="Parsed Content (Markdown/LaTeX)", lines=8, show_copy_button=True)
211
  output_image = gr.Image(label="Detected Layout & Bounding Boxes", type="pil")
212
 
 
 
 
 
 
 
 
213
  submit_btn.click(
214
  fn=process_ocr_task,
215
  inputs=[image_input],
 
217
  )
218
 
219
  if __name__ == "__main__":
220
+ demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False)