Spaces:

prithivMLmods
/

NVIDIA-Nemotron-Parse-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Nov 21

Commit

301b940

verified ·

1 Parent(s): 8967bbb

update app

Browse files

Files changed (1) hide show

app.py +9 -24

app.py CHANGED Viewed

@@ -10,27 +10,22 @@ from typing import Optional, Tuple, Dict, Any, Iterable
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
-# --- Model & Script Download ---
 print("Downloading model snapshot to ensure all scripts are present...")
-# Download the full model repo to ensure postprocessing.py is available locally
 model_dir = snapshot_download(repo_id="nvidia/NVIDIA-Nemotron-Parse-v1.1")
 print(f"Model downloaded to: {model_dir}")
-# Add the model directory to sys.path so we can import postprocessing
 sys.path.append(model_dir)
 try:
     from postprocessing import extract_classes_bboxes, transform_bbox_to_original, postprocess_text
-    print("✅ Successfully imported postprocessing functions.")
 except ImportError as e:
-    print(f"❌ Error importing postprocessing: {e}")
     raise e
-# --- Device Setup ---
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
-# --- Theme Definition ---
 colors.steel_blue = colors.Color(
     name="steel_blue",
     c50="#EBF3F8",
@@ -97,7 +92,6 @@ css = """
 #output-title h2 { font-size: 2.1em !important; }
 """
-# --- Model Loading ---
 print("Loading Model components...")
 processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
@@ -113,7 +107,7 @@ except Exception as e:
     print(f"Warning: Could not load GenerationConfig: {e}. Using default.")
     generation_config = GenerationConfig(max_new_tokens=4096)
-print("✅ Model loaded successfully.")
 @spaces.GPU
 def process_ocr_task(image):
@@ -130,7 +124,7 @@ def process_ocr_task(image):
     if device.type == 'cuda':
         inputs = {k: v.to(torch.bfloat16) if v.dtype == torch.float32 else v for k, v in inputs.items()}
-    print("🏃 Running inference...")
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -145,7 +139,6 @@ def process_ocr_task(image):
         print(f"Error extracting boxes: {e}")
         return generated_text, image
-    # Transform boxes to original image size
     bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]
     table_format = 'latex'
@@ -198,10 +191,9 @@ def process_ocr_task(image):
     return final_output_text, result_image
-# --- Gradio Interface ---
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
-    gr.Markdown("# **NVIDIA Nemotron Parse v1.1 [OCR/Parsing]**", elem_id="main-title")
-    gr.Markdown("Upload a document image to extract text, tables, and layout structures using NVIDIA's state-of-the-art Parse model.")
     with gr.Row():
         with gr.Column(scale=1):
@@ -209,22 +201,15 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
             submit_btn = gr.Button("Process Document", variant="primary")
             examples = gr.Examples(
-                examples=["examples/1.jpg"],
                 inputs=image_input,
                 label="Examples"
             )
         with gr.Column(scale=2):
-            output_text = gr.Textbox(label="Parsed Content (Markdown/LaTeX)", lines=20, show_copy_button=True)
             output_image = gr.Image(label="Detected Layout & Bounding Boxes", type="pil")
-            with gr.Accordion("Technical Details", open=False):
-                gr.Markdown("""
-                **Model:** [nvidia/NVIDIA-Nemotron-Parse-v1.1](https://huggingface.co/nvidia/NVIDIA-Nemotron-Parse-v1.1)
-                **Architecture:** Llama-3-Vila based.
-                **Capabilities:** High-accuracy OCR, Table extraction (to LaTeX/HTML), Figure detection.
-                """)
     submit_btn.click(
         fn=process_ocr_task,
         inputs=[image_input],
@@ -232,4 +217,4 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     )
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch(share=True, mcp_server=True, ssr_mode=False)

 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
 print("Downloading model snapshot to ensure all scripts are present...")
 model_dir = snapshot_download(repo_id="nvidia/NVIDIA-Nemotron-Parse-v1.1")
 print(f"Model downloaded to: {model_dir}")
 sys.path.append(model_dir)
 try:
     from postprocessing import extract_classes_bboxes, transform_bbox_to_original, postprocess_text
+    print("Successfully imported postprocessing functions.")
 except ImportError as e:
+    print(f"Error importing postprocessing: {e}")
     raise e
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 colors.steel_blue = colors.Color(
     name="steel_blue",
     c50="#EBF3F8",
 #output-title h2 { font-size: 2.1em !important; }
 """
 print("Loading Model components...")
 processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
     print(f"Warning: Could not load GenerationConfig: {e}. Using default.")
     generation_config = GenerationConfig(max_new_tokens=4096)
+print("Model loaded successfully.")
 @spaces.GPU
 def process_ocr_task(image):
     if device.type == 'cuda':
         inputs = {k: v.to(torch.bfloat16) if v.dtype == torch.float32 else v for k, v in inputs.items()}
+    print("👊 Running inference...")
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
         print(f"Error extracting boxes: {e}")
         return generated_text, image
     bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]
     table_format = 'latex'
     return final_output_text, result_image
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
+    gr.Markdown("# **NVIDIA Nemotron Parse v1.1**", elem_id="main-title")
+    gr.Markdown("Upload a document image to extract text, tables, and layout structures using NVIDIA's Nemotron Parse model.")
     with gr.Row():
         with gr.Column(scale=1):
             submit_btn = gr.Button("Process Document", variant="primary")
             examples = gr.Examples(
+                examples=["examples/1.jpg", "examples/2.jpg", "examples/3.jpg"],
                 inputs=image_input,
                 label="Examples"
             )
         with gr.Column(scale=2):
+            output_text = gr.Textbox(label="Parsed Content (Markdown/LaTeX)", lines=8, show_copy_button=True)
             output_image = gr.Image(label="Detected Layout & Bounding Boxes", type="pil")
     submit_btn.click(
         fn=process_ocr_task,
         inputs=[image_input],
     )
 if __name__ == "__main__":
+    demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False)