lamco-development
/

granite-docling-258M-onnx

+#!/usr/bin/env python3
+"""
+granite-docling ONNX Usage Example with ONNX Runtime
+Demonstrates how to use the converted granite-docling model for document processing
+"""
+import onnxruntime as ort
+import numpy as np
+from PIL import Image
+import json
def load_granite_docling_onnx(model_path: str):
    """Create an ONNX Runtime session for the model and print its I/O signature.

    Args:
        model_path: Path to the ``.onnx`` model file.

    Returns:
        The ``onnxruntime.InferenceSession`` created for ``model_path``.
    """
    print(f"Loading granite-docling ONNX model from: {model_path}")
    session = ort.InferenceSession(model_path)

    # Inputs and outputs are reported in the same "name: shape (type)" format,
    # so drive both listings from one loop.
    print("Model Information:")
    sections = (
        ("  Inputs:", session.get_inputs),
        ("  Outputs:", session.get_outputs),
    )
    for heading, list_nodes in sections:
        print(heading)
        for node in list_nodes():
            print(f"    {node.name}: {node.shape} ({node.type})")
    return session
def preprocess_document_image(image_path: str) -> np.ndarray:
    """Load a document image and convert it into a model input tensor.

    The image is converted to RGB, resized to 512x512, scaled to [0, 1],
    normalized per channel, and rearranged to channels-first layout with a
    leading batch dimension.

    Args:
        image_path: Path to the image file to load.

    Returns:
        A contiguous ``float32`` array of shape ``(1, 3, 512, 512)``.
    """
    # Use a context manager so the underlying file handle is closed promptly;
    # convert() returns a fully loaded copy that outlives the handle.
    with Image.open(image_path) as source:
        rgb = source.convert('RGB').resize((512, 512))

    # Scale 8-bit channel values to [0, 1].
    pixel_values = np.asarray(rgb, dtype=np.float32) / 255.0

    # Keep the statistics in float32: float64 constants would silently promote
    # the whole tensor to float64, which ONNX Runtime rejects when the graph
    # input is declared as a float32 tensor.
    # NOTE(review): these are the commonly used ImageNet channel statistics;
    # confirm they match the model's preprocessor_config.json.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    pixel_values = (pixel_values - mean) / std

    # HWC -> CHW, then add the batch dimension: [1, channels, height, width].
    pixel_values = pixel_values.transpose(2, 0, 1)[np.newaxis, ...]

    # transpose() yields a strided view; hand the runtime a dense float32 buffer.
    return np.ascontiguousarray(pixel_values, dtype=np.float32)
def create_text_inputs(prompt: str = "Convert this document to DocTags:") -> tuple:
    """Build placeholder ``input_ids`` and ``attention_mask`` arrays for a prompt.

    This is NOT a real tokenizer: each whitespace-separated word is mapped to
    a sequential id starting at 2, wrapped by a leading ``1`` and a trailing
    ``2``. Use the model's actual tokenizer for meaningful results.

    Args:
        prompt: Text prompt; only its word count influences the output.

    Returns:
        ``(input_ids, attention_mask)``, both ``int64`` arrays of shape
        ``(1, word_count + 2)``; the mask is all ones.
    """
    word_count = len(prompt.split())

    # Leading marker 1, then one sequential id per word, then closing marker 2.
    word_ids = np.arange(2, word_count + 2, dtype=np.int64)
    input_ids = np.concatenate(([1], word_ids, [2])).astype(np.int64).reshape(1, -1)

    # Every position is a real token, so the mask is all ones.
    attention_mask = np.ones_like(input_ids)
    return input_ids, attention_mask
def run_granite_docling_inference(session, image_path: str):
    """Run one forward pass of the model on a document image.

    Args:
        session: Inference session exposing ``run(output_names, feeds)``, as
            returned by ``load_granite_docling_onnx``.
        image_path: Path to the document image to process.

    Returns:
        The argmax over the last axis of the first model output, i.e. the
        highest-scoring token id at each position (not decoded to text).
    """
    print(f"Processing document: {image_path}")

    # Assemble the feed dict keyed by the input names the session is called with.
    feeds = {'pixel_values': preprocess_document_image(image_path)}
    text_ids, text_mask = create_text_inputs()
    feeds['input_ids'] = text_ids
    feeds['attention_mask'] = text_mask

    print("Input shapes:")
    for input_name, tensor in feeds.items():
        print(f"  {input_name}: {tensor.shape}")

    # Passing None as the output list requests every output; the first one
    # is treated as the logits.
    logits = session.run(None, feeds)[0]
    print(f"Output logits shape: {logits.shape}")

    # Simplified decode: take the best-scoring id per position. Turning the
    # ids into DocTags markup requires the proper tokenizer.
    predicted_tokens = np.argmax(logits, axis=-1)
    print(f"Predicted tokens shape: {predicted_tokens.shape}")

    print("✅ Inference completed successfully")
    return predicted_tokens
def main():
    """Run the example end to end: load the model, then process a sample image.

    Loads ``model.onnx`` from the working directory and, if
    ``example_document.png`` exists, runs inference on it. Any failure is
    reported on stdout together with a traceback instead of propagating.
    """
    # Imported here rather than under the __main__ guard so that main() also
    # works when this module is imported and main() is called directly.
    import os
    import traceback

    model_path = "model.onnx"  # Path to downloaded ONNX model
    # Replace with an actual document image path.
    image_path = "example_document.png"

    try:
        session = load_granite_docling_onnx(model_path)
        if os.path.exists(image_path):
            run_granite_docling_inference(session, image_path)
            print("✅ granite-docling ONNX inference successful!")
        else:
            print("⚠️ No example document provided")
            print("  Create a test document image to run inference")
    except Exception as e:
        # Top-level boundary of an example script: report and keep the
        # traceback visible rather than crashing.
        print(f"❌ Example failed: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()