AdithyaSK committed
Commit e03ab78 · 1 Parent(s): b3edf84

Update README.md and app.py: change SDK version to 6.0.2 and enhance error handling in document indexing

Files changed (2):
  1. README.md +1 -2
  2. app.py +157 -570
README.md CHANGED
@@ -4,12 +4,11 @@ emoji: 👁️
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
-sdk_version: 5.9.1
+sdk_version: 6.0.2
 app_file: app.py
 pinned: false
 license: mit
 short_description: Universal Multilingual Multimodal Document Retrieval
-hardware: zero-gpu
 ---
 
 # NetraEmbed - Universal Multilingual Multimodal Document Retrieval
app.py CHANGED
@@ -10,15 +10,13 @@ Features:
 - Query input with top-k selection (default: 5)
 - Similarity score display
 - Side-by-side comparison when both models are selected
-- Progressive loading with real-time updates
-- Proper error handling
 - ZeroGPU integration for efficient GPU usage
 """
 
 import io
 import gc
 import math
-from typing import Iterator, List, Optional, Tuple
+from typing import List, Optional, Tuple
 
 import gradio as gr
 import torch
@@ -37,7 +35,11 @@ from colpali_engine.interpretability.similarity_map_utils import normalize_similarity_map
 
 # Configuration
 MAX_BATCH_SIZE = 32  # Maximum pages to process at once
-DEFAULT_DURATION = 120  # Default GPU duration in seconds
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+print(f"Device: {device}")
+if torch.cuda.is_available():
+    print(f"GPU: {torch.cuda.get_device_name(0)}")
 
 # Global state for models and indexed documents
 class DocumentIndex:
@@ -49,37 +51,24 @@ class DocumentIndex:
         self.bigemma_processor = None
         self.colgemma_model = None
         self.colgemma_processor = None
-        self.models_loaded = {"bigemma": False, "colgemma": False}
 
 doc_index = DocumentIndex()
 
 # Helper functions
-def get_loaded_models() -> List[str]:
-    """Get list of currently loaded models."""
-    loaded = []
-    if doc_index.bigemma_model is not None:
-        loaded.append("BiGemma3")
-    if doc_index.colgemma_model is not None:
-        loaded.append("ColGemma3")
-    return loaded
-
-def get_model_choice_from_loaded() -> str:
-    """Determine model choice string based on what's loaded."""
-    loaded = get_loaded_models()
-    if "BiGemma3" in loaded and "ColGemma3" in loaded:
-        return "Both"
-    elif "BiGemma3" in loaded:
-        return "NetraEmbed (BiGemma3)"
-    elif "ColGemma3" in loaded:
-        return "ColNetraEmbed (ColGemma3)"
-    else:
-        return ""
-
-@spaces.GPU(duration=DEFAULT_DURATION)
+def pdf_to_images(pdf_path: str) -> List[Image.Image]:
+    """Convert PDF to list of PIL Images with error handling."""
+    try:
+        print(f"Converting PDF to images: {pdf_path}")
+        images = convert_from_path(pdf_path, dpi=200)
+        print(f"Converted {len(images)} pages")
+        return images
+    except Exception as e:
+        print(f"❌ PDF conversion error: {str(e)}")
+        raise gr.Error(f"Failed to convert PDF: {str(e)}")
+
+@spaces.GPU
 def load_bigemma_model():
     """Load BiGemma3 model and processor."""
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
     if doc_index.bigemma_model is None:
         print("Loading BiGemma3 (NetraEmbed)...")
         try:
@@ -93,18 +82,15 @@ def load_bigemma_model():
                 device_map=device,
             )
             doc_index.bigemma_model.eval()
-            doc_index.models_loaded["bigemma"] = True
             print("✓ BiGemma3 loaded successfully")
         except Exception as e:
             print(f"❌ Failed to load BiGemma3: {str(e)}")
-            raise
-    return doc_index.bigemma_model, doc_index.bigemma_processor
+            raise gr.Error(f"Failed to load BiGemma3: {str(e)}")
+    return "✅ BiGemma3 loaded"
 
-@spaces.GPU(duration=DEFAULT_DURATION)
+@spaces.GPU
 def load_colgemma_model():
     """Load ColGemma3 model and processor."""
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
     if doc_index.colgemma_model is None:
         print("Loading ColGemma3 (ColNetraEmbed)...")
         try:
@@ -118,12 +104,11 @@
                 "Cognitive-Lab/ColNetraEmbed",
                 use_fast=True,
             )
-            doc_index.models_loaded["colgemma"] = True
             print("✓ ColGemma3 loaded successfully")
         except Exception as e:
             print(f"❌ Failed to load ColGemma3: {str(e)}")
-            raise
-    return doc_index.colgemma_model, doc_index.colgemma_processor
+            raise gr.Error(f"Failed to load ColGemma3: {str(e)}")
+    return "✅ ColGemma3 loaded"
 
 def unload_models():
     """Unload models and free GPU memory."""
@@ -133,14 +118,12 @@
         del doc_index.bigemma_processor
         doc_index.bigemma_model = None
         doc_index.bigemma_processor = None
-        doc_index.models_loaded["bigemma"] = False
 
     if doc_index.colgemma_model is not None:
         del doc_index.colgemma_model
         del doc_index.colgemma_processor
         doc_index.colgemma_model = None
        doc_index.colgemma_processor = None
-        doc_index.models_loaded["colgemma"] = False
 
     # Clear embeddings and images
     doc_index.bigemma_embeddings = None
@@ -157,42 +140,74 @@
     except Exception as e:
         return f"❌ Error unloading models: {str(e)}"
 
-def clear_incompatible_embeddings(model_choice: str) -> str:
-    """Clear embeddings that are incompatible with currently loading models."""
-    cleared = []
-
-    # If loading only BiGemma3, clear ColGemma3 embeddings
-    if model_choice == "NetraEmbed (BiGemma3)":
-        if doc_index.colgemma_embeddings is not None:
-            doc_index.colgemma_embeddings = None
-            doc_index.images = []
-            cleared.append("ColGemma3")
-            print("Cleared ColGemma3 embeddings")
-
-    # If loading only ColGemma3, clear BiGemma3 embeddings
-    elif model_choice == "ColNetraEmbed (ColGemma3)":
-        if doc_index.bigemma_embeddings is not None:
-            doc_index.bigemma_embeddings = None
-            doc_index.images = []
-            cleared.append("BiGemma3")
-            print("Cleared BiGemma3 embeddings")
-
-    if cleared:
-        return f"Cleared {', '.join(cleared)} embeddings - please re-index"
-    return ""
+@spaces.GPU
+def index_bigemma_images(images: List[Image.Image]) -> torch.Tensor:
+    """Index images with BiGemma3 model."""
+    model, processor = doc_index.bigemma_model, doc_index.bigemma_processor
+    batch_images = processor.process_images(images).to(device)
+    embeddings = model(**batch_images, embedding_dim=768)
+    return embeddings
+
+@spaces.GPU
+def index_colgemma_images(images: List[Image.Image]) -> torch.Tensor:
+    """Index images with ColGemma3 model."""
+    model, processor = doc_index.colgemma_model, doc_index.colgemma_processor
+    batch_images = processor.process_images(images).to(device)
+    embeddings = model(**batch_images)
+    return embeddings
+
+def index_document(pdf_file, model_choice: str):
+    """Upload and index a PDF document."""
+    if pdf_file is None:
+        return "⚠️ Please upload a PDF document first."
 
-def pdf_to_images(pdf_path: str) -> List[Image.Image]:
-    """Convert PDF to list of PIL Images with error handling."""
     try:
-        print(f"Converting PDF to images: {pdf_path}")
-        images = convert_from_path(pdf_path, dpi=200)
-        print(f"Converted {len(images)} pages")
-        return images
+        status = []
+
+        # Convert PDF to images
+        status.append("⏳ Converting PDF to images...")
+        doc_index.images = pdf_to_images(pdf_file.name)
+        num_pages = len(doc_index.images)
+        status.append(f"✓ Converted PDF to {num_pages} images")
+
+        if num_pages > MAX_BATCH_SIZE:
+            status.append(f"⚠️ Large PDF ({num_pages} pages). Processing in batches...")
+
+        # Index with BiGemma3
+        if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
+            if doc_index.bigemma_model is None:
+                status.append("⏳ Loading BiGemma3 model...")
+                load_bigemma_model()
+                status.append("✓ BiGemma3 loaded")
+            else:
+                status.append("✓ Using cached BiGemma3 model")
+
+            status.append("⏳ Encoding images with BiGemma3...")
+            doc_index.bigemma_embeddings = index_bigemma_images(doc_index.images)
+            status.append(f"✓ Indexed with BiGemma3 (shape: {doc_index.bigemma_embeddings.shape})")
+
+        # Index with ColGemma3
+        if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
+            if doc_index.colgemma_model is None:
+                status.append("⏳ Loading ColGemma3 model...")
+                load_colgemma_model()
+                status.append("✓ ColGemma3 loaded")
+            else:
+                status.append("✓ Using cached ColGemma3 model")
+
+            status.append("⏳ Encoding images with ColGemma3...")
+            doc_index.colgemma_embeddings = index_colgemma_images(doc_index.images)
+            status.append(f"✓ Indexed with ColGemma3 (shape: {doc_index.colgemma_embeddings.shape})")
+
+        return "\n".join(status) + "\n\n✅ Document ready for querying!"
+
     except Exception as e:
-        print(f"❌ PDF conversion error: {str(e)}")
-        raise Exception(f"Failed to convert PDF: {str(e)}")
+        import traceback
+        error_details = traceback.format_exc()
+        print(f"Indexing error: {error_details}")
+        return f"❌ Error indexing document: {str(e)}"
 
-@spaces.GPU(duration=DEFAULT_DURATION)
+@spaces.GPU
 def generate_colgemma_heatmap(
     image: Image.Image,
     query: str,
@@ -203,17 +218,14 @@
 ) -> Image.Image:
     """Generate heatmap overlay for ColGemma3 results."""
     try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-
         # Re-process the single image to get the proper batch_images dict for image mask
         batch_images = processor.process_images([image]).to(device)
 
-        # Create image mask manually (ColGemmaProcessor3 doesn't have get_image_mask)
+        # Create image mask manually
         if "input_ids" in batch_images and hasattr(model.config, "image_token_id"):
             image_token_id = model.config.image_token_id
             image_mask = batch_images["input_ids"] == image_token_id
         else:
-            # Fallback: all tokens are image tokens
             image_mask = torch.ones(
                 image_embedding.shape[0], image_embedding.shape[1], dtype=torch.bool, device=device
             )
@@ -225,10 +237,9 @@
         if n_side * n_side == num_image_tokens:
             n_patches = (n_side, n_side)
         else:
-            # Fallback: use default calculation
            n_patches = (16, 16)
 
-        # Generate similarity maps (returns a list of tensors)
+        # Generate similarity maps
         similarity_maps_list = get_similarity_maps_from_embeddings(
             image_embeddings=image_embedding,
             query_embeddings=query_embedding,
@@ -236,10 +247,9 @@
             image_mask=image_mask,
         )
 
-        # Get the similarity map for our image (returns a list, get first element)
-        similarity_map = similarity_maps_list[0]  # (query_length, n_patches_x, n_patches_y)
+        similarity_map = similarity_maps_list[0]
 
-        # Aggregate across all query tokens (mean)
+        # Aggregate across all query tokens
         if similarity_map.dtype == torch.bfloat16:
             similarity_map = similarity_map.float()
         aggregated_map = torch.mean(similarity_map, dim=0)
@@ -247,10 +257,8 @@
         # Convert the image to an array
         img_array = np.array(image.convert("RGBA"))
 
-        # Normalize the similarity map and convert to numpy
+        # Normalize the similarity map
         similarity_map_array = normalize_similarity_map(aggregated_map).to(torch.float32).cpu().numpy()
-
-        # Reshape to match PIL convention
         similarity_map_array = rearrange(similarity_map_array, "h w -> w h")
 
         # Create PIL image from similarity map
@@ -280,121 +288,19 @@
 
     except Exception as e:
         print(f"❌ Heatmap generation error: {str(e)}")
-        # Return original image if heatmap generation fails
         return image
 
-@spaces.GPU(duration=DEFAULT_DURATION)
-def index_bigemma_images(images: List[Image.Image]) -> torch.Tensor:
-    """Index images with BiGemma3 model."""
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model, processor = doc_index.bigemma_model, doc_index.bigemma_processor
-
-    batch_images = processor.process_images(images).to(device)
-    embeddings = model(**batch_images, embedding_dim=768)
-
-    return embeddings
-
-@spaces.GPU(duration=DEFAULT_DURATION)
-def index_colgemma_images(images: List[Image.Image]) -> torch.Tensor:
-    """Index images with ColGemma3 model."""
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model, processor = doc_index.colgemma_model, doc_index.colgemma_processor
-
-    batch_images = processor.process_images(images).to(device)
-    embeddings = model(**batch_images)
-
-    return embeddings
-
-def index_document(pdf_file, model_choice: str) -> Iterator[str]:
-    """Upload and index a PDF document with progress updates."""
-    if pdf_file is None:
-        yield "⚠️ Please upload a PDF document first."
-        return
-
-    try:
-        status_messages = []
-
-        # Convert PDF to images
-        status_messages.append("⏳ Converting PDF to images...")
-        yield "\n".join(status_messages)
-
-        doc_index.images = pdf_to_images(pdf_file.name)
-        num_pages = len(doc_index.images)
-
-        status_messages.append(f"✓ Converted PDF to {num_pages} images")
-
-        # Check if we need to batch process
-        if num_pages > MAX_BATCH_SIZE:
-            status_messages.append(f"⚠️ Large PDF ({num_pages} pages). Processing in batches of {MAX_BATCH_SIZE}...")
-            yield "\n".join(status_messages)
-
-        # Index with BiGemma3
-        if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
-            if doc_index.bigemma_model is None:
-                status_messages.append("⏳ Loading BiGemma3 model...")
-                yield "\n".join(status_messages)
-                load_bigemma_model()
-                status_messages.append("✓ BiGemma3 loaded")
-            else:
-                status_messages.append("✓ Using cached BiGemma3 model")
-
-            yield "\n".join(status_messages)
-
-            status_messages.append("⏳ Encoding images with BiGemma3...")
-            yield "\n".join(status_messages)
-
-            doc_index.bigemma_embeddings = index_bigemma_images(doc_index.images)
-
-            status_messages.append("✓ Indexed with BiGemma3 (shape: {})".format(doc_index.bigemma_embeddings.shape))
-            yield "\n".join(status_messages)
-
-        # Index with ColGemma3
-        if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
-            if doc_index.colgemma_model is None:
-                status_messages.append("⏳ Loading ColGemma3 model...")
-                yield "\n".join(status_messages)
-                load_colgemma_model()
-                status_messages.append("✓ ColGemma3 loaded")
-            else:
-                status_messages.append("✓ Using cached ColGemma3 model")
-
-            yield "\n".join(status_messages)
-
-            status_messages.append("⏳ Encoding images with ColGemma3...")
-            yield "\n".join(status_messages)
-
-            doc_index.colgemma_embeddings = index_colgemma_images(doc_index.images)
-
-            status_messages.append(
-                "✓ Indexed with ColGemma3 (shape: {})".format(doc_index.colgemma_embeddings.shape)
-            )
-            yield "\n".join(status_messages)
-
-        final_status = "\n".join(status_messages) + "\n\n✅ Document ready for querying!"
-        yield final_status
-
-    except Exception as e:
-        import traceback
-
-        error_details = traceback.format_exc()
-        print(f"Indexing error: {error_details}")
-        yield f"❌ Error indexing document: {str(e)}"
-
-@spaces.GPU(duration=DEFAULT_DURATION)
+@spaces.GPU
 def query_bigemma(query: str, top_k: int) -> Tuple[str, List]:
     """Query indexed documents with BiGemma3."""
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     model, processor = doc_index.bigemma_model, doc_index.bigemma_processor
 
     # Encode query
     batch_query = processor.process_texts([query]).to(device)
     query_embedding = model(**batch_query, embedding_dim=768)
 
-    # Compute scores (cosine similarity)
-    scores = processor.score(
-        qs=query_embedding,
-        ps=doc_index.bigemma_embeddings,
-    )
+    # Compute scores
+    scores = processor.score(qs=query_embedding, ps=doc_index.bigemma_embeddings)
 
     # Get top-k results
     top_k_actual = min(top_k, len(doc_index.images))
@@ -413,21 +319,17 @@ def query_bigemma(query: str, top_k: int) -> Tuple[str, List]:
 
     return results_text, gallery_images
 
-@spaces.GPU(duration=DEFAULT_DURATION)
+@spaces.GPU
 def query_colgemma(query: str, top_k: int, show_heatmap: bool = False) -> Tuple[str, List]:
     """Query indexed documents with ColGemma3."""
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     model, processor = doc_index.colgemma_model, doc_index.colgemma_processor
 
     # Encode query
     batch_query = processor.process_queries([query]).to(device)
     query_embedding = model(**batch_query)
 
-    # Compute scores (MaxSim)
-    scores = processor.score_multi_vector(
-        qs=query_embedding,
-        ps=doc_index.colgemma_embeddings,
-    )
+    # Compute scores
+    scores = processor.score_multi_vector(qs=query_embedding, ps=doc_index.colgemma_embeddings)
 
     # Get top-k results
     top_k_actual = min(top_k, len(doc_index.images))
@@ -456,10 +358,7 @@ def query_colgemma(query: str, top_k: int, show_heatmap: bool = False) -> Tuple[str, List]:
             )
         else:
             gallery_images.append(
-                (
-                    doc_index.images[idx.item()],
-                    f"Rank {rank + 1} - Page {idx.item() + 1} (Score: {score:.2f})",
-                )
+                (doc_index.images[idx.item()], f"Rank {rank + 1} - Page {idx.item() + 1} (Score: {score:.2f})")
             )
 
     return results_text, gallery_images
@@ -484,14 +383,12 @@ def query_documents(
     if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
         if doc_index.bigemma_embeddings is None:
             return "⚠️ Please index the document with BiGemma3 first.", None, None, None
-
         results_bi, gallery_images_bi = query_bigemma(query, top_k)
 
     # Query with ColGemma3
     if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
         if doc_index.colgemma_embeddings is None:
             return "⚠️ Please index the document with ColGemma3 first.", None, None, None
-
         results_col, gallery_images_col = query_colgemma(query, top_k, show_heatmap)
 
     # Return results based on model choice
@@ -504,266 +401,57 @@
 
     except Exception as e:
         import traceback
-
        error_details = traceback.format_exc()
         print(f"Query error: {error_details}")
         return f"❌ Error during query: {str(e)}", None, None, None
 
-def load_models_with_progress(model_choice: str) -> Iterator[Tuple]:
-    """Load models with progress updates."""
-    if not model_choice:
-        yield (
-            "❌ Please select a model first.",
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(value="Load model first"),
-        )
-        return
-
-    try:
-        status_messages = []
-
-        # Clear incompatible embeddings
-        clear_msg = clear_incompatible_embeddings(model_choice)
-        if clear_msg:
-            status_messages.append(f"⚠️ {clear_msg}")
-
-        # Load BiGemma3
-        if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
-            status_messages.append("⏳ Loading BiGemma3 (NetraEmbed)...")
-            yield (
-                "\n".join(status_messages),
-                gr.update(visible=True),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(value="Loading models..."),
-            )
-
-            load_bigemma_model()
-            status_messages[-1] = "✅ BiGemma3 loaded successfully"
-            yield (
-                "\n".join(status_messages),
-                gr.update(visible=True),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(value="Loading models..."),
-            )
-
-        # Load ColGemma3
-        if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
-            status_messages.append("⏳ Loading ColGemma3 (ColNetraEmbed)...")
-            yield (
-                "\n".join(status_messages),
-                gr.update(visible=True),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(value="Loading models..."),
-            )
-
-            load_colgemma_model()
-            status_messages[-1] = "✅ ColGemma3 loaded successfully"
-            yield (
-                "\n".join(status_messages),
-                gr.update(visible=True),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(value="Loading models..."),
-            )
-
-        # Determine column visibility based on loaded models
-        show_bigemma = model_choice in ["NetraEmbed (BiGemma3)", "Both"]
-        show_colgemma = model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]
-        show_heatmap_checkbox = model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]
-
-        final_status = "\n".join(status_messages) + "\n\n✅ Ready!"
-        yield (
-            final_status,
-            gr.update(visible=False),
-            gr.update(visible=True),
-            gr.update(visible=show_bigemma),
-            gr.update(visible=show_colgemma),
-            gr.update(visible=show_heatmap_checkbox),
-            gr.update(interactive=True),
-            gr.update(interactive=True),
-            gr.update(interactive=True),
-            gr.update(interactive=True),
-            gr.update(interactive=True),
-            gr.update(value="Ready to index"),
-        )
-
-    except Exception as e:
-        import traceback
-
-        error_details = traceback.format_exc()
-        print(f"Model loading error: {error_details}")
-        yield (
-            f"❌ Failed to load models: {str(e)}",
-            gr.update(visible=True),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(interactive=False),
-            gr.update(value="Load model first"),
-        )
-
-def unload_models_and_hide_ui():
-    """Unload models and hide main UI."""
-    status = unload_models()
-    return (
-        status,
-        gr.update(visible=True),
-        gr.update(visible=False),
-        gr.update(visible=False),
-        gr.update(visible=False),
-        gr.update(visible=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False),
-        gr.update(interactive=False),
-        gr.update(value="Load model first"),
-    )
-
 # Create Gradio interface
-with gr.Blocks(
-    title="NetraEmbed Demo",
-) as demo:
-    # Header section with model info and banner
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("# NetraEmbed")
-            gr.HTML(
-                """
-                <div style="display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 15px;">
-                    <a href="https://arxiv.org/abs/2512.03514" target="_blank">
-                        <img src="https://img.shields.io/badge/arXiv-2512.03514-b31b1b.svg" alt="Paper">
-                    </a>
-                    <a href="https://github.com/adithya-s-k/colpali" target="_blank">
-                        <img src="https://img.shields.io/badge/GitHub-colpali-181717?logo=github" alt="GitHub">
-                    </a>
-                    <a href="https://huggingface.co/Cognitive-Lab/ColNetraEmbed" target="_blank">
-                        <img src="https://img.shields.io/badge/🤗%20HuggingFace-Model-yellow" alt="Model">
-                    </a>
-                    <a href="https://www.cognitivelab.in/blog/introducing-netraembed" target="_blank">
-                        <img src="https://img.shields.io/badge/Blog-CognitiveLab-blue" alt="Blog">
-                    </a>
-                    <a href="https://cloud.cognitivelab.in" target="_blank">
-                        <img src="https://img.shields.io/badge/Demo-Try%20it%20out-green" alt="Demo">
-                    </a>
-                </div>
-                """
-            )
-            gr.Markdown(
-                """
-
-                **🚀 Universal Multilingual Multimodal Document Retrieval**
-
-                Upload a PDF document, select your model(s), and query using semantic search.
-
-                **Available Models:**
-                - **NetraEmbed (BiGemma3)**: Single-vector embedding with Matryoshka representation
-                  Fast retrieval with cosine similarity
-                - **ColNetraEmbed (ColGemma3)**: Multi-vector embedding with late interaction
-                  High-quality retrieval with MaxSim scoring and attention heatmaps
-
-                """
-            )
+with gr.Blocks(title="NetraEmbed Demo") as demo:
+    # Header section
+    gr.Markdown("# NetraEmbed")
+    gr.HTML(
+        """
+        <div style="display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 15px;">
+            <a href="https://arxiv.org/abs/2512.03514" target="_blank">
+                <img src="https://img.shields.io/badge/arXiv-2512.03514-b31b1b.svg" alt="Paper">
+            </a>
+            <a href="https://github.com/adithya-s-k/colpali" target="_blank">
+                <img src="https://img.shields.io/badge/GitHub-colpali-181717?logo=github" alt="GitHub">
+            </a>
+            <a href="https://huggingface.co/Cognitive-Lab/ColNetraEmbed" target="_blank">
+                <img src="https://img.shields.io/badge/🤗%20HuggingFace-Model-yellow" alt="Model">
+            </a>
+        </div>
+        """
+    )
+    gr.Markdown(
+        """
+        **🚀 Universal Multilingual Multimodal Document Retrieval**
+
+        Upload a PDF document, select your model(s), and query using semantic search.
+
+        **Available Models:**
+        - **NetraEmbed (BiGemma3)**: Single-vector embedding - Fast retrieval with cosine similarity
+        - **ColNetraEmbed (ColGemma3)**: Multi-vector embedding - High-quality retrieval with MaxSim scoring and heatmaps
+        """
+    )
 
-        with gr.Column(scale=1):
-            gr.HTML(
-                """
-                <div style="text-align: center;">
-                    <img src="https://cdn-uploads.huggingface.co/production/uploads/6442d975ad54813badc1ddf7/-fYMikXhSuqRqm-UIdulK.png"
-                         alt="NetraEmbed Banner"
-                         style="width: 100%; height: auto; border-radius: 8px;">
-                </div>
-                """
-            )
-
-    gr.Markdown("---")
-
-    # Compact 3-column layout
     with gr.Row():
-        # Column 1: Model Management
+        # Column 1: Model Selection
         with gr.Column(scale=1):
-            gr.Markdown("### 🤖 Model Management")
+            gr.Markdown("### 🤖 Model Selection")
             model_select = gr.Radio(
                 choices=["NetraEmbed (BiGemma3)", "ColNetraEmbed (ColGemma3)", "Both"],
                 value="Both",
                 label="Select Model(s)",
             )
 
-            load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="sm")
-            unload_model_btn = gr.Button("🗑️ Unload", variant="secondary", size="sm")
-
-            model_status = gr.Textbox(
-                label="Status",
-                lines=6,
-                interactive=False,
-                value="Select and load a model",
-            )
-
-            loading_info = gr.Markdown(
-                """
-                **First load:** 2-3 min
-                **Cached:** ~30 sec
-                """,
-                visible=True,
-            )
-
-        # Column 2: Document Upload & Indexing
+        # Column 2: Document Upload
         with gr.Column(scale=1):
             gr.Markdown("### 📄 Upload & Index")
-            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], interactive=False)
-            index_btn = gr.Button("📥 Index Document", variant="primary", size="sm", interactive=False)
-
-            index_status = gr.Textbox(
-                label="Indexing Status",
-                lines=6,
-                interactive=False,
-                value="Load model first",
-            )
+            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
+            index_btn = gr.Button("📥 Index Document", variant="primary")
+            index_status = gr.Textbox(label="Status", lines=6, interactive=False)
 
         # Column 3: Query
         with gr.Column(scale=1):
@@ -772,145 +460,44 @@ with gr.Blocks(
                 label="Enter Query",
                 placeholder="e.g., financial report, organizational structure...",
                 lines=2,
-                interactive=False,
             )
-
            with gr.Row():
-                top_k_slider = gr.Slider(
-                    minimum=1,
-                    maximum=10,
-                    value=5,
-                    step=1,
-                    label="Top K",
-                    scale=2,
-                    interactive=False,
-                )
-                heatmap_checkbox = gr.Checkbox(
-                    label="Heatmaps",
-                    value=False,
-                    visible=False,
-                    scale=1,
-                )
-
-            query_btn = gr.Button("🔍 Search", variant="primary", size="sm", interactive=False)
+                top_k_slider = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Top K", scale=2)
+                heatmap_checkbox = gr.Checkbox(label="Heatmaps", value=False, scale=1)
+            query_btn = gr.Button("🔍 Search", variant="primary")
 
     gr.Markdown("---")
 
-    # Results section (always visible after model load)
-    with gr.Column(visible=False) as main_interface:
-        gr.Markdown("### 📊 Results")
-
-        with gr.Row(equal_height=True):
-            with gr.Column(scale=1, visible=False) as bigemma_column:
-                bigemma_results = gr.Markdown(
-                    value="*BiGemma3 results will appear here...*",
-                )
-                bigemma_gallery = gr.Gallery(
-                    label="BiGemma3 - Top Retrieved Pages",
-                    show_label=True,
-                    columns=2,
-                    height="auto",
-                    object_fit="contain",
-                )
-            with gr.Column(scale=1, visible=False) as colgemma_column:
-                colgemma_results = gr.Markdown(
-                    value="*ColGemma3 results will appear here...*",
-                )
-                colgemma_gallery = gr.Gallery(
-                    label="ColGemma3 - Top Retrieved Pages",
-                    show_label=True,
-                    columns=2,
-                    height="auto",
-                    object_fit="contain",
-                )
-
-    # Tips
-    with gr.Accordion("💡 Tips", open=False):
-        gr.Markdown(
-            """
-            - **Both models**: Compare results side-by-side
-            - **Scores**: BiGemma3 uses cosine similarity (-1 to 1), ColGemma3 uses MaxSim (higher is better)
-            - **Heatmaps**: Enable to visualize ColGemma3 attention patterns (brighter = higher attention)
-            """
+    # Results section
+    gr.Markdown("### 📊 Results")
+    with gr.Row():
+        with gr.Column(scale=1):
+            bigemma_results = gr.Markdown(value="*BiGemma3 results will appear here...*")
+            bigemma_gallery = gr.Gallery(
+                label="BiGemma3 - Top Retrieved Pages",
+                columns=2,
+                height="auto",
+            )
+        with gr.Column(scale=1):
+            colgemma_results = gr.Markdown(value="*ColGemma3 results will appear here...*")
+            colgemma_gallery = gr.Gallery(
+                label="ColGemma3 - Top Retrieved Pages",
+                columns=2,
+                height="auto",
        )
 
-    # Event handlers - Model Management
-    load_model_btn.click(
-        fn=load_models_with_progress,
-        inputs=[model_select],
-        outputs=[
-            model_status,
-            loading_info,
-            main_interface,
-            bigemma_column,
-            colgemma_column,
-            heatmap_checkbox,
-            pdf_upload,
-            index_btn,
-            query_input,
-            top_k_slider,
-            query_btn,
-            index_status,
-        ],
-    )
-
-    unload_model_btn.click(
-        fn=unload_models_and_hide_ui,
-        outputs=[
-            model_status,
-            loading_info,
-            main_interface,
-            bigemma_column,
-            colgemma_column,
-            heatmap_checkbox,
-            pdf_upload,
-            index_btn,
-            query_input,
-            top_k_slider,
-            query_btn,
-            index_status,
-        ],
-    )
-
-    # Event handlers - Main Interface
-    def index_with_current_models(pdf_file):
-        """Index document with currently loaded models."""
-        if pdf_file is None:
-            yield "⚠️ Please upload a PDF document first."
-            return
-
-        model_choice = get_model_choice_from_loaded()
-        if not model_choice:
-            yield "⚠️ No models loaded. Please load a model first."
-            return
-
-        # Use generator from index_document
-        for status in index_document(pdf_file, model_choice):
-            yield status
-
-    def query_with_current_models(query, top_k, show_heatmap):
-        """Query with currently loaded models."""
-        model_choice = get_model_choice_from_loaded()
-        if not model_choice:
-            return "⚠️ No models loaded. Please load a model first.", None, None, None
-
-        return query_documents(query, model_choice, top_k, show_heatmap)
-
+    # Event handlers
    index_btn.click(
-        fn=index_with_current_models,
-        inputs=[pdf_upload],
+        fn=index_document,
+        inputs=[pdf_upload, model_select],
        outputs=[index_status],
    )
 
    query_btn.click(
-        fn=query_with_current_models,
-        inputs=[query_input, top_k_slider, heatmap_checkbox],
+        fn=query_documents,
+        inputs=[query_input, model_select, top_k_slider, heatmap_checkbox],
        outputs=[bigemma_results, colgemma_results, bigemma_gallery, colgemma_gallery],
    )
 
-    # Enable queue for handling multiple requests
-    demo.queue(max_size=20)
-
 # Launch the app
-if __name__ == "__main__":
-    demo.launch()
+demo.launch()
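
For context, the two patterns this commit leans on — raising `gr.Error` so indexing failures surface in the Gradio UI rather than only in the server log, and the bare `@spaces.GPU` decorator that replaces the removed `DEFAULT_DURATION`-based form — are shown in isolation in the minimal sketch below. The sketch is illustrative only, not part of the commit; the handler and component names are invented.

```python
# Illustrative sketch (not from this commit): raising gr.Error inside a
# handler surfaces the failure to the user, while a bare @spaces.GPU
# decorator requests a ZeroGPU slice for its default duration.
import gradio as gr
import spaces  # available on Hugging Face ZeroGPU Spaces
from pdf2image import convert_from_path


@spaces.GPU  # no duration argument: ZeroGPU's default is used
def handle_index(pdf_file):
    if pdf_file is None:
        # A returned string is ordinary output and lands in the status Textbox.
        return "⚠️ Please upload a PDF document first."
    try:
        pages = convert_from_path(pdf_file.name, dpi=200)
    except Exception as e:
        # Raising gr.Error aborts the event and shows the message in the UI.
        raise gr.Error(f"Failed to convert PDF: {e}")
    return f"✅ Converted {len(pages)} pages"


with gr.Blocks() as demo:
    pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
    btn = gr.Button("Index")
    status = gr.Textbox(label="Status", interactive=False)
    btn.click(fn=handle_index, inputs=[pdf], outputs=[status])

demo.launch()
```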