Spaces:

google
/

radextract

Running on CPU Upgrade

App Files Files Community

goelak committed on Jul 29

Commit

fab8051

0 Parent(s):

Initial commit for RadExtract

Browse files

Files changed (33) hide show

.dockerignore +54 -0
.gitattributes +35 -0
.gitignore +30 -0
.prettierrc.json +15 -0
Dockerfile +35 -0
LICENSE +202 -0
README.md +159 -0
app.py +293 -0
cache/sample_cache.json +0 -0
cache_manager.py +285 -0
env.list.example +3 -0
prompt_instruction.py +121 -0
prompt_lib.py +101 -0
pyproject.toml +85 -0
report_examples.py +645 -0
run_docker.sh +58 -0
run_local.sh +45 -0
sanitize.py +104 -0
social_sharing.py +53 -0
start.sh +27 -0
static/copy.js +177 -0
static/favicon.svg +21 -0
static/google-research-logo.svg +61 -0
static/reset.js +103 -0
static/sample_reports.json +64 -0
static/script.js +1320 -0
static/style.css +2239 -0
structure_report.py +734 -0
templates/index.html +524 -0
test_app.py +209 -0
test_validation.py +152 -0
tools/rebuild_cache.py +70 -0
view_logs_endpoint.py +44 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,54 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+venv/
+env/
+ENV/
+.venv
+.env
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+*.cover
+*.log
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
+# Git
+.git/
+.gitignore
+# Documentation
+*.md
+docs/
+README.md
+LICENSE
+# Build artifacts
+dist/
+build/
+*.egg-info/
+# Development scripts
+run_local.sh
+run_docker.sh
+test_*.py
+# Temporary files
+*.tmp
+*.bak

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,30 @@

+env.list
+**/__pycache__
+*.pyc
+*.pyo
+*.pyd
+__pycache__/
+.pytest_cache/
+.DS_Store
+*.log
+venv/
+.venv/
+.env
+.vscode/
+.idea/
+*.egg-info/
+build/
+dist/
+.coverage
+htmlcov/
+.mypy_cache/
+.ruff_cache/
+*.so
+*.dylib
+# Local developer documentation
+run_docker_dev.sh
+notes/
+# Video assets for demo
+video_assets/

.prettierrc.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "printWidth": 80,
+  "tabWidth": 2,
+  "useTabs": false,
+  "semi": true,
+  "singleQuote": true,
+  "quoteProps": "as-needed",
+  "jsxSingleQuote": false,
+  "trailingComma": "all",
+  "bracketSpacing": true,
+  "bracketSameLine": false,
+  "arrowParens": "always",
+  "htmlWhitespaceSensitivity": "css",
+  "endOfLine": "lf"
+}

Dockerfile ADDED Viewed

	@@ -0,0 +1,35 @@

+# Use the slim Python 3.11 base image
+FROM python:3.11-slim
+# Set the working directory
+WORKDIR /app
+# Install system dependencies (curl is needed by the HEALTHCHECK below)
+RUN apt-get update && apt-get install -y --no-install-recommends git procps libmagic1 curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy only the project configuration first (presumably so the dependency layer is cached independently of source changes)
+COPY pyproject.toml /app/
+# Environment variables
+ENV HOME=/tmp
+ENV APP_MODULE=app:app
+# Install the dependencies declared in pyproject.toml
+RUN pip install --no-cache-dir -e .
+# Copy all application files (filtered by .dockerignore)
+COPY . /app/
+# Expose the application port (same port as app_port in README.md and app.run in app.py)
+EXPOSE 7870
+# Health check: poll the /cache/stats endpoint served by app.py
+HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
+    CMD curl -f http://localhost:7870/cache/stats || exit 1
+# Make the startup script executable and use it as the container entry point
+# NOTE(review): start.sh is not listed in .dockerignore, so `COPY . /app/` above already includes it; this COPY looks redundant.
+COPY start.sh /app/
+RUN chmod +x /app/start.sh
+CMD ["/app/start.sh"]

LICENSE ADDED Viewed

	@@ -0,0 +1,202 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md ADDED Viewed

	@@ -0,0 +1,159 @@

+---
+title: RadExtract
+emoji: 🗂️
+colorFrom: blue
+colorTo: green
+sdk: docker
+pinned: false
+license: apache-2.0
+header: mini
+app_port: 7870
+tags:
+  - medical
+  - nlp
+  - radiology
+  - langextract
+  - gemini
+  - structured-data
+---
+# RadExtract: Radiology Report Structuring Demo
+[![🤗 Hugging Face Spaces](https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/google/radextract)
+[![LangExtract](https://img.shields.io/badge/Powered%20by-LangExtract-green)](https://github.com/google/langextract)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+A demonstration application powered by [LangExtract](https://github.com/google/langextract) that structures radiology reports using Gemini models. Transform unstructured radiology text into organized, interactive segments with clinical significance annotations.
+## Try the Demo
+**[Launch RadExtract Demo](https://huggingface.co/spaces/google/radextract)**
+Transform unstructured radiology reports into structured data with highlighted findings that are precisely mapped back to the original source text.
+## Key Features
+- **Structured Output**: Organizes reports into anatomical sections with clinical significance
+- **Interactive Highlighting**: Click any finding to see its exact source in the original text
+- **Clinical Significance**: Annotates findings as minor, significant, or grounding
+- **Character-Level Mapping**: Precise attribution back to source text
+- **Multi-Model Support**: Gemini 2.5 Flash (fast) and Pro (comprehensive)
+## Quick Start
+### Setup
+```bash
+git clone https://huggingface.co/spaces/google/radextract
+cd radextract
+python -m venv venv
+source venv/bin/activate
+pip install -e ".[dev]"
+cp env.list.example env.list
+# Edit env.list and set KEY=your_gemini_api_key_here
+```
+### Local Development
+```bash
+source venv/bin/activate
+export KEY=your_gemini_api_key_here
+python app.py
+```
+Access at: http://localhost:7870
+## API Usage
+### Example Request
+```bash
+curl -X POST \
+  -H 'X-Model-ID: gemini-2.5-flash' \
+  -H 'X-Use-Cache: true' \
+  -d 'FINDINGS: Normal heart and lungs. IMPRESSION: Normal study.' \
+  http://localhost:7870/predict
+```
+### Response Format
+```json
+{
+  "segments": [{
+    "type": "body",
+    "label": "Chest",
+    "content": "Normal heart and lungs",
+    "intervals": [{"startPos": 10, "endPos": 32}],
+    "significance": "minor"
+  }],
+  "text": "Chest:\n- Normal heart and lungs",
+  "annotated_document_json": {...}
+}
+```
+## Architecture
+- **Backend**: Flask + Python 3.10+ with full type safety
+- **NLP Engine**: [LangExtract](https://github.com/google/langextract) for structured extraction
+- **AI Models**: Google Gemini 2.5 (Flash/Pro)
+- **Frontend**: Vanilla JavaScript with interactive UI
+- **Deployment**: Docker + Hugging Face Spaces
+- **Package Details**: See [pyproject.toml](https://huggingface.co/spaces/google/radextract/blob/main/pyproject.toml) for dependencies, metadata, and tooling
+## Project Structure
+```
+radextract/
+├── app.py                 # Flask API endpoints
+├── structure_report.py    # Core structuring logic
+├── sanitize.py           # Text preprocessing & normalization
+├── prompt_instruction.py  # LangExtract prompt
+├── cache_manager.py      # Response caching
+├── static/               # Frontend assets
+└── templates/            # HTML templates
+```
+## Development
+### Setup
+```bash
+git clone https://huggingface.co/spaces/google/radextract
+cd radextract
+python -m venv venv
+source venv/bin/activate
+pip install -e ".[dev]"
+```
+### Code Quality
+```bash
+# Format code
+pyink . && isort .
+# Type checking
+mypy . --ignore-missing-imports
+# Run tests
+pytest
+```
+### Docker
+```bash
+# Build and run
+docker build -t radextract .
+docker run -p 7870:7870 --env-file env.list radextract
+```
+## License
+Apache License 2.0 - see [LICENSE](LICENSE) for details.
+## Related Projects
+- **[LangExtract](https://github.com/google/langextract)**: Core NLP library
+---
+**Built for the medical AI community** | **Hosted on Hugging Face Spaces**
+## Disclaimer
+This is not an officially supported Google product. If you use RadExtract or LangExtract in production or publications, please cite accordingly and acknowledge usage. Use is subject to the [Apache 2.0 License](LICENSE). For health-related applications, use of LangExtract is also subject to the [Health AI Developer Foundations Terms of Use](https://developers.google.com/health-ai-foundations/terms).

app.py ADDED Viewed

	@@ -0,0 +1,293 @@

+"""Flask web application for radiology report structuring using Gemini models.
+This module provides a web API that structures radiology reports into
+semantic sections using LangExtract and Google's Gemini language models.
+The application supports caching, multiple model configurations, and
+provides both a web interface and REST API endpoints.
+Typical usage example:
+  # Set environment variables
+  export KEY=your_gemini_api_key_here
+  export MODEL_ID=gemini-2.5-flash
+  # Run the application
+  python app.py
+"""
+import logging
+import os
+import shutil
+import tempfile
+import time
+import json
+import hashlib
+from flask import Flask, jsonify, render_template, request
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+from cache_manager import CacheManager
+from sanitize import preprocess_report
+from social_sharing import SocialSharingConfig
+from structure_report import RadiologyReportStructurer, ResponseDict
+# Configuration constants: /predict rejects request bodies longer than MAX_INPUT_LENGTH characters
+MAX_INPUT_LENGTH = 3000
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
+)
+logger = logging.getLogger(__name__)
+class Model:
+    """Manages RadiologyReportStructurer instances for different Gemini model IDs.
+    This class handles initialization, caching, and coordination
+    of structurer instances for various model configurations, ensuring
+    efficient resource usage and consistent API key management.
+    """
+    def __init__(self):
+        """Initializes the Model manager with default structurer.
+        Sets up the Gemini API key from environment variables
+        and creates a default structurer instance for the configured model.
+        Raises:
+            ValueError: If the KEY environment variable is not set.
+        """
+        self.gemini_api_key = os.environ.get("KEY")
+        if not self.gemini_api_key:
+            logger.error("KEY environment variable not set.")
+            raise ValueError("KEY environment variable not set.")
+        self._structurers: dict[str, RadiologyReportStructurer] = {}
+        default_model_id = os.environ.get("MODEL_ID", "gemini-2.5-flash")
+        self._structurers[default_model_id] = RadiologyReportStructurer(
+            api_key=self.gemini_api_key,
+            model_id=default_model_id,
+        )
+        logger.info(
+            f"RadExtract ready [Worker {os.getpid()}] with model: {default_model_id}"
+        )
+    def _get_structurer(self, model_id: str) -> RadiologyReportStructurer:
+        """Returns a cached or newly created structurer for the given model ID.
+        Args:
+            model_id: Identifier for the specific model configuration.
+        Returns:
+            RadiologyReportStructurer instance for the specified model.
+        """
+        if model_id not in self._structurers:
+            logger.info(f"Creating structurer for model: {model_id}")
+            self._structurers[model_id] = RadiologyReportStructurer(
+                api_key=self.gemini_api_key,
+                model_id=model_id,
+            )
+        return self._structurers[model_id]
+    def predict(self, data: str, model_id: str) -> ResponseDict:
+        """Processes prediction request using the specified model.
+        Args:
+            data: Input text data to be processed.
+            model_id: Identifier for the model to use for processing.
+        Returns:
+            Dictionary containing the structured prediction results.
+        """
+        logger.info(f"Processing prediction with model: {model_id}")
+        structurer = self._get_structurer(model_id)
+        result = structurer.predict(data)
+        logger.info(f"Result preview: {str(result)[:500]}...")
+        return result
+model = Model()  # Built at import time; Model() raises ValueError when KEY is unset
+# Copy prebuilt cache to writable location if it exists
+def setup_cache():
+    """Sets up the cache directory and copies prebuilt cache files.
+    Creates a writable cache directory in /tmp and copies any existing
+    prebuilt cache files to ensure the latest version is available.
+    Returns:
+        Path to the configured cache directory.
+    """
+    cache_dir = tempfile.gettempdir() + "/cache"
+    os.makedirs(cache_dir, exist_ok=True)
+    source_cache = "cache/sample_cache.json"
+    target_cache = os.path.join(cache_dir, "sample_cache.json")
+    if os.path.exists(source_cache) and not os.path.exists(target_cache):
+        shutil.copy2(source_cache, target_cache)
+        logger.info(f"Initialized cache with {os.path.getsize(target_cache)} bytes")
+    return cache_dir
+cache_dir = setup_cache()
+cache_manager = CacheManager(cache_dir=cache_dir)
+app = Flask(
+    __name__,
+    static_url_path="/static",
+    static_folder="static",
+    template_folder="templates",
+)
+# Initialize rate limiter: keyed by remote address; default limits can be overridden via RATE_LIMIT_DAY and RATE_LIMIT_HOUR
+limiter = Limiter(
+    get_remote_address,
+    app=app,
+    default_limits=[
+        os.environ.get("RATE_LIMIT_DAY", "200 per day"),
+        os.environ.get("RATE_LIMIT_HOUR", "50 per hour"),
+    ],
+    storage_uri="memory://",  # NOTE(review): presumably per-process counters, so limits are not shared across workers; confirm
+)
+@app.route("/")
+def index():
+    """Renders the main application interface.
+    Returns:
+        Rendered HTML template for the application index page.
+    """
+    # Get social sharing context
+    social_context = SocialSharingConfig.get_sharing_context(request.url_root)
+    return render_template("index.html", **social_context)
+@app.route("/cache/stats")
+def cache_stats():
+    """Returns cache performance statistics.
+    Returns:
+        JSON response containing cache usage and performance statistics.
+    """
+    return jsonify(cache_manager.get_cache_stats())
+@app.route("/predict", methods=["POST"])
+@limiter.limit(os.environ.get("RATE_LIMIT_PREDICT", "100 per hour"))
+def predict():
+    """Processes radiology report text and returns structured results.
+    Accepts raw text via POST request body with optional headers
+    for caching, sample identification, and model selection. Supports
+    both cached and real-time processing modes.
+    Returns:
+        JSON response containing structured report segments, annotations,
+        and formatted text. Includes cache status when applicable.
+    Raises:
+        500: If processing fails due to invalid input or model errors.
+    """
+    start_time = time.time()
+    try:
+        data = request.get_data(as_text=True)
+        # Validate input to ensure it meets API requirements
+        if not data or not data.strip():
+            return (
+                jsonify(
+                    {
+                        "error": "Empty input",
+                        "message": "Input text is required",
+                        "max_length": MAX_INPUT_LENGTH,
+                    }
+                ),
+                400,
+            )
+        if len(data) > MAX_INPUT_LENGTH:
+            return (
+                jsonify(
+                    {
+                        "error": "Input too long",
+                        "message": f"Input length ({len(data)} characters) exceeds maximum allowed length of {MAX_INPUT_LENGTH} characters",
+                        "max_length": MAX_INPUT_LENGTH,
+                    }
+                ),
+                400,
+            )
+        use_cache = request.headers.get("X-Use-Cache", "true").lower() == "true"
+        sample_id = request.headers.get("X-Sample-ID")
+        model_id = request.headers.get(
+            "X-Model-ID", os.environ.get("MODEL_ID", "gemini-2.5-flash")
+        )
+        processed_data = preprocess_report(data)
+        if use_cache:
+            cached_result = cache_manager.get_cached_result(processed_data, sample_id)
+            if cached_result:
+                req_id = hashlib.md5(
+                    f"{request.remote_addr}{int(time.time()/3600)}".encode()
+                ).hexdigest()[:8]
+                logger.info(
+                    f"🟢 CACHE HIT [Req {req_id}] [Worker {os.getpid()}] - Returning cached result (no API call)"
+                )
+                return jsonify({"from_cache": True, **cached_result})
+        try:
+            req_id = hashlib.md5(
+                f"{request.remote_addr}{int(time.time()/3600)}".encode()
+            ).hexdigest()[:8]
+            logger.info(
+                f"🔴 API CALL [Req {req_id}] [Worker {os.getpid()}] - Processing with Gemini model: {model_id}"
+            )
+            result = model.predict(processed_data, model_id=model_id)
+            if use_cache:
+                cache_manager.cache_result(processed_data, result, sample_id)
+            result["sanitized_input"] = processed_data
+            return jsonify(result)
+        except TypeError as te:
+            error_msg = str(te)
+            logger.error(f"TypeError in prediction: {error_msg}", exc_info=True)
+            return (
+                jsonify({"error": "Processing error. Please try a different input."}),
+                500,
+            )
+    except Exception as e:
+        logger.error(f"Prediction error: {str(e)}", exc_info=True)
+        return jsonify({"error": str(e)}), 500
+@app.errorhandler(429)
+def ratelimit_handler(e):
+    """Handle rate limit exceeded errors."""
+    return (
+        jsonify(
+            {
+                "error": "Rate limit exceeded. Please try again later.",
+                "message": str(e.description),
+            }
+        ),
+        429,
+    )
+if __name__ == "__main__":
+    logger.info("Starting development server")
+    app.run(host="0.0.0.0", port=7870, debug=True)

cache/sample_cache.json ADDED Viewed

The diff for this file is too large to render. See raw diff

cache_manager.py ADDED Viewed

	@@ -0,0 +1,285 @@

+"""Cache management for radiology report structuring results.
+This module provides the CacheManager class that handles caching of
+structured radiology report results to improve performance and reduce
+API calls. Supports both sample-based and custom text caching with
+JSON file persistence.
+Example usage:
+  cache_manager = CacheManager(cache_dir="cache")
+  cached_result = cache_manager.get_cached_result(report_text, sample_id)
+  if not cached_result:
+      result = process_report(report_text)
+      cache_manager.cache_result(report_text, result, sample_id)
+"""
+import hashlib
+import json
+import logging
+import os
+import time
+from typing import Any
+from langextract.data import AnnotatedDocument, CharInterval, Extraction
+logger = logging.getLogger(__name__)
+class CacheManager:
+    """Manages caching of radiology report structuring results.
+    Results live in an in-memory dictionary that is mirrored to one JSON
+    file inside ``cache_dir``. Entries are keyed either by sample ID
+    (``sample_<id>``) or by an MD5 digest of the report text
+    (``custom_<digest>``).
+    Attributes:
+        cache_dir: Directory path for cache storage.
+        cache_file: Full path to the cache JSON file.
+    """
+    _SAMPLE_PREFIX = "sample_"
+    _CUSTOM_PREFIX = "custom_"
+    def __init__(self, cache_dir: str = "cache"):
+        """Initializes the CacheManager and loads any existing cache file.
+        Args:
+            cache_dir: Directory path for cache storage. Defaults to "cache".
+        """
+        self.cache_dir = cache_dir
+        self.cache_file = os.path.join(cache_dir, "sample_cache.json")
+        self._cache_data: dict[str, Any] = {}
+        self._load_cache()
+    def _ensure_cache_dir(self):
+        """Ensures the cache directory exists, creating it if necessary."""
+        os.makedirs(self.cache_dir, exist_ok=True)
+    def _load_cache(self):
+        """Loads existing cache data from file into memory.
+        Falls back to an empty cache when the file is missing, cannot be
+        read or parsed, or does not hold a JSON object at the top level.
+        """
+        self._cache_data = {}
+        try:
+            if not os.path.exists(self.cache_file):
+                logger.info("No existing cache file found, starting with empty cache")
+                return
+            with open(self.cache_file, "r", encoding="utf-8") as f:
+                loaded = json.load(f)
+        except Exception as e:
+            # Best effort: a broken cache file must never stop the application.
+            logger.error(f"Error loading cache: {e}")
+            return
+        if not isinstance(loaded, dict):
+            # Every other method treats the cache as a mapping, so anything
+            # else (list, string, number) is rejected up front.
+            logger.error(
+                "Error loading cache: expected a JSON object, "
+                f"got {type(loaded).__name__}"
+            )
+            return
+        self._cache_data = loaded
+        logger.info(f"Loaded cache with {len(self._cache_data)} entries")
+    def _save_cache(self):
+        """Saves current cache data to the JSON file.
+        The data is first written to a temporary file in the cache directory
+        and then moved over the real file with ``os.replace``. A failure while
+        serializing therefore leaves the previous cache file intact instead of
+        a truncated one. Errors are logged, not raised.
+        """
+        # The PID keeps concurrent processes from sharing one temporary file.
+        tmp_file = f"{self.cache_file}.{os.getpid()}.tmp"
+        try:
+            self._ensure_cache_dir()
+            with open(tmp_file, "w", encoding="utf-8") as f:
+                json.dump(self._cache_data, f, indent=2, ensure_ascii=False)
+            os.replace(tmp_file, self.cache_file)
+            logger.info(f"Saved cache with {len(self._cache_data)} entries")
+        except Exception as e:
+            logger.error(f"Error saving cache: {e}")
+            try:
+                os.remove(tmp_file)
+            except OSError:
+                # Nothing to clean up (the temporary file was never created).
+                pass
+    @classmethod
+    def _sample_cache_key(cls, sample_id: str) -> str:
+        """Returns the key for a sample ID, adding the "sample_" prefix only if missing."""
+        sample_id = str(sample_id)
+        if sample_id.startswith(cls._SAMPLE_PREFIX):
+            return sample_id
+        return f"{cls._SAMPLE_PREFIX}{sample_id}"
+    def _get_cache_key(self, text: str, sample_id: str | None = None) -> str:
+        """Generates a cache key for the given text and optional sample ID.
+        Args:
+            text: The input text to generate a key for.
+            sample_id: Optional sample identifier for predefined samples.
+        Returns:
+            A string cache key, either sample-based or hash-based.
+        """
+        if sample_id:
+            return self._sample_cache_key(sample_id)
+        # MD5 is only a cache fingerprint here, not a security control; saying so
+        # keeps the call usable on builds that restrict MD5. The digest is unchanged.
+        digest = hashlib.md5(text.encode("utf-8"), usedforsecurity=False).hexdigest()
+        return f"{self._CUSTOM_PREFIX}{digest}"
+    def get_cached_result(self, text: str, sample_id: str | None = None) -> dict | None:
+        """Gets cached result for given text.
+        Args:
+            text: The input text to look up.
+            sample_id: Optional sample identifier for predefined samples.
+        Returns:
+            The cached result dictionary if found, None otherwise.
+        """
+        cache_key = self._get_cache_key(text, sample_id)
+        result = self._cache_data.get(cache_key)
+        if result is not None:
+            logger.info(f"Cache hit for key: {cache_key}")
+        return result
+    def _dict_to_extraction(self, extraction_dict: dict[str, Any]) -> Extraction:
+        """Converts a cached extraction dictionary to an Extraction object.
+        Args:
+            extraction_dict: One entry of a cached "extractions" list.
+        Returns:
+            An Extraction built from the entry; missing text, class and
+            attributes default to "", "" and {} and a missing or empty
+            "char_interval" becomes None.
+        """
+        char_interval = None
+        interval_data = extraction_dict.get("char_interval")
+        if interval_data:
+            char_interval = CharInterval(
+                start_pos=interval_data.get("start_pos"),
+                end_pos=interval_data.get("end_pos"),
+            )
+        return Extraction(
+            extraction_text=extraction_dict.get("extraction_text", ""),
+            extraction_class=extraction_dict.get("extraction_class", ""),
+            attributes=extraction_dict.get("attributes", {}),
+            char_interval=char_interval,
+            alignment_status=extraction_dict.get("alignment_status"),
+        )
+    def convert_cached_response_to_annotated_document(
+        self, cached_response: dict[str, Any]
+    ) -> AnnotatedDocument:
+        """Converts a cached response to an AnnotatedDocument with proper Extraction objects.
+        Args:
+            cached_response: A cached result; extractions are read from
+                cached_response["annotated_document_json"]["extractions"].
+        Returns:
+            An AnnotatedDocument with empty text and the converted extractions.
+            The list is empty when either key is absent or holds no usable data.
+        """
+        document_json = cached_response.get("annotated_document_json")
+        # Tolerate a null/non-dict document and a null extraction list instead of
+        # failing with a TypeError on malformed cache entries.
+        extraction_dicts = (
+            document_json.get("extractions") if isinstance(document_json, dict) else None
+        )
+        extractions = [
+            self._dict_to_extraction(extraction_dict)
+            for extraction_dict in extraction_dicts or []
+        ]
+        return AnnotatedDocument(text="", extractions=extractions)
+    def cache_result(
+        self, text: str, result: dict[str, Any] | Any, sample_id: str | None = None
+    ) -> None:
+        """Caches result for given text and persists the cache to disk.
+        Args:
+            text: The input text to cache results for.
+            result: The structured result dictionary to cache.
+            sample_id: Optional sample identifier for predefined samples.
+        """
+        cache_key = self._get_cache_key(text, sample_id)
+        self._cache_data[cache_key] = result
+        self._save_cache()
+        logger.info(f"Cached result for key: {cache_key}")
+    def clear_cache(self) -> None:
+        """Clears all cached results and saves the empty cache to file."""
+        self._cache_data = {}
+        self._save_cache()
+        logger.info("Cache cleared")
+    def remove_sample(self, sample_id: str) -> bool:
+        """Removes a specific sample from cache.
+        Args:
+            sample_id: The sample identifier to remove, with or without the
+                "sample_" prefix.
+        Returns:
+            True if the sample was found and removed, False otherwise.
+        """
+        # Use the same prefix rule as _get_cache_key so an ID that already starts
+        # with "sample_" resolves to the key it was stored under.
+        cache_key = self._sample_cache_key(sample_id)
+        if cache_key not in self._cache_data:
+            logger.warning(f"Sample {sample_id} not found in cache")
+            return False
+        del self._cache_data[cache_key]
+        self._save_cache()
+        logger.info(f"Removed sample {sample_id} from cache")
+        return True
+    def prepopulate_cache_with_samples(
+        self,
+        sample_reports: list[dict[str, Any]],
+        structurer_callable,
+        force_refresh: bool = False,
+        request_delay_seconds: float = 6,
+    ) -> None:
+        """Prepopulates cache with sample reports.
+        Each sample that is not yet cached is passed to ``structurer_callable``
+        and the result is stored under its sample ID. A failure on one sample
+        is logged and the remaining samples are still processed. A lock file
+        (".cache_lock" in the cache directory) marks a run in progress; if it
+        already exists the call returns immediately unless ``force_refresh``
+        is set, so a lock left behind by a killed process has to be deleted
+        manually or overridden with ``force_refresh=True``.
+        Args:
+            sample_reports: List of sample report dictionaries with 'id' and 'text'.
+            structurer_callable: Function to call for structuring reports.
+            force_refresh: If True, reprocesses samples even if already cached.
+            request_delay_seconds: Pause after each successfully processed
+                sample, in seconds. Values <= 0 disable the pause.
+        """
+        if not sample_reports:
+            logger.info("No sample reports provided for cache prepopulation")
+            return
+        logger.info(f"Starting cache prepopulation with {len(sample_reports)} samples")
+        lock_file = os.path.join(self.cache_dir, ".cache_lock")
+        if os.path.exists(lock_file) and not force_refresh:
+            logger.info("Cache prepopulation already in progress or recently completed")
+            return
+        try:
+            self._ensure_cache_dir()
+            with open(lock_file, "w") as f:
+                f.write(str(os.getpid()))
+            for i, sample in enumerate(sample_reports):
+                sample_id = sample.get("id")
+                sample_text = sample.get("text", "")
+                if not sample_id or not sample_text:
+                    logger.warning(f"Sample {i} missing id or text, skipping")
+                    continue
+                if not force_refresh and self.get_cached_result(sample_text, sample_id):
+                    logger.info(f"Sample {sample_id} already cached, skipping")
+                    continue
+                logger.info(
+                    f"Processing sample {sample_id} ({i+1}/{len(sample_reports)})"
+                )
+                try:
+                    result = structurer_callable(sample_text)
+                    self.cache_result(sample_text, result, sample_id)
+                    logger.info(f"Successfully cached sample {sample_id}")
+                except Exception as e:
+                    logger.error(f"Error processing sample {sample_id}: {e}")
+                    continue
+                # Space out successive structurer calls.
+                if request_delay_seconds > 0:
+                    time.sleep(request_delay_seconds)
+            logger.info("Cache prepopulation completed")
+            self._save_cache()
+        except Exception as e:
+            logger.error(f"Error during cache prepopulation: {e}")
+        finally:
+            # Always release the lock so later runs are not blocked.
+            if os.path.exists(lock_file):
+                os.remove(lock_file)
+    def get_cache_stats(self) -> dict[str, Any]:
+        """Gets cache statistics.
+        Returns:
+            Dictionary with the total entry count, the number of "sample_" and
+            "custom_" entries, the cache file path and whether that file exists.
+        """
+        sample_count = sum(
+            1 for key in self._cache_data if key.startswith(self._SAMPLE_PREFIX)
+        )
+        custom_count = sum(
+            1 for key in self._cache_data if key.startswith(self._CUSTOM_PREFIX)
+        )
+        return {
+            "total_entries": len(self._cache_data),
+            "sample_entries": sample_count,
+            "custom_entries": custom_count,
+            "cache_file": self.cache_file,
+            "cache_file_exists": os.path.exists(self.cache_file),
+        }

env.list.example ADDED Viewed

	@@ -0,0 +1,3 @@

+# Copy this file to env.list and fill in your actual API key
+KEY=your_gemini_api_key_here
+MODEL_ID=gemini-2.5-flash

prompt_instruction.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Core prompt template for radiology report structuring.
+This module provides the main prompt template used to guide the LangExtract
+system in categorizing radiology report text into semantic sections
+(prefix, body, suffix) with appropriate clinical significance annotations.
+The prompt includes comprehensive instruction templates with detailed guidelines
+for handling different report formats and edge cases, ensuring consistent and
+accurate structuring across various radiology report types.
+"""
+import textwrap
+# Prompt template filled in with str.format(): `{examples}` and
+# `{inference_section}` are the two substitution fields, and the doubled braces
+# in the JSON sample are escapes that render as literal `{` / `}`.
+# dedent() removes the common leading indentation; strip() trims the ends.
+PROMPT_INSTRUCTION = textwrap.dedent(
+    """\
+    # RadExtract Prompt
+    ## Task Description
+    You are a medical assistant specialized in categorizing radiology text into sections:
+    - **findings_prefix** -- All text that appears before the actual "findings" content.
+    - **findings_body** -- The main 'Findings' section. Each finding is classified into a possible section through a list of attributes, some of which may also be assigned to a subheader.
+    - **findings_suffix** -- Any text that appears after the "findings" portion (such as "Impression" or other concluding content).
+    ### Section Categories:
+    - **findings_prefix**: Use only for header information before clinical findings (examination details, clinical indication, technique). Never use for actual clinical observations or pathological findings.
+    - **findings_body**: Use for all clinical findings, observations, and pathological descriptions.
+    - **findings_suffix**: Use only for conclusions, impressions, or recommendations that appear after the main findings.
+    ### Critical Rule:
+    If a report contains only clinical findings without any header information, do not create a findings_prefix extraction. Start directly with findings_body extractions for the clinical content.
+    **Example of findings-only content (NO prefix needed):**
+    Input: "There is a small joint effusion. The cartilage shows thinning."
+    Correct: Create only findings_body extractions for each clinical finding.
+    Incorrect: Do not categorize clinical findings as findings_prefix.
+    ### Professional Output Standards:
+    All extracted text must maintain the grammatical correctness and professional coherence expected in radiology reports. Ensure that:
+    - All sentences are complete and grammatically correct
+    - Medical terminology is used appropriately and consistently
+    - The language remains professional and clinical in tone
+    - Correct obvious typos (e.g., "splen" → "spleen", "kidny" → "kidney")
+    - Any modifications to the original text preserve the intended medical meaning
+    - Minor typos are corrected and optimal punctuation is used
+    ### Empty prefix or suffix sections:
+    Only create extractions for sections that actually exist in the text. Do not create empty prefix or suffix sections if there is no corresponding content in the source text. If the text is findings-only without any impression/conclusion, do not create a findings_suffix extraction.
+    ### Section Usage Guidelines:
+    **findings_prefix**: Reserved exclusively for header information that appears before clinical findings, such as:
+    - Examination details (type of study, technique)
+    - Clinical indication or history
+    - Comparison studies referenced
+    - Technical parameters
+    **findings_body**: Contains the actual clinical findings and observations from the imaging study.
+    **findings_suffix**: Reserved for concluding content that follows the findings, such as impressions or recommendations.
+    **Critical Rule**: Clinical findings should never be categorized as prefix content. If a report begins directly with clinical observations without any header information, create only findings_body and findings_suffix extractions as appropriate.
+    ### Special guidance for findings_prefix organization:
+    When the report has detailed prefix information with clear section headers (like EXAMINATION, CLINICAL INDICATION, COMPARISON, TECHNIQUE), create separate extractions for each section rather than one large block. Use the "section" attribute to label each part:
+    - "Examination" for exam type/title
+    - "Clinical Indication" for clinical history/reason for study
+    - "Comparison" for prior studies referenced
+    - "Technique" for imaging parameters and acquisition details
+    **Important:** Even when examination information appears at the beginning without an explicit "EXAMINATION:" header, it should still be labeled with section:"Examination". This includes standalone exam descriptions that identify the type of imaging study being performed.
+    Always recognize examination-type content and use section:"Examination" regardless of whether it has an explicit header.
+    This structured approach provides better organization and readability.
+    ### Critical for findings_suffix:
+    Do NOT include headers like "IMPRESSION:", "CONCLUSION:", etc. in the extraction_text. Only extract the actual content that follows these headers. The formatting system will add appropriate headers automatically.
+    **Example:** If the text contains "IMPRESSION: 1. Severe arthritis. 2. Labral tear.", extract only "1. Severe arthritis. 2. Labral tear." as the extraction_text.
+    ### Additional Notes for findings_body:
+    - If a single sentence references multiple structures with a shared status (e.g., "liver, gallbladder, spleen appear unremarkable"), please split them into separate extraction lines, each referencing the relevant structure.
+    - If the text mentions subheaders like "CT ABDOMEN" or "CERVICAL SPINE," only create/retain that subheader if it clearly organizes multiple organ-structure findings under it. Do not force subheaders if only 1 or 2 lines belong there. A subheader should ideally group 3+ sections to be meaningful.
+    ### Special guidance for spine reports:
+    - For spine imaging (MRI, CT), organize findings by anatomical level using the format: "Lumbar Spine Levels: L1-L2", "Lumbar Spine Levels: L2-L3", "Cervical Spine Levels: C5-C6", etc.
+    - Separate general spine findings (alignment, lordosis, vertebral heights) from level-specific findings
+    - Use dedicated sections for: "Spinal Cord", "Bones" (for marrow/vertebral body lesions), "Paraspinal Soft Tissues" (for muscle findings)
+    - Each spinal level should get its own section when findings are described level-by-level
+    - This level-by-level organization is preferred over generic "Spine" labeling for clinical utility
+    ### Non-spine skeletal findings:
+    For non-spine skeletal findings, unify them under a single section like "Bones." Only keep laterality (Right/Left) if there is symmetry in the findings.
+    ## Required JSON Format
+    Each final answer must be valid JSON with an array key "extractions". Each "extraction" is an object with:
+    ```json
+    {{
+      "text": "...",
+      "category": "findings_prefix" | "findings_body" | "findings_suffix",
+      "attributes": {{}}
+    }}
+    ```
+    Within "attributes" each attribute should be a key-value pair as shown in the examples below. The attribute **"clinical_significance"** MUST be included for findings_body extractions and should be one of: **"normal"**, **"minor"**, **"significant"**, or **"not_applicable"** to indicate the importance of the finding.
+    ---
+    # Few-Shot Examples
+    The following examples demonstrate how to properly structure different types of radiology reports:
+    {examples}
+    {inference_section}
+    """
+).strip()

prompt_lib.py ADDED Viewed

	@@ -0,0 +1,101 @@

+"""Processing utilities for radiology report structuring prompts.
+This module provides helper functions for processing and formatting prompts
+used in the LangExtract system for radiology report structuring.
+"""
+import dataclasses
+import json
+from typing import Optional
+from langextract.data import ExampleData
+from langextract.data_lib import enum_asdict_factory
+from prompt_instruction import PROMPT_INSTRUCTION
+def clean_dict(obj):
+    """Removes null values and empty objects/lists from dictionary recursively.
+    Dictionaries are rebuilt without any key whose cleaned value is ``None``,
+    ``{}`` or ``[]``. Lists are rebuilt from their cleaned items and drop only
+    items that are ``None``; empty containers inside a list are kept. Any
+    other value is returned unchanged.
+    Args:
+        obj: The object to clean (dict, list, or primitive value).
+    Returns:
+        The cleaned object with null/empty values removed.
+    """
+    if isinstance(obj, dict):
+        cleaned = {}
+        for key, value in obj.items():
+            cleaned_value = clean_dict(value)
+            # Only include non-null, non-empty values
+            if (
+                cleaned_value is not None
+                and cleaned_value != {}
+                and cleaned_value != []
+            ):
+                cleaned[key] = cleaned_value
+        return cleaned
+    if isinstance(obj, list):
+        # Clean every item exactly once. Cleaning it once for the filter and
+        # again for the value doubles the work at each level of list nesting.
+        return [
+            cleaned_item
+            for cleaned_item in map(clean_dict, obj)
+            if cleaned_item is not None
+        ]
+    return obj
+def generate_markdown_prompt(
+    examples: list[ExampleData], input_text: Optional[str] = None
+) -> str:
+    """Render the complete markdown prompt from the few-shot examples.
+    Args:
+        examples: ExampleData objects rendered as numbered few-shot examples.
+        input_text: When non-empty, appended as a trailing inference section.
+    Returns:
+        PROMPT_INSTRUCTION with the examples and inference section filled in.
+    """
+    def render_example(i: int, example: ExampleData) -> str:
+        # Serialize the example, keep only its extractions, drop null/empty fields.
+        as_dict = dataclasses.asdict(example, dict_factory=enum_asdict_factory)
+        json_output = json.dumps(
+            clean_dict({"extractions": as_dict["extractions"]}), indent=2
+        )
+        return f"""## Example {i}
+**Input Text:**
+```
+{example.text}
+```
+**Expected Output:**
+```json
+{json_output}
+```"""
+    examples_formatted = "\n\n---\n\n".join(
+        [render_example(i, example) for i, example in enumerate(examples, 1)]
+    )
+    # The inference section stays empty unless there is text to run on.
+    if not input_text:
+        inference_section = ""
+    else:
+        inference_section = f"""
+## Inference Example:
+**Input Text:**
+```
+{input_text}
+```
+**Expected Output:**
+"""
+    return PROMPT_INSTRUCTION.format(
+        inference_section=inference_section, examples=examples_formatted
+    )

pyproject.toml ADDED Viewed

	@@ -0,0 +1,85 @@

+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "radextract"
+version = "0.1.0"
+description = "Radiology Report Structuring Demo using LangExtract"
+readme = "README.md"
+license = {text = "Apache-2.0"}
+authors = [
+    {name = "Akshay Goel", email = "[email protected]"},
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Healthcare Industry",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Medical Science Apps.",
+    "Topic :: Text Processing :: Linguistic",
+]
+requires-python = ">=3.10"
+dependencies = [
+    "Flask>=3.1.0",
+    "Flask-Limiter>=3.5.0",
+    "gunicorn>=23.0.0",
+    "langextract>=0.1.3",
+    "pandas>=1.3.0",
+    "numpy>=1.20.0",
+    "ml-collections>=0.1.0",
+    "pydantic>=1.8.0",
+    "requests>=2.25.0",
+    "typing-extensions>=4.0.0",
+    "more-itertools>=8.0.0",
+    "langfun>=0.1.0",
+    "google-genai>=0.1.0",
+    "python-dotenv>=1.0.0",
+    "ftfy>=6.0.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest==7.4.0",
+    "pylint==2.17.5",
+    "pyink==24.10.1",
+    "autoflake==2.3.1",
+]
+[project.urls]
+Homepage = "https://huggingface.co/spaces/google/radextract"
+Repository = "https://huggingface.co/spaces/google/radextract"
+"Source Code" = "https://github.com/google/langextract"
+Documentation = "https://github.com/google/langextract"
+[tool.setuptools]
+packages = ["radextract"]
+[tool.setuptools.package-dir]
+radextract = "."
+[tool.pyink]
+line-length = 88
+target-version = ['py39']
+pyink-indentation = 4
+pyink-use-majority-quotes = true
+[tool.pylint.messages_control]
+disable = [
+    "missing-docstring",
+    "too-few-public-methods",
+    "too-many-arguments",
+    "too-many-locals",
+    "too-many-branches",
+    "too-many-statements",
+]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]

report_examples.py ADDED Viewed

	@@ -0,0 +1,645 @@

+"""Example radiology reports for training the structuring model.
+This module contains curated examples of radiology reports with their
+corresponding structured extractions. These examples are used for few-shot
+learning with LangExtract to train the model on proper categorization of
+report sections into prefix, body, and suffix components with appropriate
+clinical significance labels.
+The examples cover various imaging modalities including CT, MRI, and different
+anatomical regions (spine, abdomen, brain, knee) to provide comprehensive
+training coverage for the radiology report structuring task.
+"""
+import textwrap
+from enum import Enum
+import langextract as lx
+class ReportSectionType(Enum):
+    """The three section categories a radiology report is split into.
+    Each value is the string used as ``extraction_class`` in the examples below.
+    """
+    # Header content that precedes the findings.
+    PREFIX = "findings_prefix"
+    # The findings themselves.
+    BODY = "findings_body"
+    # Concluding content that follows the findings (e.g. the impression).
+    SUFFIX = "findings_suffix"
+def get_examples_for_model() -> list[lx.data.ExampleData]:
+    """Examples that structure radiology reports into semantic sections.
+    Returns:
+        List of ExampleData objects containing radiology report examples
+        with their corresponding structured extractions for training
+        the language model.
+    """
+    return [
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                EXAMINATION: CT ABDOMEN AND PELVIS WITH IV CONTRAST
+                CLINICAL INDICATION: Abdominal pain.
+                COMPARISON: None.
+                TECHNIQUE: Axial images of the abdomen and pelvis were obtained following the administration of intravenous contrast material. Coronal and sagittal reformations were reviewed.
+                FINDINGS:
+                No acute abnormality is seen in the visualized lung bases. The liver is normal in size and contour. There is a 1.2 cm simple-appearing low-attenuation lesion in hepatic segment VII, consistent with a cyst. The gallbladder contains numerous calcified gallstones, compatible with cholelithiasis.
+                IMPRESSION:
+                1. Cholelithiasis without evidence of acute cholecystitis.
+                2. Hepatic cyst.
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="EXAMINATION: CT ABDOMEN AND PELVIS WITH IV CONTRAST",
+                    extraction_class="findings_prefix",
+                    attributes={
+                        "section": "Examination",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="CLINICAL INDICATION: Abdominal pain.",
+                    extraction_class="findings_prefix",
+                    attributes={
+                        "section": "Clinical Indication",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="COMPARISON: None.",
+                    extraction_class="findings_prefix",
+                    attributes={
+                        "section": "Comparison",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="TECHNIQUE: Axial images of the abdomen and pelvis were obtained following the administration of intravenous contrast material. Coronal and sagittal reformations were reviewed.",
+                    extraction_class="findings_prefix",
+                    attributes={
+                        "section": "Technique",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="No acute abnormality is seen in the visualized lung bases.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lungs",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The liver is normal in size and contour.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Liver",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="There is a 1.2 cm simple-appearing low-attenuation lesion in hepatic segment VII, consistent with a cyst.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Liver",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The gallbladder contains numerous calcified gallstones, compatible with cholelithiasis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Gallbladder",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="1. Cholelithiasis without evidence of acute cholecystitis.\n2. Hepatic cyst.",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                CLINICAL HISTORY:
+                Low back pain, rule out disc herniation
+                MRI LUMBAR SPINE WITHOUT CONTRAST:
+                FINDINGS:
+                The lumbar lordosis is maintained. Vertebral body heights are preserved.
+                There is a small hemangioma in the L3 vertebral body.
+                The conus medullaris terminates at L1 and appears normal.
+                At L2-L3, there is mild disc desiccation without significant stenosis.
+                At L3-L4, a small posterior disc bulge causes mild central canal narrowing.
+                At L4-L5, there is a large posterior disc herniation with severe central canal stenosis and nerve root impingement.
+                At L5-S1, mild disc bulge without significant stenosis.
+                The paraspinal musculature appears unremarkable.
+                IMPRESSION:
+                Large L4-L5 disc herniation with severe stenosis.
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="CLINICAL HISTORY:\nLow back pain, rule out disc herniation\n\nMRI LUMBAR SPINE WITHOUT CONTRAST:",
+                    extraction_class="findings_prefix",
+                    attributes={},
+                ),
+                lx.data.Extraction(
+                    extraction_text="The lumbar lordosis is maintained. Vertebral body heights are preserved.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lumbar Spine",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="There is a small hemangioma in the L3 vertebral body.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Bones",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The conus medullaris terminates at L1 and appears normal.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Spinal Cord",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At L2-L3, there is mild disc desiccation without significant stenosis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lumbar Spine Levels: L2-L3",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At L3-L4, a small posterior disc bulge causes mild central canal narrowing.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lumbar Spine Levels: L3-L4",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At L4-L5, there is a large posterior disc herniation with severe central canal stenosis and nerve root impingement.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lumbar Spine Levels: L4-L5",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At L5-S1, mild disc bulge without significant stenosis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lumbar Spine Levels: L5-S1",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The paraspinal musculature appears unremarkable.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Paraspinal Soft Tissues",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Large L4-L5 disc herniation with severe stenosis.",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                INDICATION:
+                Neck pain, radiculopathy
+                MRI CERVICAL SPINE:
+                FINDINGS:
+                Normal cervical lordosis is maintained. No vertebral body compression fractures.
+                The cervical spinal cord demonstrates normal signal intensity.
+                At C3-C4, no significant disc disease or stenosis.
+                At C4-C5, mild disc osteophyte complex with mild foraminal narrowing.
+                At C5-C6, moderate disc herniation with moderate central canal stenosis.
+                At C6-C7, small disc bulge without significant stenosis.
+                IMPRESSION:
+                Moderate C5-C6 disc herniation and stenosis.
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="INDICATION: \nNeck pain, radiculopathy\n\nMRI CERVICAL SPINE:",
+                    extraction_class="findings_prefix",
+                    attributes={},
+                ),
+                lx.data.Extraction(
+                    extraction_text="Normal cervical lordosis is maintained. No vertebral body compression fractures.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Cervical Spine",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The cervical spinal cord demonstrates normal signal intensity.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Spinal Cord",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At C3-C4, no significant disc disease or stenosis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Cervical Spine Levels: C3-C4",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At C4-C5, mild disc osteophyte complex with mild foraminal narrowing.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Cervical Spine Levels: C4-C5",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At C5-C6, moderate disc herniation with moderate central canal stenosis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Cervical Spine Levels: C5-C6",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="At C6-C7, small disc bulge without significant stenosis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Cervical Spine Levels: C6-C7",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Moderate C5-C6 disc herniation and stenosis.",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                TECHNIQUE:
+                Multidetector helical CT from lung bases to adrenals with and without intravenous contrast.
+                FINDINGS:
+                LIVER/GALLBLADDER/SPLEEN: The liver has a normal appearance. Gallbladder wall appears normal. The spleen is normal in size.
+                PANCREAS/ADRENALS: The pancreas and bilateral adrenal glands appear unremarkable.
+                RETROPERITONEUM: No lymphadenopathy. No fluid collection.
+                IMPRESSION:
+                Normal abdominal CT.
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="TECHNIQUE: \nMultidetector helical CT from lung bases to adrenals with and without intravenous contrast.",
+                    extraction_class="findings_prefix",
+                    attributes={},
+                ),
+                lx.data.Extraction(
+                    extraction_text="The liver has a normal appearance.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Liver",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Gallbladder wall appears normal.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Gallbladder",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The spleen is normal in size.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Spleen",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The pancreas and bilateral adrenal glands appear unremarkable.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Pancreas/Adrenals",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="No lymphadenopathy.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Retroperitoneum",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="No fluid collection.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Retroperitoneum",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Normal abdominal CT.",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                HISTORY:
+                Lower abdominal pain
+                CT ABDOMEN/PELVIS WITH CONTRAST:
+                FINDINGS:
+                LIVER: Multiple hepatic metastases are present, measuring up to 3.2 cm.
+                KIDNEYS: The left kidney shows moderate hydronephrosis. The right kidney appears normal.
+                LYMPH NODES: Enlarged retroperitoneal lymph nodes, largest measuring 2.1 cm.
+                IMPRESSION:
+                1. Multiple hepatic metastases
+                2. Left hydronephrosis
+                3. Retroperitoneal lymphadenopathy
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="HISTORY: \nLower abdominal pain\n\nCT ABDOMEN/PELVIS WITH CONTRAST:",
+                    extraction_class="findings_prefix",
+                    attributes={},
+                ),
+                lx.data.Extraction(
+                    extraction_text="Multiple hepatic metastases are present, measuring up to 3.2 cm.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Liver",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The left kidney shows moderate hydronephrosis.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Kidneys",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The right kidney appears normal.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Kidneys",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Enlarged retroperitoneal lymph nodes, largest measuring 2.1 cm.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lymph Nodes",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="1. Multiple hepatic metastases\n2. Left hydronephrosis  \n3. Retroperitoneal lymphadenopathy",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                EXAMINATION:
+                MRI brain without contrast
+                CLINICAL HISTORY:
+                Headaches
+                FINDINGS:
+                The brain parenchyma demonstrates normal signal intensity. No mass lesions are identified.
+                The ventricular system is normal in size and configuration.
+                No abnormal enhancement is seen.
+                IMPRESSION:
+                Normal brain MRI.
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="EXAMINATION:\nMRI brain without contrast\n\nCLINICAL HISTORY:\nHeadaches",
+                    extraction_class="findings_prefix",
+                    attributes={},
+                ),
+                lx.data.Extraction(
+                    extraction_text="The brain parenchyma demonstrates normal signal intensity.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Brain Parenchyma",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="No mass lesions are identified.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Brain Parenchyma",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The ventricular system is normal in size and configuration.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Ventricular System",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="No abnormal enhancement is seen.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Enhancement",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Normal brain MRI.",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                INDICATION:
+                Right knee pain
+                MRI RIGHT KNEE:
+                FINDINGS:
+                MENISCI: There is a complex tear of the medial meniscus. The lateral meniscus appears intact.
+                LIGAMENTS: The ACL shows complete rupture. The PCL, MCL, and LCL are intact.
+                BONES: Mild bone marrow edema is present in the medial femoral condyle.
+                IMPRESSION:
+                1. Complex medial meniscal tear
+                2. Complete ACL rupture
+                3. Bone marrow edema in medial femoral condyle
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="INDICATION:\nRight knee pain\n\nMRI RIGHT KNEE:",
+                    extraction_class="findings_prefix",
+                    attributes={},
+                ),
+                lx.data.Extraction(
+                    extraction_text="There is a complex tear of the medial meniscus.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Menisci",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The lateral meniscus appears intact.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Menisci",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The ACL shows complete rupture.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Ligaments",
+                        "clinical_significance": "significant",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The PCL, MCL, and LCL are intact.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Ligaments",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Mild bone marrow edema is present in the medial femoral condyle.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Bones",
+                        "clinical_significance": "minor",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="1. Complex medial meniscal tear\n2. Complete ACL rupture\n3. Bone marrow edema in medial femoral condyle",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+        lx.data.ExampleData(
+            text=textwrap.dedent(
+                """\
+                EXAMINATION: CT CHEST
+                FINDINGS:
+                The longs are clear bilaterally. The hart size is normal. No pleural effushion.
+                IMPRESSION:
+                Normal chest CT.
+                """
+            ).rstrip(),
+            extractions=[
+                lx.data.Extraction(
+                    extraction_text="EXAMINATION: CT CHEST",
+                    extraction_class="findings_prefix",
+                    attributes={
+                        "section": "Examination",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The lungs are clear bilaterally.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Lungs",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="The heart size is normal.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Heart",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="No pleural effusion.",
+                    extraction_class="findings_body",
+                    attributes={
+                        "section": "Pleura",
+                        "clinical_significance": "normal",
+                    },
+                ),
+                lx.data.Extraction(
+                    extraction_text="Normal chest CT.",
+                    extraction_class="findings_suffix",
+                    attributes={},
+                ),
+            ],
+        ),
+    ]

run_docker.sh ADDED Viewed

	@@ -0,0 +1,58 @@

+#!/bin/bash
+# Build the radextract Docker image and run it as a detached container.
+# Requires a running Docker daemon and an env.list file in the current
+# directory. The app is published on host port 7870 and ./cache is mounted at
+# /app/cache inside the container.
+set -e
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+# Name shared by the image and the container
+APP_NAME="radiology-report-app"
+# URL probed to decide whether the application came up
+APP_URL="http://localhost:7870/"
+# Upper bound, in seconds, on how long to wait for the first successful probe
+MAX_WAIT_SECONDS=30
+echo -e "${GREEN}Setting up radextract with Docker${NC}"
+# Check if Docker is running
+if ! docker info >/dev/null 2>&1; then
+    echo -e "${RED}Error: Docker is not running. Please start Docker Desktop.${NC}"
+    exit 1
+fi
+# Check if env.list exists
+if [ ! -f "env.list" ]; then
+    echo -e "${RED}Error: env.list file not found!${NC}"
+    echo "Please create env.list with your API keys and configuration."
+    exit 1
+fi
+# Stop and remove existing container if it exists
+# (`|| true` keeps `set -e` from aborting when there is nothing to remove)
+echo -e "${YELLOW}Cleaning up existing containers...${NC}"
+docker stop "$APP_NAME" 2>/dev/null || true
+docker rm "$APP_NAME" 2>/dev/null || true
+# Build the Docker image
+echo -e "${YELLOW}Building Docker image...${NC}"
+docker build -t "$APP_NAME" .
+# Run the container
+echo -e "${YELLOW}Starting application in Docker container...${NC}"
+docker run -d \
+    --name "$APP_NAME" \
+    --env-file env.list \
+    -p 7870:7870 \
+    -v "$(pwd)/cache:/app/cache" \
+    "$APP_NAME"
+# Wait for the application to start
+echo -e "${YELLOW}Waiting for application to start...${NC}"
+# Poll once per second instead of sleeping a fixed 5 seconds and probing once:
+# a slow cold start is no longer misreported as a failure, and a fast start
+# returns as soon as the first probe succeeds.
+ready=0
+for ((waited = 0; waited < MAX_WAIT_SECONDS; waited++)); do
+    if curl -s "$APP_URL" >/dev/null; then
+        ready=1
+        break
+    fi
+    sleep 1
+done
+# Check if the application is running
+if [ "$ready" -eq 1 ]; then
+    echo -e "${GREEN}Application is running at ${APP_URL}${NC}"
+    echo ""
+    echo "To view logs: docker logs -f ${APP_NAME}"
+    echo "To stop: docker stop ${APP_NAME}"
+    echo "To restart: docker restart ${APP_NAME}"
+else
+    echo -e "${RED}Application failed to start. Check logs with: docker logs ${APP_NAME}${NC}"
+    exit 1
+fi

run_local.sh ADDED Viewed

	@@ -0,0 +1,45 @@

+#!/bin/bash
+set -e
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+echo -e "${GREEN}Setting up radextract development environment${NC}"
+# Check if virtual environment exists
+if [ ! -d "venv" ]; then
+    echo -e "${YELLOW}Creating virtual environment...${NC}"
+    python3 -m venv venv
+fi
+# Activate virtual environment
+echo -e "${YELLOW}Activating virtual environment...${NC}"
+source venv/bin/activate
+# Install dependencies
+echo -e "${YELLOW}Installing dependencies...${NC}"
+if [ "$1" = "dev" ]; then
+    echo -e "${YELLOW}Installing with development dependencies...${NC}"
+    pip install -e ".[dev]"
+else
+    pip install -e .
+fi
+# Check if env.list exists
+if [ ! -f "env.list" ]; then
+    echo -e "${RED}Error: env.list file not found!${NC}"
+    echo -e "${YELLOW}Please create env.list with required environment variables${NC}"
+    exit 1
+fi
+# Load environment variables
+echo -e "${YELLOW}Loading environment variables...${NC}"
+export $(cat env.list | xargs)
+# Start the application
+echo -e "${GREEN}Starting radextract application on http://localhost:7870${NC}"
+python app.py

sanitize.py ADDED Viewed

	@@ -0,0 +1,104 @@

+"""Text preprocessing for radiology reports with complex Unicode and formatting.
+Handles reports containing complex Unicode symbolic characters and non-standard
+structural formatting that are not currently supported by the prompt and LangExtract
+library. Prevents timeout issues by normalizing problematic characters and structures
+to formats compatible with downstream processing.
+Typical usage example:
+    from sanitize import preprocess_report
+    clean_text = preprocess_report(raw_report)
+"""
+from __future__ import annotations
+import re
+import ftfy
+# Code-point -> ASCII replacement table applied with str.translate().
+_TRANSLATE = str.maketrans(
+    {
+        0x2022: "*",  # U+2022 BULLET
+        0x25CF: "*",  # U+25CF BLACK CIRCLE
+        0x27A1: "->",  # U+27A1 BLACK RIGHTWARDS ARROW
+        0xF0E0: "->",  # U+F0E0 Private Use Area; presumably a symbol-font arrow -- TODO confirm
+        0x2192: "->",  # U+2192 RIGHTWARDS ARROW
+        0x2190: "<-",  # U+2190 LEFTWARDS ARROW
+        0x00D7: "x",  # U+00D7 MULTIPLICATION SIGN
+        0x2191: "up",  # U+2191 UPWARDS ARROW
+        0x2642: "male",  # U+2642 MALE SIGN
+        0x2640: "female",  # U+2640 FEMALE SIGN
+        0x2010: "-",  # U+2010 HYPHEN
+        0x2013: "-",  # U+2013 EN DASH
+        0x2014: "-",  # U+2014 EM DASH
+        0x00A0: " ",  # U+00A0 NO-BREAK SPACE
+    }
+)
+# One or more spaces/tabs (newlines are deliberately not included).
+_WS = re.compile(r"[ \t]+")
+# Three or more line breaks, optionally separated by whitespace, i.e. two or
+# more consecutive blank lines.
+_BLANKS = re.compile(r"\n\s*\n\s*\n+")
+# Structure normalization patterns
+# "--- BEGIN <label> ---" wrapper plus any newlines that follow it.
+_BEGIN = re.compile(r"---\s*BEGIN [^-]+---\n*", re.I)
+# "--- END <label> ---" wrapper plus the newlines before and whitespace after it.
+_END = re.compile(r"\n*---\s*END [^-]+---\s*", re.I)
+# "*** TITLE ***" header; group 1 captures the title text.
+_HEADER = re.compile(r"\*{3}\s*([^*]+?)\s*\*{3}", re.I)
+# Leading bullet markers (*, U+2022, U+25CF or -) at the start of a line,
+# together with the whitespace that follows them.
+_BULLET_HDR = re.compile(r"^[ \t]*[\*\u2022\u25CF-]+\s*", re.M)
+# Line-leading enumerator such as "1)" or "1."; group 1 captures the number.
+_ENUM = re.compile(r"^[ \t]*(\d+)[\)\.][ \t]+", re.M)
+def sanitize_text(text: str) -> str:
+    """Sanitizes Unicode characters and normalizes whitespace.
+    Applies ftfy text repair, translates problematic Unicode symbols to ASCII
+    equivalents, normalizes whitespace, and removes excessive blank lines.
+    Args:
+        text: The input text to sanitize.
+    Returns:
+        Sanitized text with Unicode issues resolved and whitespace normalized.
+    """
+    out = ftfy.fix_text(text, remove_control_chars=True, normalization="NFC")
+    out = out.translate(_TRANSLATE)
+    out = _WS.sub(" ", out)
+    out = out.replace("\r\n", "\n").replace("\r", "\n")
+    out = _BLANKS.sub("\n\n", out)
+    return out.strip()
+def normalize_structure(text: str) -> str:
+    """Normalizes structural elements in radiology reports.
+    Removes report wrappers, converts asterisk headers to colon format,
+    removes bullet prefixes, and standardizes enumerations.
+    Args:
+        text: The input text to normalize.
+    Returns:
+        Text with structural elements normalized for consistent formatting.
+    """
+    text = _BEGIN.sub("", text)
+    text = _END.sub("", text)
+    text = _HEADER.sub(lambda m: f"{m.group(1).strip()}:", text)
+    text = _BULLET_HDR.sub("", text)
+    text = _ENUM.sub(lambda m: f"{m.group(1)}. ", text)
+    return text.strip()
+def preprocess_report(raw: str) -> str:
+    """Preprocesses radiology reports with sanitization and normalization.
+    Combines Unicode sanitization and structural normalization to prepare
+    radiology reports for downstream processing. This is the main entry point
+    for text preprocessing.
+    Args:
+        raw: The raw radiology report text.
+    Returns:
+        Preprocessed text ready for structured extraction.
+    """
+    return normalize_structure(sanitize_text(raw))

social_sharing.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""
+Social sharing configuration and utilities for RadExtract.
+This module handles all social media sharing functionality including
+URL generation, message formatting, and platform-specific configurations.
+"""
+from urllib.parse import quote_plus
+class SocialSharingConfig:
+    """Configuration and utilities for social media sharing."""
+    # Production URL for consistent sharing
+    PRODUCTION_URL = "https://google-radextract.hf.space"
+    # Twitter/X share message
+    TWITTER_MESSAGE = (
+        "Check out this new demo from @AkshayGoelMD and the team @GoogleResearch: Gemini + LangExtract structure & optimize radiology reports.\n\n"
+        "Try it here! → https://google-radextract.hf.space \n\n"
+        "#Gemini #LangExtract #RadExtract #OpenSource #Google #Radiology"
+    )
+    # LinkedIn sharing content
+    LINKEDIN_TITLE = "RadExtract – Radiology Report Structuring Demo"
+    LINKEDIN_SUMMARY = "Gemini-powered radiology report structuring demo"
+    @classmethod
+    def get_sharing_context(cls, request_url_root):
+        """
+        Generate all social sharing variables for template rendering.
+        Args:
+            request_url_root: The root URL from Flask request
+        Returns:
+            dict: All variables needed for social sharing in templates
+        """
+        page_url = request_url_root.rstrip("/")
+        # Use production URL for sharing (consistent experience, localhost won't work for previews)
+        share_url_for_sharing = (
+            cls.PRODUCTION_URL if "localhost" in page_url else page_url
+        )
+        return {
+            "share_url": page_url,
+            "share_url_for_sharing": share_url_for_sharing,
+            "share_url_encoded": quote_plus(share_url_for_sharing),
+            "share_text": quote_plus(cls.TWITTER_MESSAGE),
+            "linkedin_title": quote_plus(cls.LINKEDIN_TITLE),
+            "linkedin_summary": quote_plus(cls.LINKEDIN_SUMMARY),
+        }

start.sh ADDED Viewed

	@@ -0,0 +1,27 @@

+#!/bin/bash
+# Check if persistent storage is available and set up logging accordingly
+if [ -d "/data" ]; then
+    mkdir -p /data/logs
+    LOG_FILE="/data/logs/radextract-$(date +%Y-%m-%d).log"
+    exec gunicorn \
+        --workers 6 \
+        --worker-class sync \
+        --timeout 60 \
+        --keep-alive 5 \
+        --error-logfile - \
+        --log-level warning \
+        -b 0.0.0.0:7870 \
+        app:app 2>&1 | tee -a "$LOG_FILE"
+else
+    # No persistent storage, just run normally
+    exec gunicorn \
+        --workers 6 \
+        --worker-class sync \
+        --timeout 60 \
+        --keep-alive 5 \
+        --error-logfile - \
+        --log-level warning \
+        -b 0.0.0.0:7870 \
+        app:app
+fi

static/copy.js ADDED Viewed

	@@ -0,0 +1,177 @@

+/**
+ * Copy functionality for RadExtract output
+ * Modular, testable, and maintainable
+ */
+/**
+ * Initialize the copy button with event listener
+ */
+export function initCopyButton() {
+  const btn = document.getElementById('copy-output');
+  if (!btn) return;
+  // Add accessibility attributes
+  btn.setAttribute('aria-label', 'Copy findings to clipboard');
+  btn.addEventListener('click', async () => {
+    const text = buildTextToCopy();
+    if (!text) return;
+    const succeeded = await copyToClipboard(text);
+    if (succeeded) flashSuccess(btn);
+  });
+  // Initialize button state based on output availability
+  updateCopyButtonState();
+}
+/**
+ * Build the text to copy based on current mode and output
+ * @returns {string} Text to copy, or empty string if nothing to copy
+ */
+function buildTextToCopy() {
+  // ① Raw-JSON mode
+  if (document.getElementById('raw-toggle')?.checked) {
+    const rawOutput = document.getElementById('raw-output');
+    const json = rawOutput?._jsonData;
+    return json ? JSON.stringify(json, null, 2) : '';
+  }
+  // ② Pre-computed plain text (preferred path)
+  const outputEl = document.getElementById('output-text');
+  if (outputEl?.dataset.copy) {
+    return outputEl.dataset.copy;
+  }
+  // ③ Fallback: parse DOM structure (legacy support)
+  return parseDOMStructure(outputEl) || outputEl?.textContent || '';
+}
+/**
+ * Parse DOM structure to extract formatted text (fallback method)
+ * @param {HTMLElement} container - Output container element
+ * @returns {string} Formatted text
+ */
+function parseDOMStructure(container) {
+  if (!container || !container.children.length) return '';
+  const sections = [];
+  // Get all section headers and content
+  const sectionHeaders = container.querySelectorAll('.section-header');
+  sectionHeaders.forEach((header) => {
+    sections.push(header.textContent);
+    let nextElement = header.nextElementSibling;
+    while (nextElement && !nextElement.classList.contains('section-header')) {
+      if (nextElement.classList.contains('primary-label')) {
+        sections.push('\n' + nextElement.textContent);
+      } else if (nextElement.classList.contains('finding-list')) {
+        nextElement.querySelectorAll('li').forEach((li) => {
+          sections.push('• ' + li.textContent.trim());
+        });
+      } else if (nextElement.classList.contains('single-finding')) {
+        sections.push('- ' + nextElement.textContent.trim());
+      } else if (nextElement.textContent.trim()) {
+        sections.push(nextElement.textContent.trim());
+      }
+      nextElement = nextElement.nextElementSibling;
+    }
+    sections.push(''); // Add blank line after each section
+  });
+  // Handle prefix content (like examination type)
+  const allContent = container.children;
+  if (
+    allContent.length > 0 &&
+    !allContent[0].classList.contains('section-header')
+  ) {
+    const prefixContent = [];
+    for (let i = 0; i < allContent.length; i++) {
+      if (allContent[i].classList.contains('section-header')) break;
+      if (allContent[i].textContent.trim()) {
+        prefixContent.push(allContent[i].textContent.trim());
+      }
+    }
+    if (prefixContent.length > 0) {
+      return prefixContent.join('\n') + '\n\n' + sections.join('\n');
+    }
+  }
+  return sections
+    .join('\n')
+    .replace(/\n{3,}/g, '\n\n')
+    .trim();
+}
+/**
+ * Copy text to clipboard with fallback for older browsers
+ * @param {string} text - Text to copy
+ * @returns {Promise<boolean>} Success status
+ */
+async function copyToClipboard(text) {
+  // Check if clipboard API is available and secure context
+  if (navigator.clipboard && window.isSecureContext) {
+    try {
+      await navigator.clipboard.writeText(text);
+      return true;
+    } catch (err) {
+      console.warn('Clipboard API failed, trying fallback:', err);
+      return legacyCopy(text);
+    }
+  } else {
+    // Use fallback for older browsers or insecure contexts
+    return legacyCopy(text);
+  }
+}
+/**
+ * Legacy clipboard copy using execCommand
+ * @param {string} text - Text to copy
+ * @returns {boolean} Success status
+ */
+function legacyCopy(text) {
+  const ta = Object.assign(document.createElement('textarea'), {
+    value: text,
+    style: 'position:fixed;left:-9999px',
+  });
+  document.body.appendChild(ta);
+  ta.select();
+  let ok = false;
+  try {
+    ok = document.execCommand('copy');
+  } catch (err) {
+    console.error('Legacy copy failed:', err);
+  }
+  document.body.removeChild(ta);
+  return ok;
+}
+/**
+ * Show success feedback on button
+ * @param {HTMLElement} button - Copy button element
+ */
+function flashSuccess(button) {
+  button.classList.add('copied');
+  button.setAttribute('title', 'Copied!');
+  setTimeout(() => {
+    button.classList.remove('copied');
+    button.setAttribute('title', 'Copy output to clipboard');
+  }, 2000);
+}
+/**
+ * Update copy button enabled/disabled state based on output availability
+ */
+export function updateCopyButtonState() {
+  const btn = document.getElementById('copy-output');
+  if (!btn) return;
+  const outputText = document.getElementById('output-text');
+  const hasOutput = outputText && outputText.textContent.trim().length > 0;
+  btn.disabled = !hasOutput;
+}

static/favicon.svg ADDED Viewed

static/google-research-logo.svg ADDED Viewed

static/reset.js ADDED Viewed

	@@ -0,0 +1,103 @@

+// reset.js
+export function initClearButton() {
+  const clearBtn = document.getElementById('clear-input');
+  const inputArea = document.getElementById('input-text');
+  const outputBox = document.getElementById('output-text');
+  const rawBox = document.getElementById('raw-output');
+  const outputContainer = document.getElementById('output-text-container');
+  const instructionsEl = document.getElementById('instructions');
+  const promptOutput = document.getElementById('prompt-output');
+  if (!clearBtn || !inputArea) return;
+  // Add accessibility attributes
+  clearBtn.setAttribute('aria-label', 'Clear input text');
+  // Update button state based on input content
+  function updateClearButtonState() {
+    if (inputArea.value.trim()) {
+      clearBtn.disabled = false;
+    } else {
+      clearBtn.disabled = true;
+    }
+  }
+  // Initialize button state
+  updateClearButtonState();
+  // Monitor input changes
+  inputArea.addEventListener('input', updateClearButtonState);
+  clearBtn.addEventListener('click', async () => {
+    // 1. Clear user input
+    inputArea.value = '';
+    // 2. Hide or empty outputs
+    if (outputBox) {
+      outputBox.textContent = '';
+      outputBox.dataset.copy = ''; // Clear the copy data
+    }
+    if (rawBox) {
+      rawBox.style.display = 'none';
+      rawBox.textContent = '';
+      rawBox._jsonData = null;
+    }
+    // 3. Clear any error messages (look for error message simple)
+    const errorBox = outputContainer?.querySelector('.error-message-simple');
+    if (errorBox) {
+      errorBox.remove();
+    }
+    // 4. Reset ancillary UI
+    const copyBtn = document.getElementById('copy-output');
+    if (copyBtn) {
+      copyBtn.disabled = true;
+    }
+    // 5. Hide prompt output if visible
+    if (promptOutput) {
+      promptOutput.style.display = 'none';
+      promptOutput.textContent = '';
+    }
+    // 6. Show instructions again
+    if (instructionsEl) {
+      instructionsEl.style.display = 'block';
+    }
+    // 7. Flash success animation
+    await flashSuccess(clearBtn);
+    // 8. Update button state
+    updateClearButtonState();
+    // 9. Return focus to input for quick re-entry
+    inputArea.focus();
+  });
+  // Export the update function so it can be called externally
+  return { updateClearButtonState };
+}
+// Export a standalone update function that can be called from other modules
+export function updateClearButtonState() {
+  const clearBtn = document.getElementById('clear-input');
+  const inputArea = document.getElementById('input-text');
+  if (!clearBtn || !inputArea) return;
+  if (inputArea.value.trim()) {
+    clearBtn.disabled = false;
+  } else {
+    clearBtn.disabled = true;
+  }
+}
+// Flash success animation (similar to copy button)
+async function flashSuccess(button) {
+  button.classList.add('cleared');
+  await new Promise((resolve) => setTimeout(resolve, 1500));
+  button.classList.remove('cleared');
+}

static/sample_reports.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "samples": [
+    {
+      "id": "abdominal_ct",
+      "title": "Abdominal CT",
+      "modality": "CT",
+      "text": "EXAMINATION: CT abdomen and pelvis with IV contrast\nCLINICAL INDICATION: Abdominal pain, rule out acute pathology\nCOMPARISON: None available\nTECHNIQUE: Axial images of the abdomen and pelvis were obtained following administration of intravenous contrast material. Coronal and sagittal reformations were performed.\n\nFINDINGS:\nNo acute abnormality is seen in the visualized lung bases. The liver is normal in size and contour. There is a 1.2 cm simple-appearing low-attenuation lesion in hepatic segment VII, consistent with a cyst. The gallbladder contains numerous calcified gallstones, compatible with cholelithiasis, without gallbladder wall thickening, pericholecystic fluid, or other sonographic signs of acute cholecystitis. The common bile duct is non-dilated, measuring approximately 4 mm. The pancreas is unremarkable without focal mass or peripancreatic inflammatory stranding. The spleen and adrenal glands appear unremarkable. A 9 mm simple left renal cyst is noted. The kidneys are otherwise unremarkable without hydronephrosis or nephrolithiasis. There is sigmoid diverticulosis without evidence of acute diverticulitis.\n\nIMPRESSION:\n1. Cholelithiasis without evidence of acute cholecystitis.\n2. Hepatic and renal cysts.\n3. Sigmoid diverticulosis without acute diverticulitis."
+    },
+    {
+      "id": "lumbar_spine_mri",
+      "title": "Lumbar Spine MRI",
+      "modality": "MRI",
+      "text": "Exam: MRI Lumbar Spine\nClinical Indication: Low back pain, radiculopathy\n\nThere is mild degenerative anterolisthesis of L4 on L5. The normal lumbar lordosis is otherwise maintained. Vertebral body heights are preserved. There is a T1 and T2 hyperintense lesion in the L2 vertebral body consistent with a benign hemangioma. Marrow signal is otherwise unremarkable. The conus medullaris terminates at a normal level and is unremarkable in signal intensity.\n\nAt L1-L2 and L2-L3, there is mild disc desiccation without significant canal or foraminal stenosis. At L3-L4, a shallow posterior disc bulge and mild facet arthropathy result in mild central canal narrowing. The neural foramina are patent. At L4-L5, there is advanced disc space narrowing and desiccation. A broad-based posterior disc protrusion with a superimposed left paracentral extrusion severely narrows the central canal and contacts the traversing left S1 nerve root. There is moderate left neural foraminal stenosis. At L5-S1, mild disc desiccation is present without significant canal or foraminal stenosis. The paraspinal soft tissues are unremarkable.\n\nIMPRESSION:\n1. Severe L4-L5 disc protrusion with superimposed left paracentral extrusion, resulting in severe central canal narrowing and contact with the left S1 nerve root.\n2. Multilevel degenerative disc disease, most advanced at L4-L5.\n3. Benign hemangioma in L2 vertebral body."
+    },
+    {
+      "id": "shoulder_mri",
+      "title": "Shoulder MRI",
+      "modality": "MRI",
+      "text": "A full-thickness, full-width tear of the supraspinatus tendon is present, with the torn tendon end retracted approximately 2 cm medially to the level of the glenoid rim. There is moderate fatty infiltration and atrophy of the supraspinatus muscle. The infraspinatus, teres minor, and subscapularis tendons and muscles appear intact. There is a moderate joint effusion with synovial thickening. The glenoid labrum shows a small superior labral tear. The biceps tendon is intact and properly positioned within the bicipital groove. The acromioclavicular joint shows mild degenerative changes with small osteophytes but no significant narrowing."
+    },
+    {
+      "id": "abdominal_mri_pkd",
+      "title": "Abdominal MRI",
+      "modality": "MRI",
+      "text": "EXAMINATION: MRI abdomen without and with gadolinium contrast\nCLINICAL INDICATION: Polycystic kidney disease with suspected cyst infection, flank pain, fever\nCOMPARISON: CT abdomen from 3 months ago\nTECHNIQUE: Axial and coronal T1-weighted, T2-weighted, and post-gadolinium images were obtained.\n\nFINDINGS:\nBoth kidneys are markedly enlarged. The right kidney measures 18.2 cm and the left kidney measures 17.8 cm in length. Innumerable thin-walled cysts of varying sizes are present throughout both kidneys, consistent with autosomal dominant polycystic kidney disease. Several cysts demonstrate T1 hyperintensity consistent with hemorrhagic or proteinaceous content, particularly a 4.2 cm cyst in the right upper pole and a 3.1 cm cyst in the left mid-pole. \n\nA complex 5.8 cm cyst in the left lower pole demonstrates thick irregular walls, internal septations, and rim enhancement following contrast administration, highly suspicious for infected cyst. Surrounding perinephric inflammatory stranding is present. An additional 2.8 cm cyst in the right lower pole shows similar findings concerning for secondary infection.\n\nMultiple hepatic cysts are noted, the largest measuring 3.4 cm in segment IV. The liver is otherwise normal in signal intensity and enhancement pattern. The spleen, pancreas, and adrenal glands appear unremarkable. There is mild ascites in the pelvis. No hydronephrosis is identified despite the numerous cysts.\n\nIMPRESSION:\n1. Autosomal dominant polycystic kidney disease with bilateral renal enlargement and innumerable cysts.\n2. Probable infected cysts in the left lower pole (5.8 cm) and right lower pole (2.8 cm) with surrounding inflammatory changes.\n3. Multiple hemorrhagic cysts bilaterally.\n4. Multiple hepatic cysts.\n5. Mild ascites."
+    },
+    {
+      "id": "hip_mri",
+      "title": "Hip MRI",
+      "modality": "MRI",
+      "text": "There is a small joint effusion. Diffuse thinning of the articular cartilage is noted at the weight-bearing superior acetabulum and femoral head, with near full-thickness loss anterosuperiorly. A degenerative labral tear is present at the anterosuperior acetabulum. The joint capsule shows mild thickening. Moderate subchondral bone marrow edema is seen in the femoral head and acetabulum. Small subchondral cysts are noted in the superior acetabulum. The hip abductor tendons show signal alteration consistent with tendinosis, and there is a partial-thickness tear of the gluteus medius tendon at its greater trochanteric insertion.\n\nIMPRESSION:\n1. Moderate to severe osteoarthritis with cartilage loss and subchondral changes.\n2. Anterosuperior labral tear.\n3. Partial-thickness gluteus medius tendon tear with tendinosis."
+    },
+    {
+      "id": "chest_xray",
+      "title": "Chest X-Ray",
+      "modality": "XR",
+      "text": "Study: Chest Radiograph\n\nThe cardiac silhouette is normal in size and contour. The mediastinal contours are within normal limits. There is a 8 mm well-circumscribed nodule in the right upper lobe. The remainder of the lungs are clear without consolidation, pneumothorax, or pleural effusion. The pulmonary vasculature appears normal. No acute bony abnormalities are identified. The visualized upper abdomen is unremarkable."
+    },
+    {
+      "id": "cta_pulmonary_embolus",
+      "title": "CTA Pulmonary Embolus",
+      "modality": "CT",
+      "text": "EXAMINATION: CT angiography of the chest for pulmonary embolism\nCLINICAL INDICATION: Shortness of breath, chest pain, elevated D-dimer, rule out pulmonary embolism\nCOMPARISON: Chest X-ray from 2 days ago\nTECHNIQUE: Axial CT images of the chest were obtained following rapid intravenous administration of iodinated contrast material. Images were reconstructed in axial, coronal, and sagittal planes with MIP and VRT reformations.\n\nFINDINGS:\nThere are multiple filling defects consistent with acute pulmonary emboli involving the right main pulmonary artery extending into the right upper and middle lobe segmental branches. Additional smaller emboli are present in the left lower lobe subsegmental arteries. The main pulmonary artery is mildly dilated, measuring 3.2 cm in diameter. There is mild right heart strain with flattening of the interventricular septum and enlargement of the right ventricle. No evidence of right heart failure or pericardial effusion.\n\nThe lungs show mild bilateral lower lobe atelectasis and small bilateral pleural effusions. No consolidation or pneumothorax is identified. The mediastinal and hilar lymph nodes are not enlarged. The aorta and great vessels appear normal. The visualized portions of the upper abdomen are unremarkable. No acute bony abnormalities are identified.\n\nIMPRESSION:\n1. Acute pulmonary emboli involving the right main, upper and middle lobe segmental arteries, and left lower lobe subsegmental arteries.\n2. Mild pulmonary hypertension with right heart strain.\n3. Small bilateral pleural effusions and bilateral lower lobe atelectasis."
+    },
+    {
+      "id": "abdominal_ultrasound",
+      "title": "Abdominal Ultrasound",
+      "modality": "US",
+      "text": "EXAMINATION: Ultrasound of the abdomen\nCLINICAL INDICATION: Right upper quadrant pain, abnormal liver function tests\nCOMPARISON: None available\nTECHNIQUE: Real-time ultrasound examination of the abdomen was performed using a curved array transducer. Multiple images were obtained in sagittal, transverse, and oblique planes.\n\nFINDINGS:\nThe liver is normal in size measuring 15.2 cm in the midclavicular line. The hepatic parenchyma demonstrates increased echogenicity consistent with fatty infiltration. There is a well-defined hyperechoic lesion in the right hepatic lobe measuring 2.1 x 1.8 cm, consistent with a hemangioma. No focal hepatic masses or intrahepatic biliary dilatation is identified. Portal vein flow is normal on Doppler evaluation. The gallbladder is distended and contains multiple echogenic foci with posterior acoustic shadowing, consistent with cholelithiasis. The largest stone measures approximately 1.5 cm. The gallbladder wall measures 2 mm in thickness, which is within normal limits. No pericholecystic fluid is identified. Common bile duct measures 4 mm, which is normal. The visualized portions of the pancreatic head and body appear normal in echogenicity and size. The pancreatic duct is not dilated. The right kidney measures 10.8 cm and the left kidney measures 11.1 cm. Both kidneys demonstrate normal cortical echogenicity and corticomedullary differentiation. No hydronephrosis, stones, or masses are identified. The spleen is normal in size and echogenicity, measuring 10.2 cm in length.\n\nIMPRESSION:\n1. Cholelithiasis without evidence of acute cholecystitis.\n2. Hepatic steatosis (fatty liver).\n3. 2.1 cm hepatic hemangioma in the right lobe.\n4. Normal kidneys, spleen, and visualized pancreas."
+    },
+    {
+      "id": "cervical_spine_mri",
+      "title": "Cervical Spine MRI",
+      "modality": "MRI",
+      "text": "MRI Cervical Spine:\nComparison: MRI cervical spine dated 6 months ago\n\nThe cervical lordosis is maintained. Vertebral body heights and alignment are preserved. The spinal cord demonstrates normal signal intensity throughout its visualized extent. At C3-C4, there is mild disc desiccation without significant canal narrowing. At C4-C5, a small posterior disc osteophyte complex results in mild central canal narrowing. The neural foramina remain patent. At C5-C6, there is moderate disc space narrowing with a broad-based posterior disc bulge and bilateral uncinate spurring, causing mild to moderate bilateral neural foraminal narrowing. At C6-C7, mild disc desiccation is present without significant stenosis. The prevertebral soft tissues are unremarkable.\n\nIMPRESSION:\n1. Multilevel cervical spondylosis, most pronounced at C5-C6.\n2. Mild to moderate bilateral C5-C6 neural foraminal narrowing.\n3. No spinal cord compression or significant central canal stenosis."
+    },
+    {
+      "id": "whole_body_petct",
+      "title": "Whole-Body FDG PET/CT",
+      "modality": "PET",
+      "text": "EXAMINATION: Whole-body fluorodeoxyglucose (FDG) PET/CT\nCLINICAL INDICATION: Staging of newly diagnosed non-small-cell lung carcinoma (NSCLC)\nCOMPARISON: None available\nTECHNIQUE: Following a 60-minute uptake period after intravenous administration of 12 mCi of FDG, low-dose non-contrast CT images were obtained for attenuation correction and anatomic localization, followed by emission PET images from the skull base to mid-thigh.\n\nFINDINGS:\nA 3.1 cm spiculated mass in the right upper lobe demonstrates intense FDG uptake (SUVmax 12.4). Ipsilateral mediastinal (station 4R) lymph node measuring 1.2 cm shows increased activity (SUVmax 6.8). No contralateral mediastinal or hilar hypermetabolic nodes.\n\nMultiple focal areas of increased FDG uptake are seen in the axial and appendicular skeleton corresponding to sclerotic lesions on CT, compatible with osseous metastases (largest in right iliac bone, SUVmax 9.1). No abnormal activity in the liver, adrenal glands, or brain. Physiologic tracer distribution in myocardium, kidneys, and urinary bladder.\n\nIMPRESSION:\n1. FDG-avid right upper-lobe primary lung malignancy with hypermetabolic right paratracheal nodal metastasis (consistent with at least N2 disease).\n2. Numerous FDG-avid osseous metastases consistent with Stage IV disease."
+    }
+  ]
+}

static/script.js ADDED Viewed

	@@ -0,0 +1,1320 @@

+/**
+ * @fileoverview Interactive radiology report structuring demo interface.
+ *
+ * This script provides the frontend functionality for the radiology report
+ * structuring application, including sample report loading, API communication,
+ * and interactive hover-to-highlight functionality between structured output
+ * and original input text.
+ */
+// Import copy functionality
+import { initCopyButton, updateCopyButtonState } from './copy.js';
+// Import clear functionality
+import { initClearButton, updateClearButtonState } from './reset.js';
+document.addEventListener('DOMContentLoaded', function () {
+  // === CONFIGURATION CONSTANTS ===
+  const GRID_CONFIG = {
+    MOBILE_MIN_WIDTH: 120,
+    DESKTOP_MIN_WIDTH: 160,
+    MOBILE_BREAKPOINT: 768,
+    NARROW_BREAKPOINT: 360,
+    MAX_LABEL_LENGTH: 60,
+    BALANCE_DELAY: 100,
+    RESIZE_DEBOUNCE: 250,
+  };
+  const UI_CONFIG = {
+    SCROLL_SMOOTH_BEHAVIOR: 'smooth',
+    SCROLL_OFFSET_BUFFER: 100,
+  };
+  // === GLOBAL STATE ===
+  // Variables are declared where they're first used to avoid redeclaration errors
+  // === UTILITY FUNCTIONS ===
+  /**
+   * Checks if the device is a touch-only device (no hover capability).
+   * Uses CSS media queries to accurately detect hover capability rather than just touch presence.
+   * @returns {boolean} True if it's a touch-only device, false if it can hover
+   */
+  const isTouchDevice = () =>
+    !window.matchMedia('(hover: hover) and (pointer: fine)').matches;
+  /**
+   * Clears all highlights from text spans.
+   */
+  function clearAllHighlights() {
+    const spans = document.querySelectorAll('.text-span.highlight');
+    spans.forEach((span) => {
+      span.classList.remove('highlight');
+      span.dataset.highlighted = 'false';
+    });
+    clearInputHighlight();
+  }
+  // Add global click handler to clear highlights when clicking outside on mobile
+  document.addEventListener('click', function (e) {
+    if (isTouchDevice() && !e.target.classList.contains('text-span')) {
+      clearAllHighlights();
+    }
+  });
+  const predictButton = document.getElementById('predict-button');
+  const inputText = document.getElementById('input-text');
+  const outputTextContainer = document.getElementById('output-text');
+  const instructionsEl = document.querySelector('.instructions');
+  const loadingOverlay = document.getElementById('loading-overlay');
+  let processingLoadingTimer = null;
+  let originalInputText = '';
+  // Disable virtual keyboard on mobile devices
+  let allowInputFocus = false;
+  if (isTouchDevice()) {
+    // Prevent focus to avoid virtual keyboard, except during programmatic highlighting
+    inputText.addEventListener('focus', function (e) {
+      if (!allowInputFocus) {
+        e.target.blur();
+      }
+    });
+  }
+  let sampleReportsData = null;
+  let currentSampleId = null;
+  // Model dropdown elements
+  const modelSelect = document.getElementById('model-select');
+  const modelNameSpan = document.getElementById('model-name');
+  const modelLink = document.getElementById('model-link');
+  /**
+   * Mapping of model IDs to their display information.
+   * @const {Object<string, {text: string, link: string}>}
+   */
+  const modelInfo = {
+    'gemini-2.5-flash': {
+      text: 'Gemini 2.5 Flash',
+      link: 'https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash',
+    },
+    'gemini-2.5-pro': {
+      text: 'Gemini 2.5 Pro',
+      link: 'https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro',
+    },
+  };
+  /**
+   * Updates the model information display based on the selected model.
+   */
+  function updateModelInfo() {
+    const selectedModel = modelSelect.value;
+    if (modelNameSpan)
+      modelNameSpan.textContent = modelInfo[selectedModel].text;
+    if (modelLink) modelLink.href = modelInfo[selectedModel].link;
+  }
+  if (modelSelect) {
+    modelSelect.addEventListener('change', updateModelInfo);
+    updateModelInfo();
+  }
+  // Cache optimization elements
+  const cacheToggle = document.getElementById('cache-toggle');
+  // LX Toggle elements
+  const promptToggle = document.getElementById('prompt-toggle');
+  const rawToggle = document.getElementById('raw-toggle');
+  // Initialize copy functionality
+  initCopyButton();
+  // Initialize clear functionality
+  initClearButton();
+  /**
+   * Detect mobile devices and update placeholder text
+   * Mobile UX does not have text entry to avoid disrupting the user interaction
+   * with extractions in the output - users can only select from samples
+   */
+  function updatePlaceholderForMobile() {
+    const isMobile =
+      /iPhone|iPad|iPod|Android/i.test(navigator.userAgent) ||
+      (navigator.maxTouchPoints && navigator.maxTouchPoints > 0);
+    if (isMobile) {
+      inputText.placeholder = 'Please select a sample from above...';
+    }
+  }
+  updatePlaceholderForMobile();
+  /**
+   * Updates model dropdown state based on cache toggle.
+   * When cache is enabled, model dropdown is disabled since cache is model-specific.
+   */
+  function updateModelDropdownState() {
+    if (modelSelect && cacheToggle) {
+      modelSelect.disabled = cacheToggle.checked;
+      // Add visual indication
+      if (cacheToggle.checked) {
+        modelSelect.style.opacity = '0.6';
+        modelSelect.style.cursor = 'not-allowed';
+      } else {
+        modelSelect.style.opacity = '1';
+        modelSelect.style.cursor = 'pointer';
+      }
+    }
+  }
+  /**
+   * Handles cache toggle changes.
+   */
+  if (cacheToggle) {
+    cacheToggle.addEventListener('change', updateModelDropdownState);
+    updateModelDropdownState();
+  }
+  /**
+   * Updates LX toggles state based on content availability.
+   * Disables toggles when input is empty or no output is generated.
+   */
+  function updateLXToggleStates() {
+    const hasInput = inputText && inputText.value.trim().length > 0;
+    const hasOutput =
+      outputTextContainer && outputTextContainer.textContent.trim().length > 0;
+    if (promptToggle) {
+      promptToggle.disabled = !hasInput;
+      if (!hasInput) {
+        promptToggle.checked = false;
+        promptToggle.style.opacity = '0.5';
+        promptToggle.style.cursor = 'not-allowed';
+      } else {
+        promptToggle.style.opacity = '1';
+        promptToggle.style.cursor = 'pointer';
+      }
+      // Synchronize mobile toggle state
+      const mobilePromptToggle = document.getElementById(
+        'prompt-toggle-mobile',
+      );
+      if (mobilePromptToggle) {
+        mobilePromptToggle.disabled = !hasInput;
+        mobilePromptToggle.checked = promptToggle.checked;
+        mobilePromptToggle.style.opacity = promptToggle.style.opacity;
+        mobilePromptToggle.style.cursor = promptToggle.style.cursor;
+      }
+    }
+    if (rawToggle) {
+      rawToggle.disabled = !hasOutput;
+      if (!hasOutput) {
+        rawToggle.checked = false;
+        rawToggle.style.opacity = '0.5';
+        rawToggle.style.cursor = 'not-allowed';
+      } else {
+        rawToggle.style.opacity = '1';
+        rawToggle.style.cursor = 'pointer';
+      }
+      // Synchronize mobile toggle state
+      const mobileRawToggle = document.getElementById('raw-toggle-mobile');
+      if (mobileRawToggle) {
+        mobileRawToggle.disabled = !hasOutput;
+        mobileRawToggle.checked = rawToggle.checked;
+        mobileRawToggle.style.opacity = rawToggle.style.opacity;
+        mobileRawToggle.style.cursor = rawToggle.style.cursor;
+      }
+    }
+  }
+  updateLXToggleStates();
+  updateCopyButtonState();
+  /**
+   * Loads sample reports from the static JSON file.
+   * @returns {Promise<void>}
+   */
+  async function loadSampleReports() {
+    try {
+      const response = await fetch('/static/sample_reports.json');
+      const data = await response.json();
+      sampleReportsData = data;
+      initializeSampleButtons();
+    } catch (error) {
+      console.error('Failed to load sample reports:', error);
+    }
+  }
+  /**
+   * Initializes the sample report buttons in the UI.
+   */
+  function initializeSampleButtons() {
+    if (!sampleReportsData || !sampleReportsData.samples) return;
+    const sampleButtonsContainer = document.querySelector('.sample-buttons');
+    if (!sampleButtonsContainer) return;
+    sampleButtonsContainer.innerHTML = '';
+    const sortedSamples = [...sampleReportsData.samples].sort((a, b) =>
+      a.title.localeCompare(b.title),
+    );
+    sortedSamples.forEach((sample) => {
+      const button = document.createElement('button');
+      button.className = 'sample-button';
+      button.setAttribute('data-sample-id', sample.id);
+      button.innerHTML = `
+                <div class="sample-button-content">
+                    <div class="sample-title">${sample.title}</div>
+                    <div class="sample-meta">
+                        <span class="sample-modality">${sample.modality}</span>
+                    </div>
+                </div>
+            `;
+      const modalitySpan = button.querySelector('.sample-modality');
+      if (modalitySpan) {
+        modalitySpan.classList.add(`mod-${sample.modality.toLowerCase()}`);
+      }
+      button.addEventListener('click', function () {
+        loadSampleReport(sample);
+        document
+          .querySelectorAll('.sample-button.active')
+          .forEach((btn) => btn.classList.remove('active'));
+        this.classList.add('active');
+      });
+      sampleButtonsContainer.appendChild(button);
+    });
+    setTimeout(() => {
+      balanceByColumnCount();
+    }, GRID_CONFIG.BALANCE_DELAY);
+  }
+  /**
+   * Balances sample button rows by calculating optimal column count for even distribution.
+   * Keeps row-wise reading order while achieving visual balance (e.g., 5+5 instead of 6+4).
+   * Uses responsive sizing for better mobile experience.
+   */
+  function balanceByColumnCount() {
+    const container = document.querySelector('.sample-buttons');
+    if (!container) {
+      console.warn('Sample buttons container not found');
+      return;
+    }
+    const cards = container.querySelectorAll('.sample-button').length;
+    const styles = getComputedStyle(container);
+    const gap = parseFloat(styles.columnGap) || 12;
+    const viewport = window.innerWidth;
+    const minWidth =
+      viewport <= GRID_CONFIG.MOBILE_BREAKPOINT
+        ? GRID_CONFIG.MOBILE_MIN_WIDTH
+        : GRID_CONFIG.DESKTOP_MIN_WIDTH;
+    const containerWidth = container.clientWidth;
+    const columnsFit = Math.max(
+      1,
+      Math.floor((containerWidth + gap) / (minWidth + gap)),
+    );
+    if (viewport <= GRID_CONFIG.NARROW_BREAKPOINT) {
+      return;
+    }
+    // Find the column count that provides the most even distribution
+    let bestCols = columnsFit;
+    let bestRem = cards % columnsFit;
+    for (let cols = columnsFit - 1; cols >= 1; cols--) {
+      const rem = cards % cols;
+      if (rem === 0) {
+        bestCols = cols;
+        break; // Perfect distribution found
+      }
+      if (rem > bestRem) continue; // Worse distribution, skip
+      bestCols = cols;
+      bestRem = rem;
+    }
+    // Mobile-specific logic: prefer 2-3 columns for better touch targets
+    if (viewport <= GRID_CONFIG.MOBILE_BREAKPOINT) {
+      if (bestCols === 1 && columnsFit >= 2) {
+        bestCols = 2; // Force at least 2 columns on mobile
+      } else if (bestCols > 3 && cards >= 6) {
+        // If we have many columns, prefer 2-3 for mobile UX
+        const cols2Rem = cards % 2;
+        const cols3Rem = cards % 3;
+        if (cols2Rem <= cols3Rem) {
+          bestCols = 2;
+        } else {
+          bestCols = 3;
+        }
+      }
+    }
+    // Always apply the balanced column count for optimal visual distribution
+    container.style.gridTemplateColumns = `repeat(${bestCols}, minmax(${minWidth}px, 1fr))`;
+  }
+  /**
+   * Loads a sample report into the input area and automatically processes it.
+   * @param {Object} sample - The sample report data object
+   */
+  function loadSampleReport(sample) {
+    scrollToOutput();
+    // Normalize line endings for sample text
+    inputText.value = sample.text.replace(/\r\n?/g, '\n');
+    // Update clear button state after loading sample
+    updateClearButtonState();
+    outputTextContainer.innerHTML = '';
+    instructionsEl.style.display = 'block';
+    currentSampleId = sample.id;
+    // Automatically enable cache for sample reports
+    if (cacheToggle) {
+      cacheToggle.checked = true;
+      // Trigger the change event to update model dropdown state
+      updateModelDropdownState();
+    }
+    setTimeout(() => {
+      predictButton.click();
+    }, 100);
+  }
+  loadSampleReports();
+  let resizeTimeout;
+  window.addEventListener('resize', () => {
+    clearTimeout(resizeTimeout);
+    resizeTimeout = setTimeout(() => {
+      balanceByColumnCount();
+    }, GRID_CONFIG.RESIZE_DEBOUNCE);
+  });
+  /**
+   * Updates the cache status display in the UI.
+   * @returns {Promise<void>}
+   */
+  async function updateCacheStatus() {
+    try {
+      const response = await fetch('/cache/stats');
+      const stats = await response.json();
+      const statusEl = document.getElementById('cache-status');
+      if (statusEl && stats.total_entries > 0) {
+        statusEl.textContent = `(${stats.sample_entries} samples cached)`;
+      } else if (statusEl) {
+        statusEl.textContent = '';
+      }
+    } catch (e) {
+      console.log('Cache stats not available');
+    }
+  }
+  updateCacheStatus();
+  inputText.addEventListener('input', function () {
+    if (
+      currentSampleId &&
+      inputText.value !==
+        sampleReportsData?.samples?.find((s) => s.id === currentSampleId)?.text
+    ) {
+      currentSampleId = null;
+      document
+        .querySelectorAll('.sample-button.active')
+        .forEach((btn) => btn.classList.remove('active'));
+    }
+    // Uncheck cache when input text is modified (cache no longer applies)
+    if (cacheToggle && cacheToggle.checked) {
+      cacheToggle.checked = false;
+      updateModelDropdownState(); // Re-enable model dropdown
+      updateCacheStatus(); // Update cache status display
+    }
+    // Update LX toggle states based on input content
+    updateLXToggleStates();
+    updateCopyButtonState();
+  });
+  predictButton.addEventListener('click', async function () {
+    predictButton.disabled = true;
+    predictButton.textContent = 'Processing...';
+    const cacheEnabled = cacheToggle ? cacheToggle.checked : true;
+    if (processingLoadingTimer) clearTimeout(processingLoadingTimer);
+    // Show loading overlay after 200ms
+    processingLoadingTimer = setTimeout(() => {
+      if (loadingOverlay) {
+        loadingOverlay.style.display = 'flex';
+        const loaderMessage = document.querySelector('.loader-message');
+        if (loaderMessage) {
+          const modelText =
+            (modelSelect && modelInfo[modelSelect.value]?.text) ||
+            'Gemini 2.5 Flash';
+          loaderMessage.textContent = `Running LangExtract with ${modelText}...`;
+        }
+        if (typeof gsap !== 'undefined') {
+          startLoaderAnimation();
+        }
+      }
+    }, 200);
+    inputText.value = inputText.value.replace(/\r\n?/g, '\n');
+    originalInputText = inputText.value;
+    outputTextContainer.innerHTML = '';
+    updateLXToggleStates(); // Disable toggles when output is cleared
+    updateCopyButtonState();
+    try {
+      const useCache = cacheEnabled;
+      const headers = { 'Content-Type': 'text/plain' };
+      if (modelSelect) {
+        headers['X-Model-ID'] = modelSelect.value;
+      }
+      if (useCache) {
+        headers['X-Use-Cache'] = 'true';
+        if (currentSampleId) {
+          headers['X-Sample-ID'] = currentSampleId;
+        }
+      } else {
+        headers['X-Use-Cache'] = 'false';
+      }
+      const response = await fetch('/predict', {
+        method: 'POST',
+        headers: headers,
+        body: originalInputText,
+      });
+      if (!response.ok) {
+        const errorText = await response.text();
+        let errorJson;
+        try {
+          errorJson = JSON.parse(errorText);
+        } catch (parseError) {
+          throw new Error(errorText || 'unknown error');
+        }
+        const error = new Error(errorJson.error || 'unknown error');
+        error.details = errorJson;
+        throw error;
+      }
+      // Stop the initial overlay timer so it doesn't overwrite cache message
+      if (processingLoadingTimer) {
+        clearTimeout(processingLoadingTimer);
+        processingLoadingTimer = null;
+      }
+      const data = await response.json();
+      // Handle cached results with simulated loading
+      if (data.from_cache) {
+        // Ensure overlay is visible (may not be if response was quick)
+        if (loadingOverlay && loadingOverlay.style.display === 'none') {
+          loadingOverlay.style.display = 'flex';
+          if (typeof gsap !== 'undefined') {
+            startLoaderAnimation();
+          }
+        }
+        // Update loading message for cached results
+        const loaderMessage = document.querySelector('.loader-message');
+        if (loaderMessage) {
+          loaderMessage.textContent =
+            'Loading LangExtract Result from Cache...';
+        }
+        // Add 1-2 second delay for cached results to simulate loading
+        const delay = Math.random() * 1000 + 2000; // 2-3 seconds
+        await new Promise((resolve) => setTimeout(resolve, delay));
+      }
+      if (data.sanitized_input && data.sanitized_input !== originalInputText) {
+        const inputText = document.getElementById('input-text');
+        if (inputText) {
+          inputText.value = data.sanitized_input;
+          updateClearButtonState();
+        }
+      }
+      if (data.text) {
+        if (
+          data.segments &&
+          Array.isArray(data.segments) &&
+          data.segments.length > 0
+        ) {
+          renderSegments(data.segments);
+          updateLXToggleStates(); // Enable/update toggles when output is generated
+          updateCopyButtonState();
+          // Update raw / prompt panes
+          const rawOutput = document.getElementById('raw-output');
+          const promptOutput = document.getElementById('prompt-output');
+          if (rawToggle && rawOutput) {
+            const rawData = data.annotated_document_json || {
+              error: 'No annotated document data available',
+              available_data: data,
+            };
+            rawOutput.innerHTML = '';
+            const formatter = new JSONFormatter(rawData, {
+              hoverPreviewEnabled: true,
+              animateOpen: false,
+              animateClose: false,
+              theme: 'light',
+              open: true,
+            });
+            const renderedElement = formatter.render();
+            rawOutput.appendChild(renderedElement);
+            rawOutput._jsonFormatter = formatter;
+            rawOutput._jsonData = rawData;
+            setTimeout(() => {
+              try {
+                if (formatter.openAtDepth) {
+                  formatter.openAtDepth(3);
+                }
+              } catch (e) {
+                // Ignore errors if formatter doesn't support openAtDepth
+              }
+              const togglers = rawOutput.querySelectorAll(
+                '.json-formatter-toggler',
+              );
+              togglers.forEach((toggler) => {
+                try {
+                  toggler.click();
+                } catch (e) {
+                  // Ignore click errors on JSON formatter togglers
+                }
+              });
+            }, 10);
+            rawToggle.checked = false;
+            rawOutput.style.display = 'none';
+            outputTextContainer.style.display = 'block';
+          }
+          if (promptOutput) {
+            const promptText = data.raw_prompt || 'Prompt data not available.';
+            if (typeof marked !== 'undefined' && data.raw_prompt) {
+              // Render markdown with syntax highlighting support
+              promptOutput.innerHTML = marked.parse(promptText);
+            } else {
+              // Fallback to plain text
+              promptOutput.textContent = promptText;
+            }
+            promptToggle.checked = false;
+            showPromptView(false);
+          }
+          const hasIntervals = data.segments.some(
+            (segment) => segment.intervals && segment.intervals.length > 0,
+          );
+          instructionsEl.style.display = 'block';
+          if (!hasIntervals) {
+            instructionsEl.innerHTML =
+              '<p><strong>Note:</strong> Hover functionality is not available for this result.</p>';
+          }
+        } else {
+          outputTextContainer.textContent = data.text;
+          instructionsEl.style.display = 'none';
+        }
+      } else {
+        outputTextContainer.textContent = 'No content returned from server.';
+        instructionsEl.style.display = 'none';
+      }
+    } catch (error) {
+      if (error.details && typeof error.details === 'object') {
+        if (error.details.error === 'Empty input') {
+          const friendlyMessage = [
+            '<div class="error-message-simple" role="alert">',
+            '    <h3>📝 Input Required</h3>',
+            '    <p>Please paste or type a radiology report in the input area.</p>',
+            '    <p class="suggestion">You can try one of the sample reports below to see how the structuring works.</p>',
+            '</div>',
+          ].join('\n');
+          outputTextContainer.innerHTML = friendlyMessage;
+        } else if (
+          error.details.error === 'Input too long' &&
+          error.details.max_length
+        ) {
+          const friendlyMessage = [
+            '<div class="error-message-simple" role="alert">',
+            '    <h3>⚠️ Input Too Long</h3>',
+            `    <p>Your input contains <strong>${originalInputText.length.toLocaleString()}</strong> characters, `,
+            `    but this demo is limited to <strong>${error.details.max_length.toLocaleString()}</strong> characters `,
+            "    to reduce the load on this demo's Gemini API key.</p>",
+            '    <p class="suggestion">Try using a shorter excerpt from your report, or focus on the most relevant sections.</p>',
+            '    <div class="deploy-note">',
+            '        <strong>💡 Tip:</strong> If you deploy the source code with your own Gemini API key, you can modify this limit.',
+            '    </div>',
+            '</div>',
+          ].join('\n');
+          outputTextContainer.innerHTML = friendlyMessage;
+        } else {
+          let errorMessage = `Error: ${error.details.error}\n\n`;
+          errorMessage += `${error.details.message}`;
+          if (error.details.max_length) {
+            errorMessage += `\n\nMaximum allowed length: ${error.details.max_length} characters`;
+          }
+          outputTextContainer.textContent = errorMessage;
+        }
+      } else {
+        outputTextContainer.textContent = `Error: ${error.message}`;
+      }
+      instructionsEl.style.display = 'none';
+    } finally {
+      if (processingLoadingTimer) {
+        clearTimeout(processingLoadingTimer);
+        processingLoadingTimer = null;
+      }
+      if (loadingOverlay) loadingOverlay.style.display = 'none';
+      const message = document.querySelector('.loader-message');
+      const spinner = document.querySelector('.spinner');
+      if (message && spinner) {
+        gsap.killTweensOf([message, spinner]);
+        gsap.set([message, spinner], { clearProps: 'all' });
+      }
+      predictButton.disabled = false;
+      predictButton.textContent = 'Process';
+      updateCacheStatus();
+    }
+  });
+  /**
+   * Renders segments as interactive elements in the output container.
+   * @param {Array<Object>} segments - Array of segment objects from the API response
+   */
+  function renderSegments(segments) {
+    outputTextContainer.innerHTML = '';
+    const plainTextParts = []; // Collect plain text for data-copy
+    const segmentsByType = {
+      prefix: segments.filter((seg) => seg.type === 'prefix'),
+      body: segments.filter((seg) => seg.type === 'body'),
+      suffix: segments.filter((seg) => seg.type === 'suffix'),
+    };
+    if (segmentsByType.prefix.length > 0) {
+      // Check if there's an Examination segment that should get a header
+      const examinationSegments = segmentsByType.prefix.filter(
+        (seg) => seg.label && seg.label.toLowerCase() === 'examination',
+      );
+      const otherPrefixSegments = segmentsByType.prefix.filter(
+        (seg) => !seg.label || seg.label.toLowerCase() !== 'examination',
+      );
+      // Render Examination segments with content as header (no "EXAMINATION:" prefix)
+      if (examinationSegments.length > 0) {
+        examinationSegments.forEach((segment) => {
+          let content = segment.content;
+          // Remove various examination prefixes
+          const examPrefixes = ['EXAMINATION:', 'EXAM:', 'STUDY:'];
+          const upperContent = content.toUpperCase();
+          for (const prefix of examPrefixes) {
+            if (upperContent.startsWith(prefix)) {
+              content = content.substring(prefix.length).trim();
+              break;
+            }
+          }
+          // Use the clean content as the header text (capitalized)
+          if (content) {
+            appendSectionHeader(content.toUpperCase());
+            plainTextParts.push(content.toUpperCase());
+          }
+        });
+        outputTextContainer.appendChild(document.createElement('br'));
+      }
+      // Render other prefix segments normally
+      if (otherPrefixSegments.length > 0) {
+        otherPrefixSegments.forEach((segment) => {
+          outputTextContainer.appendChild(createSegmentElement(segment));
+          plainTextParts.push(segment.content);
+        });
+        outputTextContainer.appendChild(document.createElement('br'));
+      }
+    }
+    if (segmentsByType.body.length > 0) {
+      appendSectionHeader('FINDINGS:');
+      plainTextParts.push('\nFINDINGS:');
+      const groupMap = new Map();
+      segmentsByType.body.forEach((seg) => {
+        const rawLabel = seg.label || 'Other';
+        const parts = rawLabel.split(':');
+        const primary = parts[0].trim();
+        const sub = parts.slice(1).join(':').trim();
+        if (!groupMap.has(primary)) groupMap.set(primary, []);
+        groupMap.get(primary).push({ segment: seg, sublabel: sub });
+      });
+      groupMap.forEach((items, primary) => {
+        const primaryHeader = document.createElement('div');
+        primaryHeader.className = 'primary-label';
+        primaryHeader.textContent = primary;
+        outputTextContainer.appendChild(primaryHeader);
+        plainTextParts.push('\n' + primary);
+        if (items.length === 1) {
+          const p = document.createElement('p');
+          p.className = 'single-finding';
+          const labelSpan = document.createElement('span');
+          labelSpan.classList.add('segment-sublabel');
+          if (items[0].sublabel) {
+            labelSpan.textContent = `${items[0].sublabel}: `;
+            p.appendChild(labelSpan);
+          }
+          p.appendChild(createContentWithIntervalSpans(items[0].segment));
+          outputTextContainer.appendChild(p);
+          plainTextParts.push('- ' + p.textContent.trim());
+        } else {
+          const ul = document.createElement('ul');
+          ul.className = 'finding-list';
+          outputTextContainer.appendChild(ul);
+          items.forEach((item) => {
+            const li = document.createElement('li');
+            const labelSpan = document.createElement('span');
+            labelSpan.classList.add('segment-sublabel');
+            if (item.sublabel) {
+              labelSpan.textContent = `${item.sublabel}: `;
+              li.appendChild(labelSpan);
+            }
+            li.appendChild(createContentWithIntervalSpans(item.segment));
+            ul.appendChild(li);
+            plainTextParts.push('• ' + li.textContent.trim());
+          });
+        }
+      });
+    }
+    if (segmentsByType.suffix.length > 0) {
+      appendSectionHeader('IMPRESSION:');
+      plainTextParts.push('\nIMPRESSION:');
+      segmentsByType.suffix.forEach((segment) => {
+        outputTextContainer.appendChild(createSegmentElement(segment));
+        plainTextParts.push(segment.content);
+      });
+    }
+    // Store pre-computed plain text for efficient copying
+    const plainText = plainTextParts
+      .join('\n')
+      .replace(/\n{3,}/g, '\n\n')
+      .trim();
+    const outputEl = document.getElementById('output-text');
+    if (outputEl) {
+      outputEl.dataset.copy = plainText;
+    }
+  }
+  /**
+   * Helper function to create section headers.
+   * @param {string} text - The header text to display
+   */
+  function appendSectionHeader(text) {
+    const header = document.createElement('div');
+    header.className = 'section-header';
+    header.textContent = text;
+    outputTextContainer.appendChild(header);
+  }
+  /**
+   * Creates a DOM element for a segment.
+   * @param {Object} segment - The segment data object
+   * @returns {HTMLElement} The created segment element
+   */
+  function createSegmentElement(segment) {
+    const segmentDiv = document.createElement('div');
+    segmentDiv.classList.add('segment', `segment-${segment.type}`);
+    if (segment.type === 'body' && segment.label) {
+      const labelSpan = document.createElement('span');
+      labelSpan.classList.add('segment-label');
+      labelSpan.textContent = `${segment.label}: `;
+      segmentDiv.appendChild(labelSpan);
+    }
+    segmentDiv.appendChild(createContentWithIntervalSpans(segment));
+    return segmentDiv;
+  }
+  /**
+   * Creates content with interval spans for highlighting functionality.
+   * @param {Object} segment - The content segment with intervals and metadata
+   * @returns {DocumentFragment} Fragment containing the processed content
+   */
+  function createContentWithIntervalSpans(segment) {
+    const fragment = document.createDocumentFragment();
+    if (segment.intervals && segment.intervals.length > 0) {
+      const contentSpan = createIntervalSpan(segment);
+      addIntervalEventListeners(contentSpan);
+      fragment.appendChild(contentSpan);
+    } else {
+      fragment.appendChild(createRegularSpan(segment));
+    }
+    return fragment;
+  }
+  /**
+   * Creates a span element for content with intervals (highlighting capability).
+   * @param {Object} segment - The content segment
+   * @returns {HTMLSpanElement} The created span element
+   */
+  function createIntervalSpan(segment) {
+    const interval = segment.intervals[0];
+    const contentSpan = document.createElement('span');
+    contentSpan.classList.add('text-span');
+    // Set data attributes for position tracking
+    contentSpan.dataset.startPos = interval.startPos;
+    contentSpan.dataset.endPos = interval.endPos;
+    contentSpan.dataset.type = segment.type;
+    contentSpan.dataset.label = segment.label || '';
+    // Handle label styling if present
+    const labelInfo = extractLabelInfo(segment.content);
+    if (labelInfo.hasLabel) {
+      setupLabelSpan(contentSpan, labelInfo);
+    } else {
+      contentSpan.textContent = segment.content;
+    }
+    // Apply significance-based styling
+    applySignificanceStyles(contentSpan, segment.significance);
+    return contentSpan;
+  }
+  /**
+   * Extracts label information from content.
+   * @param {string} content - The content to analyze
+   * @returns {Object} Label information object
+   */
+  function extractLabelInfo(content) {
+    const colonIndex = content.indexOf(':');
+    const hasLabel =
+      colonIndex > 0 && colonIndex < GRID_CONFIG.MAX_LABEL_LENGTH;
+    return {
+      hasLabel,
+      labelText: hasLabel ? content.slice(0, colonIndex) : '',
+      restText: hasLabel ? content.slice(colonIndex) : content,
+    };
+  }
+  /**
+   * Sets up span with label and content parts for CSS styling.
+   * @param {HTMLSpanElement} contentSpan - The span to configure
+   * @param {Object} labelInfo - Label information object
+   */
+  function setupLabelSpan(contentSpan, labelInfo) {
+    contentSpan.classList.add('has-label');
+    const labelSpan = document.createElement('span');
+    labelSpan.className = 'label-part';
+    labelSpan.textContent = labelInfo.labelText;
+    const contentPartSpan = document.createElement('span');
+    contentPartSpan.className = 'content-part';
+    contentPartSpan.textContent = labelInfo.restText;
+    contentSpan.appendChild(labelSpan);
+    contentSpan.appendChild(contentPartSpan);
+  }
+  /**
+   * Applies significance-based CSS classes to content spans.
+   * @param {HTMLSpanElement} span - The span to style
+   * @param {string} significance - The significance level
+   */
+  function applySignificanceStyles(span, significance) {
+    if (significance) {
+      const significanceLevel = (significance || '').toLowerCase();
+      if (
+        significanceLevel === 'minor' ||
+        significanceLevel === 'significant'
+      ) {
+        span.classList.add(`significance-${significanceLevel}`);
+      }
+    }
+  }
+  /**
+   * Creates a regular span for content without intervals.
+   * @param {Object} segment - The content segment
+   * @returns {HTMLSpanElement} The created span element
+   */
+  function createRegularSpan(segment) {
+    const regularSpan = document.createElement('span');
+    regularSpan.textContent = segment.content;
+    // Apply significance styling even for non-interval content
+    applySignificanceStyles(regularSpan, segment.significance);
+    return regularSpan;
+  }
+  /**
+   * Adds event listeners for interval spans with distinct desktop/mobile interaction patterns.
+   * Desktop: Hover to highlight/unhighlight instantly
+   * Mobile: Tap to toggle highlight on/off
+   * @param {HTMLSpanElement} contentSpan - The span to add listeners to
+   */
+  function addIntervalEventListeners(contentSpan) {
+    const isDesktop = !isTouchDevice();
+    if (isDesktop) {
+      // Desktop: Hover-based highlighting
+      contentSpan.addEventListener('mouseenter', function () {
+        contentSpan.classList.add('highlight');
+        const startPos = parseInt(contentSpan.dataset.startPos);
+        const endPos = parseInt(contentSpan.dataset.endPos);
+        if (!isNaN(startPos) && !isNaN(endPos)) {
+          highlightInputText(startPos, endPos);
+        }
+      });
+      contentSpan.addEventListener('mouseleave', function () {
+        contentSpan.classList.remove('highlight');
+        clearInputHighlight();
+      });
+    } else {
+      // Mobile: Tap-based highlighting (toggle)
+      contentSpan.addEventListener('touchstart', function (e) {
+        e.preventDefault();
+        handleMobileHighlight(contentSpan);
+      });
+      contentSpan.addEventListener('click', function (e) {
+        e.preventDefault();
+        handleMobileHighlight(contentSpan);
+      });
+    }
+  }
+  /**
+   * Handles mobile highlighting toggle for touch devices.
+   * Toggles highlight on/off when tapping the same span, or switches to new span.
+   * @param {HTMLSpanElement} span - The span to highlight
+   */
+  function handleMobileHighlight(span) {
+    const isCurrentlyHighlighted = span.classList.contains('highlight');
+    // Clear all highlights first
+    clearAllHighlights();
+    // If this span wasn't highlighted before, highlight it now
+    if (!isCurrentlyHighlighted) {
+      span.classList.add('highlight');
+      span.dataset.highlighted = 'true';
+      const startPos = parseInt(span.dataset.startPos);
+      const endPos = parseInt(span.dataset.endPos);
+      if (!isNaN(startPos) && !isNaN(endPos)) {
+        highlightInputText(startPos, endPos);
+      }
+    } else {
+      // If it was highlighted, just clear (already done above)
+      clearInputHighlight();
+    }
+  }
+  /**
+   * Highlights text in the input textarea based on character positions.
+   * @param {number} startPos - Starting character position
+   * @param {number} endPos - Ending character position
+   */
+  function highlightInputText(startPos, endPos) {
+    // Enable focus for programmatic text selection
+    if (isTouchDevice()) {
+      allowInputFocus = true;
+    }
+    inputText.focus();
+    if (typeof inputText.setSelectionRange === 'function') {
+      inputText.setSelectionRange(startPos, endPos);
+      scrollInputToRange(startPos, endPos); // Centre the selection in viewport
+    }
+    // Restore focus prevention
+    if (isTouchDevice()) {
+      allowInputFocus = false;
+    }
+  }
+  /**
+   * Scrolls the textarea so the selected range is vertically centered in the viewport.
+   * Uses a temporary clone to calculate precise text measurements for accurate positioning.
+   * @param {number} startPos - Start position of the selection
+   * @param {number} endPos - End position of the selection
+   */
+  function scrollInputToRange(startPos, endPos) {
+    const style = window.getComputedStyle(inputText);
+    const clone = document.createElement('textarea');
+    // Clone essential styles so scrollHeight matches the real textarea
+    const ESSENTIAL_STYLES = [
+      'width',
+      'fontFamily',
+      'fontSize',
+      'fontWeight',
+      'lineHeight',
+      'letterSpacing',
+      'padding',
+      'border',
+      'boxSizing',
+    ];
+    ESSENTIAL_STYLES.forEach((prop) => (clone.style[prop] = style[prop]));
+    // Position clone off-screen for measurement
+    Object.assign(clone.style, {
+      position: 'absolute',
+      top: '-9999px',
+      height: 'auto',
+    });
+    document.body.appendChild(clone);
+    try {
+      // Calculate height before the selection
+      clone.value = originalInputText.slice(0, startPos);
+      const heightBefore = clone.scrollHeight;
+      // Calculate height of the selection itself
+      clone.value = originalInputText.slice(startPos, endPos);
+      const heightSelection = clone.scrollHeight;
+      // Calculate optimal scroll position to center the selection
+      const viewportHeight = inputText.clientHeight;
+      const targetScrollTop = Math.max(
+        0,
+        heightBefore - viewportHeight / 2 + heightSelection / 2,
+      );
+      inputText.scrollTo({
+        top: targetScrollTop,
+        behavior: UI_CONFIG.SCROLL_SMOOTH_BEHAVIOR,
+      });
+    } finally {
+      // Always cleanup the clone element
+      document.body.removeChild(clone);
+    }
+  }
+  /**
+   * Starts the GSAP loader pulse animation.
+   */
+  function startLoaderAnimation() {
+    const message = document.querySelector('.loader-message');
+    const spinner = document.querySelector('.spinner');
+    if (!message || !spinner) return;
+    gsap.killTweensOf([message, spinner]);
+    gsap.set([message, spinner], { clearProps: 'all' });
+    gsap.to(spinner, {
+      rotation: 360,
+      duration: 1.8,
+      ease: 'none',
+      repeat: -1,
+    });
+    gsap.fromTo(
+      message,
+      {
+        opacity: 0.4,
+        scale: 0.98,
+      },
+      {
+        opacity: 1,
+        scale: 1,
+        duration: 1.2,
+        ease: 'power2.inOut',
+        yoyo: true,
+        repeat: -1,
+      },
+    );
+    gsap.to(message, {
+      color: '#4285F4',
+      duration: 2,
+      ease: 'sine.inOut',
+      yoyo: true,
+      repeat: -1,
+    });
+  }
+  /**
+   * Clears any highlighting in the input textarea.
+   */
+  function clearInputHighlight() {
+    if (document.activeElement === inputText) {
+      inputText.blur();
+    }
+  }
+  const rawOutput = document.getElementById('raw-output');
+  const promptOutput = document.getElementById('prompt-output');
+  /**
+   * Shows or hides the prompt view panel.
+   * @param {boolean} show - Whether to show the prompt view
+   */
+  function showPromptView(show) {
+    if (!promptOutput) return;
+    promptOutput.style.display = show ? 'block' : 'none';
+    inputText.style.display = show ? 'none' : 'block';
+  }
+  if (rawToggle) {
+    rawToggle.addEventListener('change', () => {
+      const showRaw = rawToggle.checked;
+      rawOutput.style.display = showRaw ? 'block' : 'none';
+      outputTextContainer.style.display = showRaw ? 'none' : 'block';
+      const mobileRawToggle = document.getElementById('raw-toggle-mobile');
+      if (mobileRawToggle) {
+        mobileRawToggle.checked = showRaw;
+      }
+      if (showRaw) {
+        setTimeout(() => {
+          const formatter = rawOutput._jsonFormatter;
+          if (formatter && formatter.openAtDepth) {
+            try {
+              formatter.openAtDepth(3);
+              return;
+            } catch (e) {
+              // Fall back to manual clicking
+            }
+          }
+          // Fallback: manually click the root toggler if it's collapsed
+          const rootToggler = rawOutput.querySelector(
+            '.json-formatter-toggler',
+          );
+          if (rootToggler) {
+            const arrow =
+              rootToggler.querySelector('.json-formatter-toggler-link') ||
+              rootToggler;
+            const arrowText = arrow.textContent || arrow.innerText || '';
+            if (arrowText.includes('►') || !arrowText.includes('▼')) {
+              try {
+                rootToggler.click();
+              } catch (e) {
+                console.error('Failed to expand JSON:', e);
+              }
+            }
+          }
+        }, 100);
+      }
+    });
+  }
+  if (promptToggle) {
+    promptToggle.addEventListener('change', () => {
+      const showPrompt = promptToggle.checked;
+      showPromptView(showPrompt);
+      // Synchronize with mobile toggle
+      const mobilePromptToggle = document.getElementById(
+        'prompt-toggle-mobile',
+      );
+      if (mobilePromptToggle) {
+        mobilePromptToggle.checked = showPrompt;
+      }
+    });
+  }
+  // Mobile prompt toggle event handling
+  const mobilePromptToggle = document.getElementById('prompt-toggle-mobile');
+  if (mobilePromptToggle && promptToggle) {
+    mobilePromptToggle.addEventListener('change', () => {
+      const showPrompt = mobilePromptToggle.checked;
+      promptToggle.checked = showPrompt;
+      showPromptView(showPrompt);
+    });
+  }
+  // Mobile raw toggle event handling
+  const mobileRawToggle = document.getElementById('raw-toggle-mobile');
+  if (mobileRawToggle && rawToggle) {
+    mobileRawToggle.addEventListener('change', () => {
+      const showRaw = mobileRawToggle.checked;
+      rawToggle.checked = showRaw;
+      rawToggle.dispatchEvent(new Event('change'));
+    });
+  }
+});
+/**
+ * Scrolls to the output panel to direct user focus to the results area.
+ * Provides improved navigation experience for sample report selection workflow.
+ */
+function scrollToOutput() {
+  const outputContainer = document.getElementById('output-container');
+  if (outputContainer) {
+    // Smooth scroll to the output area
+    outputContainer.scrollIntoView({
+      behavior: 'smooth',
+      block: 'center',
+    });
+  }
+}
+/**
+ * Toggles the interface options panel between expanded and collapsed states.
+ */
+function toggleInterfaceOptions() {
+  const content = document.getElementById('interface-options-content');
+  const icon = document.getElementById('interface-expand-icon');
+  if (content.style.display === 'none' || content.style.display === '') {
+    content.style.display = 'block';
+    icon.classList.add('expanded');
+  } else {
+    content.style.display = 'none';
+    icon.classList.remove('expanded');
+  }
+}
+// Set up event delegation for interface toggle
+document.addEventListener('click', (e) => {
+  if (e.target.closest('[data-action="toggle-interface"]')) {
+    toggleInterfaceOptions();
+  }
+});

static/style.css ADDED Viewed

	@@ -0,0 +1,2239 @@

+/* === Google Material Palette === */
+/* Custom properties referenced through var(--google-*) by the rules below. */
+:root {
+  --google-blue: #1a73e8;
+  --google-blue-dark: #174ea6;
+  --google-blue-light: #4285f4;
+  --google-purple: #9c27b0;
+  --google-purple-dark: #7b1fa2;
+  --google-purple-light: #e1bee7;
+  --google-grey-900: #202124;
+  --google-grey-700: #5f6368;
+  /* Used by .banner-note and .citation-note; without a declaration their
+     color would be invalid at computed-value time and fall back to the
+     inherited color. */
+  --google-grey-600: #80868b;
+  --google-grey-200: #e8eaed;
+  --google-grey-100: #f1f3f4;
+  --google-yellow: #f9ab00;
+}
+/* Page-wide background colour and default text font */
+body {
+  background-color: var(--google-grey-100);
+  font-family: 'Google Sans Text', sans-serif;
+}
+/* Utility class: light drop shadow */
+.elev-1 {
+  box-shadow: 0 1px 3px rgba(60, 64, 67, 0.15);
+}
+/* Header and Banner */
+/* Centred page header with vertical spacing above and below */
+.header-container {
+  margin-top: 40px;
+  margin-bottom: 32px;
+  text-align: center;
+}
+/* Main title: fluid font size clamped between 1.6rem and 2.75rem */
+.header-container h1 {
+  font-family: 'Google Sans', sans-serif;
+  font-weight: 500;
+  letter-spacing: -0.03em;
+  margin: 0 0 12px;
+  font-size: clamp(
+    1.6rem,
+    4vw + 0.5rem,
+    2.75rem
+  ); /* ~26px → 44px fluid scaling */
+  line-height: 1.2; /* tighter on large screens */
+  text-wrap: balance; /* modern browsers balance line lengths */
+  color: var(--google-blue-dark);
+}
+/* Ensure brand-split stays inline by default */
+.brand-split {
+  display: inline;
+}
+/* Banner card: white surface with a thin border and light shadow */
+.banner {
+  background: #fff;
+  color: var(--google-grey-900);
+  border: 1px solid var(--google-grey-200);
+  box-shadow: 0 1px 3px rgba(60, 64, 67, 0.15);
+  padding: 20px;
+}
+/* Banner heading */
+.banner-content h2 {
+  font-family: 'Google Sans', sans-serif;
+  font-weight: 600;
+  font-size: 28px;
+  margin: 0 0 16px;
+  color: var(--google-grey-900);
+}
+/* Sub-section title inside the banner */
+.banner-section-title {
+  font-family: 'Google Sans', sans-serif;
+  font-weight: 600;
+  font-size: 18px;
+  color: var(--google-blue-dark);
+  margin: 0 0 12px 0;
+}
+/* Body copy inside the banner */
+.banner-description {
+  font-family: 'Google Sans Text', sans-serif;
+  font-size: 16px;
+  line-height: 1.6;
+  color: var(--google-grey-700);
+  margin-bottom: 12px;
+}
+/* No trailing gap after the final description paragraph */
+.banner-description:last-child {
+  margin-bottom: 0;
+}
+/* Links: underlined only on hover */
+.banner-link {
+  color: var(--google-blue-dark);
+  text-decoration: none;
+}
+.banner-link:hover {
+  text-decoration: underline;
+}
+/* Small right-aligned note */
+.banner-note {
+  font-size: 13px;
+  color: var(--google-grey-600);
+  text-align: right;
+  margin-top: 6px;
+}
+/* Small centred note in a bordered, rounded box */
+.citation-note {
+  font-size: 13px;
+  color: var(--google-grey-600);
+  background: #f8f9fa;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 8px;
+  padding: 10px 16px;
+  margin: 12px 0 0 0;
+  line-height: 1.5;
+  text-align: center;
+}
+/* "How to use" panel: translucent white fill with a left accent border.
+   NOTE(review): the white-on-alpha colours presumably target a coloured
+   backdrop — confirm they are still visible against the white .banner. */
+.how-to-use {
+  background: rgba(255, 255, 255, 0.1);
+  border-radius: 8px;
+  padding: 20px;
+  border-left: 4px solid rgba(255, 255, 255, 0.3);
+}
+/* Panel heading */
+.how-to-use h3 {
+  margin: 0 0 15px 0;
+  font-size: 1.2em;
+  font-weight: 500;
+}
+/* Ordered steps: no outer margin, indented numbering */
+.how-to-use ol {
+  margin: 0;
+  padding-left: 20px;
+}
+.how-to-use li {
+  margin-bottom: 8px;
+  line-height: 1.5;
+}
+/* Attribution block - semantic grouping */
+.attribution {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  text-align: center;
+}
+/* Google Research Logo */
+.google-research-logo {
+  margin-top: 2px;
+  margin-bottom: 12px;
+}
+.attribution .google-research-logo img {
+  height: 1.5em; /* ~24-26px when subtitle is 16-18px */
+  width: auto;
+  opacity: 1;
+}
+/* Disclaimer */
+.disclaimer-container {
+  margin-top: 8px;
+}
+.disclaimer-box {
+  display: flex;
+  align-items: flex-start;
+  gap: 12px;
+  background: #fffbeb;
+  border: none;
+  border-left: 4px solid var(--google-yellow);
+  border-radius: 12px;
+  padding: 12px 16px;
+  font-size: 14px;
+  line-height: 1.5;
+  box-shadow: 0 1px 3px rgba(60, 64, 67, 0.1);
+}
+.disclaimer-box p {
+  margin: 0;
+}
+.disclaimer-box strong {
+  color: #495057;
+  font-weight: 600;
+}
+.disclaimer-icon {
+  color: var(--google-yellow);
+  font-size: 18px;
+}
+.disclaimer-text {
+  color: var(--google-grey-700);
+}
+/* Sample Reports */
+.samples-container {
+  background-color: #fff;
+  border-radius: 16px;
+  padding: 24px;
+  margin-bottom: 25px;
+  box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
+  border: 1px solid #e9ecef;
+}
+.samples-container h3 {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 20px;
+  font-weight: normal;
+  margin-bottom: 6px;
+}
+.samples-description {
+  font-size: 15px;
+  margin-bottom: 24px;
+  line-height: 1.6;
+  color: var(--google-grey-700);
+}
+.samples-description strong {
+  color: var(--google-blue-dark);
+  font-weight: 600;
+}
+.instruction-step {
+  margin-bottom: 6px;
+  padding-left: 0;
+  display: flex;
+  align-items: center;
+  gap: 8px;
+}
+.step-number {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  width: 20px;
+  height: 20px;
+  border-radius: 50%;
+  background-color: #e8f0fe;
+  color: var(--google-blue);
+  font-size: 12px;
+  font-weight: 600;
+  flex-shrink: 0;
+}
+/* === SAMPLE REPORT BUTTON GRID ================================= */
+.sample-buttons {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
+  gap: clamp(8px, 1vw, 16px);
+  margin-top: 8px;
+  grid-auto-flow: row;
+  justify-content: center;
+}
+/* Tip under sample buttons */
+.samples-tip {
+  font-family:
+    'Google Sans',
+    -apple-system,
+    BlinkMacSystemFont,
+    'Segoe UI',
+    Roboto,
+    sans-serif;
+  font-size: 14px;
+  color: var(--google-grey-700);
+  background-color: #f8f9fa;
+  padding: 12px 16px;
+  border-radius: 8px;
+  margin-top: 8px;
+  font-weight: 400;
+  line-height: 1.6;
+}
+/* Desktop tip visibility */
+.tip-desktop {
+  display: block;
+}
+/* Mobile tip - show only on touch devices */
+/* `hover: none` matches when the primary input cannot hover. All .mobile-tip
+   styling lives inside this query; the complementary `hover: hover` query
+   below hides the element instead.
+   NOTE(review): .tip-desktop is toggled by viewport width (max-width: 768px),
+   not by hover capability, so a wide touch device would match both the
+   desktop tip and this one — confirm that is acceptable for the markup. */
+@media (hover: none) {
+  .mobile-tip {
+    margin: 0.75rem 1rem 1rem;
+    font-size: 0.875rem;
+    color: #5f6368;
+    background: #f8f9fa;
+    border-radius: 8px;
+    padding: 0.75rem 1rem;
+    line-height: 1.5;
+    display: block;
+    text-align: left;
+  }
+  /* Leading icon sits inline with the tip text. */
+  .mobile-tip .icon {
+    display: inline;
+    font-size: 1rem;
+    margin-right: 0.25rem;
+  }
+  /* Emphasised words use a blue accent colour. */
+  .mobile-tip strong {
+    color: #1a73e8;
+    font-weight: 600;
+  }
+  /* Secondary remark: slightly smaller, italic, same grey as the body text. */
+  .mobile-tip em {
+    font-style: italic;
+    color: #5f6368;
+    font-size: 0.825rem;
+  }
+}
+/* Hide mobile tip on mouse/desktop */
+@media (hover: hover) {
+  .mobile-tip {
+    display: none;
+  }
+}
+/* === CARD ====================================================== */
+.sample-button {
+  display: block;
+  padding: clamp(12px, 1.2vw, 18px);
+  border-radius: 8px;
+  background: var(--google-grey-100);
+  border: 1px solid var(--google-grey-200);
+  color: var(--google-grey-900);
+  cursor: pointer;
+  position: relative;
+  overflow: hidden;
+  transition:
+    transform 0.15s ease,
+    box-shadow 0.15s ease;
+  box-shadow:
+    0 2px 6px rgba(60, 64, 67, 0.15),
+    0 1px 2px rgba(60, 64, 67, 0.1);
+}
+.sample-button-content {
+  text-align: left;
+}
+/* === TYPOGRAPHY =============================================== */
+.sample-title {
+  font-weight: 500;
+  letter-spacing: -0.15px;
+  font-size: clamp(0.95rem, 1.55vw, 1.1rem);
+  line-height: 1.3;
+  margin: 0 0 6px 0;
+}
+.sample-meta {
+  font-size: clamp(0.72rem, 1.2vw, 0.85rem);
+  opacity: 0.8;
+  display: flex;
+  justify-content: flex-start;
+  align-items: center;
+}
+.sample-modality {
+  padding: 0.12em 0.7em;
+  border-radius: 14px;
+  font-weight: 600;
+  background: rgba(255, 255, 255, 0.2);
+  font-size: 12px;
+  text-transform: uppercase;
+  letter-spacing: 0.5px;
+}
+.sample-modality.mod-ct {
+  background: rgba(66, 133, 244, 0.15);
+  color: var(--google-blue-dark);
+}
+.sample-modality.mod-mri {
+  background: rgba(156, 39, 176, 0.15);
+  color: var(--google-purple-dark);
+}
+.sample-modality.mod-xr {
+  background: rgba(255, 152, 0, 0.15);
+  color: #e65100;
+}
+.sample-modality.mod-us {
+  background: rgba(0, 188, 212, 0.15);
+  color: #00695c;
+}
+.sample-modality.mod-pet {
+  background: rgba(255, 64, 129, 0.15);
+  color: #ad1457;
+}
+.sample-button:hover {
+  background: var(--google-grey-200);
+  border-color: var(--google-blue-light);
+  transform: translateY(-2px);
+  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.12);
+}
+.sample-button:active {
+  transform: none;
+  box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2);
+}
+/* === MINIMUM TOUCH SIZE ======================================= */
+/* The 44px tap-target floor is a usability requirement, not something that
+   depends on a particular CSS feature, so it is applied unconditionally
+   rather than behind a feature query for large-viewport (`lvh`) units. */
+.sample-button {
+  min-height: 44px;
+}
+/* Mobile optimization for sample buttons */
+@media (max-width: 768px) {
+  .sample-button {
+    padding: 10px 12px;
+  }
+  .sample-title {
+    font-size: clamp(0.9rem, 2.5vw, 1rem);
+    line-height: 1.3;
+  }
+  .sample-meta {
+    margin-top: 4px;
+  }
+  /* Hide desktop tip on mobile */
+  .tip-desktop {
+    display: none;
+  }
+}
+/* Very narrow phones - horizontal scroll */
+@media (max-width: 360px) {
+  .sample-buttons {
+    display: flex;
+    overflow-x: auto;
+    scroll-snap-type: x mandatory;
+    padding-inline: 4px;
+  }
+  .sample-button {
+    flex: 0 0 80%;
+    margin-inline: 4px;
+    scroll-snap-align: start;
+  }
+}
+/* Loading state */
+/* While loading, the card is dimmed and ignores pointer input. */
+.sample-button.loading {
+  pointer-events: none;
+  opacity: 0.6;
+}
+/* Spinner overlay covering the whole card (`inset: 0`). The background is an
+   inline SVG data URI: a circle of radius 10 stroked with a dash pattern and
+   rotated about (12, 12) once per second, indefinitely, by <animateTransform>.
+   `%23` is the URL-encoded `#` of the stroke colour; the image is centred,
+   drawn at 24px x 24px and not repeated. */
+.sample-button.loading::after {
+  content: '';
+  position: absolute;
+  inset: 0;
+  background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"><circle cx="12" cy="12" r="10" stroke="%234285F4" stroke-width="2" stroke-dasharray="31.416" stroke-dashoffset="31.416"><animateTransform attributeName="transform" type="rotate" dur="1s" values="0 12 12;360 12 12" repeatCount="indefinite"/></circle></svg>')
+    center / 24px 24px no-repeat;
+}
+/* Mobile touch device optimizations */
+/* Applies only when the primary input cannot hover and is coarse (e.g. a finger). */
+@media (hover: none) and (pointer: coarse) {
+  /* Suppress text selection, the long-press callout and the tap highlight.
+     NOTE(review): elsewhere in this stylesheet .large-text-area is styled like
+     an editable textarea (resize, :focus, ::selection) — confirm that
+     disabling user selection on it is intended on touch devices. */
+  .large-text-area {
+    -webkit-user-select: none;
+    user-select: none;
+    -webkit-touch-callout: none;
+    -webkit-tap-highlight-color: transparent;
+  }
+  /* Scrollable panels: momentum scrolling on WebKit, a scroll-position hint
+     for the browser, and no scroll chaining to the page at the boundaries. */
+  .output-container,
+  #prompt-output {
+    -webkit-overflow-scrolling: touch;
+    will-change: scroll-position;
+    overscroll-behavior: contain;
+  }
+  /* Spans: `touch-action: manipulation` keeps pan and pinch-zoom but disables
+     other gestures such as double-tap zoom; selection and the long-press
+     callout are suppressed. */
+  .text-span {
+    touch-action: manipulation;
+    -webkit-user-select: none;
+    user-select: none;
+    -webkit-touch-callout: none;
+  }
+}
+/* Text Areas */
+.text-area-container {
+  display: grid;
+  grid-template-columns: 1fr 1fr;
+  gap: 24px;
+}
+/* Ensure both panels handle long content consistently */
+.large-text-area,
+.output-container {
+  min-width: 0; /* Prevent grid overflow */
+  max-width: 100%; /* Stay within grid bounds */
+  overflow-wrap: break-word; /* Break long words */
+  word-break: break-word; /* Handle long headers */
+}
+/* Each wrapper takes an equal share when its parent is a flex container. */
+.text-area-wrapper {
+  flex: 1;
+}
+/* Panel heading.
+   NOTE: a later `.text-area-wrapper h2 { border: none; }` rule in this file
+   has the same specificity and comes afterwards, so the border-bottom declared
+   here does not render; padding-bottom is left untouched by that rule. */
+.text-area-wrapper h2 {
+  margin: 0 0 15px 0;
+  color: #2c3e50;
+  font-size: 1.3em;
+  font-weight: 500;
+  border-bottom: 2px solid #e9ecef;
+  padding-bottom: 8px;
+}
+.large-text-area {
+  width: 100%;
+  height: clamp(300px, 50vh, 500px);
+  min-height: 300px;
+  padding: 15px;
+  box-sizing: border-box;
+  resize: vertical;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 8px;
+  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
+  font-size: 14px;
+  line-height: 1.5;
+  transition: border-color 0.3s ease;
+  -webkit-overflow-scrolling: touch;
+  scroll-behavior: smooth;
+}
+.large-text-area:focus {
+  outline: none;
+  border-color: var(--google-blue);
+  box-shadow: 0 0 0 3px rgba(26, 115, 232, 0.2);
+}
+.output-container {
+  width: 100%;
+  height: clamp(300px, 50vh, 500px);
+  min-height: 300px;
+  overflow: auto;
+  border: 2px solid #e9ecef;
+  border-radius: 8px;
+  background-color: #f8f9fa;
+  position: relative;
+  /* Ensure container doesn't expand beyond grid bounds */
+  box-sizing: border-box;
+  -webkit-overflow-scrolling: touch;
+  scroll-behavior: smooth;
+}
+/* Loading Overlay */
+/* Fills its positioned ancestor with a translucent, slightly blurred veil and
+   centres its children in a column. */
+.loading-overlay {
+  position: absolute;
+  top: 0;
+  left: 0;
+  width: 100%;
+  height: 100%;
+  background: rgba(240, 248, 255, 0.85);
+  /* Prefixed form first: Safari releases before 18 only understand it. */
+  -webkit-backdrop-filter: blur(2px);
+  backdrop-filter: blur(2px);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  flex-direction: column;
+  border-radius: 8px;
+  z-index: 10;
+}
+.loader-message {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 15px;
+  font-weight: 500;
+  color: var(--google-blue-dark);
+  display: inline-block;
+}
+.loader-text {
+  margin-top: 24px;
+  text-align: center;
+}
+/* 80px ring: a faint blue track with darker top and right quadrants and a soft
+   drop shadow.
+   NOTE(review): this rule declares no `animation` or `transform`; any rotation
+   must come from another rule or from script — confirm. */
+.spinner {
+  width: 80px;
+  height: 80px;
+  border: 8px solid rgba(26, 115, 232, 0.15);
+  border-top-color: var(--google-blue-dark);
+  border-right-color: var(--google-blue);
+  border-radius: 50%;
+  filter: drop-shadow(0 3px 6px rgba(26, 115, 232, 0.15));
+}
+.sample-button.active {
+  background: var(--google-blue);
+  color: #fff;
+  border-color: var(--google-blue);
+  box-shadow: 0 2px 8px rgba(26, 115, 232, 0.3);
+  transform: translateY(-1px);
+}
+.sample-button.active .sample-modality {
+  background: rgba(255, 255, 255, 0.2);
+  color: #fff;
+}
+.sample-button.active:hover {
+  background: var(--google-blue-light);
+  border-color: var(--google-blue-light);
+  box-shadow: 0 3px 12px rgba(26, 115, 232, 0.4);
+}
+.output-text {
+  width: 100%;
+  height: 100%;
+  padding: 15px;
+  margin: 0;
+  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
+  border: none;
+  overflow: auto;
+  white-space: normal;
+  word-wrap: break-word;
+  box-sizing: border-box;
+  background-color: transparent;
+  font-size: 14px;
+  line-height: 1.5;
+}
+#output-text {
+  white-space: pre-wrap;
+}
+#predict-button {
+  padding: 12px 24px;
+  background: var(--google-blue);
+  color: white;
+  border: none;
+  border-radius: 6px;
+  cursor: pointer;
+  font-size: 14px;
+  font-weight: 500;
+  transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
+  box-shadow: 0 1px 3px rgba(60, 64, 67, 0.15);
+  margin: 12px auto 8px auto;
+  display: block;
+  position: relative;
+  overflow: hidden;
+}
+#predict-button:hover {
+  background: var(--google-blue-light);
+  transform: translateY(-1px);
+  box-shadow: 0 2px 8px rgba(26, 115, 232, 0.3);
+}
+#predict-button:active {
+  transform: translateY(0);
+  box-shadow: 0 1px 4px rgba(26, 115, 232, 0.3);
+}
+#predict-button:disabled {
+  background: #bdc1c6;
+  cursor: not-allowed;
+  transform: none;
+  box-shadow: 0 1px 2px rgba(60, 64, 67, 0.1);
+}
+.text-span {
+  cursor: pointer;
+  border-radius: 2px;
+  transition: background-color 0.2s;
+}
+/* Style labels within text spans */
+.text-span.has-label .label-part {
+  font-weight: 600;
+  color: var(--google-blue);
+  font-family: 'Google Sans', sans-serif;
+  pointer-events: none; /* Let hover events bubble to parent .text-span */
+}
+.text-span.has-label .content-part {
+  font-weight: normal;
+}
+.text-span:hover {
+  background-color: rgba(66, 165, 245, 0.2);
+  outline: 1px solid rgba(66, 165, 245, 0.3);
+}
+.highlight {
+  background-color: rgba(66, 165, 245, 0.35) !important;
+  outline: 1px solid rgba(66, 165, 245, 0.5) !important;
+}
+/* Input textarea selection - exactly match output highlighting */
+.large-text-area::selection {
+  background-color: rgba(66, 165, 245, 0.35) !important;
+  color: inherit !important;
+}
+.large-text-area::-moz-selection {
+  background-color: rgba(66, 165, 245, 0.35) !important;
+  color: inherit !important;
+}
+#output-text-container {
+  position: relative;
+}
+#input-text-container {
+  position: relative;
+}
+#output-text-container .significance-minor {
+  text-decoration: underline;
+  text-decoration-style: solid;
+  text-decoration-color: #fbc02d; /* subtle yellow */
+  text-decoration-thickness: 2px;
+}
+#output-text-container .significance-significant {
+  text-decoration: underline;
+  text-decoration-style: solid;
+  text-decoration-color: #f48fb1; /* subtle light pink */
+  text-decoration-thickness: 2px;
+}
+/* Report Structure */
+.segment-label {
+  font-weight: bold;
+}
+/* Sub-label styling */
+.segment-sublabel {
+  font-family: 'Google Sans', sans-serif; /* align with headers */
+  font-weight: 600; /* stronger bold for clarity */
+  color: var(--google-grey-900);
+  margin-right: 2px;
+  white-space: nowrap;
+}
+.segment-body {
+  margin-bottom: 12px;
+}
+.section-header {
+  font-family: 'Google Sans', sans-serif;
+  font-weight: 600;
+  font-size: 1.1em;
+  margin-top: 12px;
+  margin-bottom: 2px;
+  color: var(--google-blue-dark);
+}
+/* Default selection colours for textareas (yellow background, black text).
+   Elements with .large-text-area do not get these: the
+   `.large-text-area::selection` rule earlier in this file sets both
+   properties with !important, which outranks this rule. */
+textarea::selection {
+  background-color: #ffeb3b;
+  color: #000;
+}
+.instructions {
+  background: var(--google-grey-100);
+  border-radius: 8px;
+  border: 1px solid var(--google-grey-200);
+  padding: 10px 16px;
+  font-size: 14px;
+  color: var(--google-grey-700);
+}
+.instructions p {
+  margin: 0;
+  line-height: 1.6;
+  color: #2c3e50;
+  font-size: 1em;
+}
+.instructions strong {
+  color: #1565c0;
+  font-weight: 600;
+}
+.instructions ul {
+  margin: 15px 0;
+  padding-left: 20px;
+}
+.instructions li {
+  margin-bottom: 8px;
+  line-height: 1.6;
+}
+.instructions li strong {
+  color: #2c3e50;
+}
+.samples-container h3,
+.header-container h1 {
+  font-weight: 400;
+}
+.page-wrapper {
+  max-width: 1200px;
+  margin: 0 auto;
+  padding: 24px;
+}
+/* Generic surface: white background, hairline border, rounded corners and a
+   soft shadow. */
+.card {
+  background: #fff;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 12px;
+  box-shadow: 0 1px 3px rgba(60, 64, 67, 0.15);
+  padding: 24px;
+}
+/* The two-column .text-area-container grid is declared once, in the
+   "Text Areas" section of this file; it is intentionally not repeated here. */
+.banner.card {
+  padding: 24px;
+  border-radius: 16px;
+  margin-top: 24px;
+  border-color: var(--google-grey-200);
+}
+.banner-content {
+  max-width: 820px;
+  margin: 0 auto;
+  text-align: left;
+}
+.sub-header {
+  margin: 0;
+  font-family: 'Google Sans Text', sans-serif;
+  font-size: 18px;
+  color: var(--google-grey-700);
+}
+/* Modality color tags */
+/* One background/text colour pair per modality class.
+   NOTE(review): this file also defines a parallel `.sample-modality.mod-*`
+   set with different colours for some modalities — confirm which class names
+   the markup actually emits. */
+.sample-modality.ct {
+  background: #e3f2fd;
+  color: #1565c0;
+}
+.sample-modality.mri {
+  background: #e8f5e9;
+  color: #2e7d32;
+}
+.sample-modality.xr {
+  background: #fff3e0;
+  color: #e65100;
+}
+.sample-modality.us {
+  background: #e0f2f1;
+  color: #00695c;
+}
+.sample-modality.pet {
+  background: #fce4ec;
+  color: #ad1457;
+}
+/* keep active chip modality white when selected */
+/* Same declarations as the generic `.sample-button.active .sample-modality`
+   rule; these selectors carry one extra class, so they win over the
+   per-modality colours above. */
+.sample-button.active .sample-modality.ct,
+.sample-button.active .sample-modality.mri,
+.sample-button.active .sample-modality.xr,
+.sample-button.active .sample-modality.us,
+.sample-button.active .sample-modality.pet {
+  background: rgba(255, 255, 255, 0.2);
+  color: #fff;
+}
+.primary-label {
+  font-family: 'Google Sans Text', sans-serif;
+  font-weight: 500;
+  margin-top: 10px;
+  margin-bottom: 2px;
+  color: var(--google-blue-dark);
+}
+.finding-list {
+  margin: 0 0 4px 0;
+  padding-left: 0;
+  list-style-position: inside;
+}
+.finding-list li {
+  margin-bottom: 6px;
+  line-height: 1.5;
+  padding-left: 0.4em;
+  text-indent: -0.4em;
+}
+.single-finding {
+  margin: 0 0 6px 0;
+}
+/* Opacity pulse: 0.35 at the start and end of each cycle, 0.8 at the midpoint.
+   NOTE(review): no `animation` declaration in the surrounding rules names
+   these keyframes — confirm they are referenced elsewhere. */
+@keyframes loaderPulse {
+  0%,
+  100% {
+    opacity: 0.35;
+  }
+  50% {
+    opacity: 0.8;
+  }
+}
+.source-link {
+  display: none;
+}
+.footer-note {
+  font-size: 13px;
+  color: var(--google-grey-600);
+  text-align: center;
+  margin-top: 24px;
+}
+.footer-note .banner-link {
+  font-weight: 500;
+  color: var(--google-blue-dark);
+}
+.footer-note .hug-emoji {
+  font-size: 16px;
+}
+.banner-description + .banner-description {
+  border-top: 1px solid var(--google-grey-200);
+  padding-top: 12px;
+}
+/* .output-header layout is defined by the shared
+   `.input-header, .output-header` rule later in this file. That rule has the
+   same specificity and redeclares display, justify-content, align-items and
+   margin-bottom, so a separate rule here would never take effect. */
+/* Floating round icon buttons (copy / clear), absolutely positioned at the
+   bottom-right of their positioned ancestor. Both buttons share one set of
+   rules; only the "done" state class differs (.copied vs .cleared). */
+.copy-button-overlay,
+.clear-button-overlay {
+  position: absolute;
+  bottom: 12px;
+  right: 20px;
+  width: 36px;
+  height: 36px;
+  border-radius: 50%;
+  background-color: rgba(255, 255, 255, 0.6);
+  border: 1px solid rgba(0, 0, 0, 0.06);
+  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08);
+  color: #9aa0a6;
+  cursor: pointer;
+  transition: all 0.3s ease;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  /* Prefixed form first: Safari releases before 18 only understand it. */
+  -webkit-backdrop-filter: blur(8px);
+  backdrop-filter: blur(8px);
+  z-index: 5;
+  opacity: 0.7;
+}
+.copy-button-overlay:hover,
+.clear-button-overlay:hover {
+  background-color: rgba(255, 255, 255, 0.85);
+  box-shadow: 0 2px 6px rgba(0, 0, 0, 0.12);
+  color: #5f6368;
+  opacity: 1;
+}
+/* Subtle press feedback. */
+.copy-button-overlay:active,
+.clear-button-overlay:active {
+  transform: scale(0.98);
+}
+/* Confirmation state: light-blue tint. */
+.copy-button-overlay.copied,
+.clear-button-overlay.cleared {
+  background-color: rgba(230, 247, 255, 0.8);
+  border-color: rgba(24, 144, 255, 0.3);
+  color: #1890ff;
+  opacity: 0.9;
+}
+.copy-button-overlay svg,
+.clear-button-overlay svg {
+  width: 18px;
+  height: 18px;
+  stroke-width: 1.5;
+}
+/* In the confirmation state the icon is swapped for a check mark drawn by
+   ::after. */
+.copy-button-overlay.copied svg,
+.clear-button-overlay.cleared svg {
+  display: none;
+}
+.copy-button-overlay.copied::after,
+.clear-button-overlay.cleared::after {
+  content: '✓';
+  font-size: 16px;
+  font-weight: 500;
+}
+/* Disabled buttons are invisible and ignore pointer input. */
+.copy-button-overlay:disabled,
+.clear-button-overlay:disabled {
+  opacity: 0;
+  pointer-events: none;
+}
+.toggle-group {
+  display: flex;
+  gap: 16px;
+  align-items: center;
+}
+/* Toggle labels. Text selection is disabled so repeated clicks on a label do
+   not highlight its text; the -webkit- form is included because Safari only
+   honours the prefixed property. */
+.cache-toggle {
+  font-size: 13px;
+  color: var(--google-grey-700);
+  -webkit-user-select: none;
+  user-select: none;
+}
+.raw-toggle,
+.prompt-toggle {
+  font-size: 13px;
+  color: #546e7a;
+  font-weight: 500;
+  -webkit-user-select: none;
+  user-select: none;
+  transition: color 0.2s ease;
+}
+.raw-toggle:hover,
+.prompt-toggle:hover {
+  color: #37474f;
+}
+/* Space between each checkbox and its label text. .prompt-toggle is listed
+   too, so all three toggles get the same spacing. */
+.cache-toggle input,
+.raw-toggle input,
+.prompt-toggle input {
+  margin-right: 4px;
+}
+.cache-status {
+  font-size: 11px;
+  color: var(--google-grey-700);
+  margin-left: 4px;
+  opacity: 0.8;
+}
+.raw-json {
+  font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
+  font-size: 12px;
+  line-height: 1.4;
+  background: #fafafa;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 8px;
+  padding: 16px;
+  max-height: 500px;
+  overflow-y: auto;
+  overflow-x: hidden !important;
+  word-wrap: break-word;
+  word-break: break-word;
+  width: 100%;
+  box-sizing: border-box;
+}
+/* Disable animations to prevent flicker */
+.raw-json * {
+  animation: none !important;
+  transition: none !important;
+}
+/* Override JSON formatter table layout for text wrapping */
+.raw-json .json-formatter-row {
+  display: block !important;
+  width: 100% !important;
+  table-layout: auto !important;
+}
+.raw-json .json-formatter-row > * {
+  display: inline !important;
+  max-width: 100% !important;
+}
+/* Force the entire JSON formatter to use block layout */
+.raw-json .json-formatter-table {
+  display: block !important;
+  width: 100% !important;
+  table-layout: fixed !important;
+}
+.raw-json .json-formatter-table,
+.raw-json .json-formatter-table * {
+  table-layout: fixed !important;
+  word-wrap: break-word !important;
+  overflow-wrap: break-word !important;
+}
+/* Basic JSON formatter styling */
+.raw-json .json-formatter-key {
+  color: #0066cc;
+}
+.raw-json .json-formatter-string {
+  color: #22863a;
+  word-wrap: break-word;
+  word-break: break-word;
+  white-space: pre-wrap;
+  display: inline !important;
+  max-width: 100% !important;
+  vertical-align: top !important;
+  overflow-wrap: break-word !important;
+}
+.raw-json .json-formatter-number {
+  color: #005cc5;
+}
+.raw-json .json-formatter-boolean {
+  color: #d73a49;
+}
+.raw-json .json-formatter-null {
+  color: #6f42c1;
+}
+.raw-json .json-formatter-toggler {
+  color: #586069;
+  cursor: pointer;
+}
+.raw-json .json-formatter-toggler:hover {
+  color: #0366d6;
+}
+/* Force text wrapping for all elements */
+.raw-json .json-formatter-string,
+.raw-json .json-formatter-key,
+.raw-json .json-formatter-row,
+.raw-json .json-formatter-row *,
+.raw-json .json-formatter-preview {
+  max-width: 100% !important;
+  overflow-wrap: break-word !important;
+  word-break: break-word !important;
+  white-space: pre-wrap !important;
+  box-sizing: border-box !important;
+  min-width: 0 !important;
+  width: auto !important;
+}
+/* Prevent horizontal scrolling and text cutoff */
+/* Long tokens wrap via overflow-wrap/word-wrap. Automatic hyphenation is
+   explicitly kept off (`manual` is the initial value): with `auto` the browser
+   may draw hyphens at line breaks that are not part of the JSON, which would
+   misrepresent keys and string values in a raw-data view. */
+.raw-json,
+.raw-json * {
+  max-width: 100% !important;
+  word-wrap: break-word !important;
+  overflow-wrap: break-word !important;
+  hyphens: manual !important;
+  overflow-x: hidden !important;
+}
+#raw-output {
+  width: 100% !important;
+  max-width: 100% !important;
+  box-sizing: border-box !important;
+  overflow-wrap: break-word !important;
+}
+.input-header,
+.output-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  gap: 12px;
+  flex-wrap: nowrap;
+  border-bottom: 2px solid #e9ecef;
+  padding-bottom: 4px;
+  margin-bottom: 8px;
+}
+.input-controls {
+  display: flex;
+  align-items: center;
+  gap: 16px;
+  flex-wrap: wrap;
+}
+/* Panel controls at bottom of Input/Output container */
+/* Centred, wrapping flex row spanning every grid column of its parent
+   (`grid-column: 1 / -1`). Most declarations are forced with !important.
+   `gap` and `padding` are deliberately left at normal priority: the
+   `max-width: 768px` rule for .panel-controls later in this file sets its own
+   padding (12px 16px) and row/column gaps (8px) without !important, and an
+   important value here would silently override those mobile settings. */
+.panel-controls {
+  display: flex !important;
+  justify-content: center !important;
+  align-items: center !important;
+  gap: 16px;
+  padding: 4px 16px;
+  border-top: 1px solid #e9ecef !important;
+  background: #fafbfc !important;
+  border-radius: 0 0 16px 16px !important;
+  margin-top: 0px !important;
+  margin-left: auto !important;
+  margin-right: auto !important;
+  flex-wrap: wrap !important;
+  text-align: center !important;
+  width: 100% !important;
+  box-sizing: border-box !important;
+  grid-column: 1 / -1 !important;
+}
+.input-header h2,
+.output-header h2 {
+  margin: 0;
+  line-height: 1.2;
+}
+.text-area-wrapper h2 {
+  border: none;
+}
+#prompt-output {
+  width: 100%;
+  height: clamp(300px, 50vh, 500px);
+  min-height: 300px;
+  overflow: auto;
+  border: 2px solid #e9ecef;
+  border-radius: 8px;
+  background: #fafafa;
+  padding: 16px;
+  box-sizing: border-box;
+  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
+  font-size: 12px;
+  line-height: 1.4;
+  white-space: normal;
+  word-wrap: break-word;
+  word-break: break-word;
+  overflow-wrap: break-word;
+  -webkit-overflow-scrolling: touch;
+  scroll-behavior: smooth;
+}
+/* Markdown styling for prompt output */
+#prompt-output h1 {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 18px;
+  font-weight: 600;
+  color: var(--google-blue-dark);
+  margin: 0 0 16px 0;
+  padding-bottom: 8px;
+  border-bottom: 2px solid var(--google-grey-200);
+}
+#prompt-output h2 {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 16px;
+  font-weight: 500;
+  color: var(--google-blue-dark);
+  margin: 24px 0 12px 0;
+}
+#prompt-output h3 {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 14px;
+  font-weight: 500;
+  color: var(--google-grey-700);
+  margin: 16px 0 8px 0;
+}
+#prompt-output strong {
+  font-weight: 600;
+  color: var(--google-blue-dark);
+}
+#prompt-output blockquote {
+  margin: 12px 0;
+  padding: 8px 16px;
+  border-left: 4px solid var(--google-blue);
+  background: rgba(26, 115, 232, 0.05);
+  font-style: italic;
+}
+#prompt-output code {
+  font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace;
+  background: rgba(0, 0, 0, 0.05);
+  padding: 2px 4px;
+  border-radius: 3px;
+  font-size: 11px;
+}
+#prompt-output pre {
+  background: #f5f5f5;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 6px;
+  padding: 12px;
+  overflow-x: auto;
+  margin: 12px 0;
+  font-size: 11px;
+  line-height: 1.3;
+  word-wrap: break-word;
+  white-space: pre-wrap;
+}
+#prompt-output pre code {
+  background: none;
+  padding: 0;
+  border-radius: 0;
+}
+#prompt-output ul,
+#prompt-output ol {
+  margin: 8px 0;
+  padding-left: 20px;
+}
+#prompt-output li {
+  margin-bottom: 4px;
+  line-height: 1.4;
+}
+#prompt-output p {
+  margin: 8px 0;
+  line-height: 1.5;
+  font-family: 'Google Sans Text', sans-serif;
+  font-size: 13px;
+}
+.no-bold {
+  font-weight: 400;
+}
+.model-select-container {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  font-size: 14px;
+  margin: 0;
+}
+.model-select-container select {
+  padding: 6px 12px;
+  border-radius: 6px;
+  border: 1px solid var(--google-grey-200);
+  font-size: 14px;
+  background: #fff;
+  cursor: pointer;
+}
+.model-select-container select:focus {
+  outline: none;
+  border-color: var(--google-blue);
+  box-shadow: 0 0 0 3px rgba(26, 115, 232, 0.2);
+}
+.model-select-container label {
+  font-weight: 500;
+  color: var(--google-grey-700);
+}
+.banner-divider {
+  border: none;
+  border-top: 1px solid var(--google-grey-200);
+  margin: 24px 0 18px 0;
+}
+/* Interface Options Panel */
+.interface-options-panel {
+  margin: 16px 0;
+  padding: 16px !important; /* Override default card padding for more compact layout */
+}
+.interface-options-header {
+  cursor: pointer;
+  transition: background-color 0.2s ease;
+  border-radius: 8px;
+  padding: 4px;
+  margin: -4px;
+}
+.interface-options-header:hover {
+  background-color: var(--google-grey-100);
+}
+.interface-options-title {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 20px;
+  font-weight: normal;
+  margin: 0 0 6px 0;
+  color: var(--google-grey-900);
+}
+.interface-options-summary {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  font-size: 13px;
+  color: var(--google-grey-700);
+  margin-bottom: 4px;
+}
+.expand-icon {
+  color: var(--google-blue-dark);
+  transition:
+    transform 0.3s ease,
+    color 0.2s ease,
+    background-color 0.2s ease;
+  font-size: 20px;
+  cursor: pointer;
+  opacity: 0.9;
+  padding: 3px;
+  border-radius: 50%;
+  background-color: transparent;
+}
+.expand-icon:hover {
+  color: var(--google-blue);
+  opacity: 1;
+  background-color: rgba(66, 133, 244, 0.1);
+  transform: scale(1.05);
+}
+.expand-icon.expanded {
+  transform: rotate(180deg);
+}
+.expand-icon.expanded:hover {
+  transform: rotate(180deg) scale(1.05);
+}
+.interface-options-content {
+  margin-top: 10px;
+  transition: all 0.3s ease;
+}
+.interface-options-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+  gap: 14px;
+  margin-bottom: 10px;
+}
+.interface-option {
+  padding: 12px;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 8px;
+  background: #fff;
+  transition: border-color 0.2s ease;
+}
+.interface-option:hover {
+  border-color: var(--google-blue-light);
+}
+.option-header {
+  display: flex;
+  align-items: center;
+  margin-bottom: 6px;
+}
+.option-icon {
+  font-size: 18px;
+  color: var(--google-blue);
+  margin-right: 6px;
+}
+.option-header strong {
+  font-family: 'Google Sans', sans-serif;
+  font-size: 14px;
+  color: var(--google-grey-900);
+}
+.option-description {
+  font-size: 13px;
+  color: var(--google-grey-700);
+  line-height: 1.3;
+  margin: 0;
+}
+.interface-options-note {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  padding: 8px 12px;
+  background: var(--google-grey-100);
+  border-radius: 6px;
+  font-size: 12px;
+  color: var(--google-grey-700);
+}
+.note-icon {
+  font-size: 16px;
+  color: var(--google-blue);
+  margin-right: 6px;
+}
+/* Clinical Significance Legend */
+.clinical-significance-legend {
+  margin: 0 0 4px 0;
+  padding: 6px 12px;
+  background: #fff;
+  border: 1px solid var(--google-grey-200);
+  border-radius: 6px;
+  display: flex;
+  justify-content: flex-end;
+  align-items: center;
+  gap: 16px;
+  font-size: 12px;
+}
+.legend-title {
+  font-weight: 600;
+  color: var(--google-grey-700);
+  font-size: 12px;
+}
+.legend-item {
+  display: inline-flex;
+  align-items: center;
+  color: var(--google-grey-700);
+  font-weight: 500;
+}
+.legend-line {
+  display: inline-block;
+  width: 18px;
+  height: 2px;
+  border-radius: 1px;
+  margin-right: 4px;
+}
+.legend-line.minor {
+  background-color: #fbc02d;
+}
+.legend-line.major {
+  background-color: #f48fb1;
+}
+.legend-line.grounding {
+  background-color: rgba(66, 165, 245, 0.7);
+  border: 1px solid rgba(66, 165, 245, 0.8);
+}
+/* Row holding the primary action: children are centred on both axes with a
+   16px gap; `position: relative` makes the bar a positioning context. */
+.action-bar {
+  position: relative;
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 16px;
+  margin-top: 16px;
+}
+/* Inside the bar the button drops its own margins and relies on the bar's
+   alignment instead. */
+.action-bar #predict-button {
+  margin: 0;
+}
+/* Social sharing buttons */
+/* Top header share placement */
+.share-top {
+  display: none;
+  align-items: center;
+  gap: 8px;
+  font-size: 14px;
+  margin: 8px 0 24px 0;
+  justify-content: center;
+  color: var(--google-grey-700);
+}
+/* Bottom share placement */
+.share-bottom {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  font-size: 12px;
+  margin: 48px 0 8px 0;
+  justify-content: center;
+  flex-wrap: wrap;
+  color: var(--google-grey-600);
+  opacity: 0.85;
+}
+/* Bottom share button styling */
+.share-bottom .shr-btn {
+  width: 24px;
+  height: 24px;
+  background: transparent;
+  border: none;
+  opacity: 0.7;
+}
+.share-bottom .shr-btn svg {
+  width: 18px;
+  height: 18px;
+}
+.share-bottom .shr-btn:hover {
+  background: #f8f9fa;
+  opacity: 1;
+  transform: none;
+  box-shadow: none;
+}
+/* Common share button styling */
+.shr-btn {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  width: 32px;
+  height: 32px;
+  border-radius: 50%;
+  background: #f8f9fa;
+  color: #5f6368;
+  transition: all 0.2s ease;
+  text-decoration: none;
+  border: 1px solid #e8eaed;
+}
+.shr-btn:hover {
+  background: #e8f0fe;
+  transform: translateY(-1px);
+  box-shadow: 0 2px 8px rgba(66, 133, 244, 0.2);
+}
+.shr-btn:focus {
+  outline: 2px solid var(--google-blue);
+  outline-offset: 2px;
+}
+.shr-btn.shr-x {
+  color: #1da1f2;
+}
+.shr-btn.shr-x:hover {
+  background: #e3f2fd;
+  color: #1976d2;
+}
+.shr-btn.shr-li {
+  color: #0a66c2;
+}
+.shr-btn.shr-li:hover {
+  background: #e3f2fd;
+  color: #1565c0;
+}
+/* Mobile-only toggles - hidden on desktop */
+.mobile-toggle {
+  display: none;
+}
+/* Mobile optimizations */
+@media (max-width: 768px) {
+  .share-bottom {
+    margin-top: 20px;
+  }
+  .shr-btn {
+    width: 28px;
+    height: 28px;
+  }
+  .shr-btn svg {
+    width: 16px;
+    height: 16px;
+  }
+  /* Center Input and Output headers on mobile */
+  .input-header,
+  .output-header {
+    justify-content: center;
+    text-align: center;
+  }
+  /* Mobile panel controls - 2-column grid layout */
+  .panel-controls {
+    display: grid !important;
+    grid-template-columns: repeat(2, 1fr);
+    column-gap: 8px;
+    row-gap: 8px;
+    padding: 12px 16px;
+    justify-items: stretch;
+    align-items: center;
+    border-top: 1px solid #e9ecef;
+    background: #fafbfc;
+  }
+  /* Grid item styling for all controls */
+  .panel-controls .model-select-container,
+  .panel-controls .cache-toggle,
+  .panel-controls .prompt-toggle.mobile-toggle,
+  .panel-controls .raw-toggle.mobile-toggle {
+    font-size: 12px !important;
+    color: var(--google-grey-700) !important;
+    font-weight: normal !important;
+    user-select: none;
+    display: flex;
+    align-items: center;
+    gap: 4px;
+    transition: none !important;
+    text-decoration: none !important;
+  }
+  /* Hide desktop header toggles on mobile */
+  .input-header .prompt-toggle,
+  .output-header .raw-toggle {
+    display: none;
+  }
+  /* Hide cache status text on mobile */
+  .cache-status {
+    display: none;
+  }
+  /* Standardized checkbox sizing */
+  .panel-controls label input {
+    width: 16px;
+    height: 16px;
+    margin-right: 4px !important;
+  }
+  /* Uniform checkbox appearance */
+  .panel-controls .cache-toggle input,
+  .panel-controls .prompt-toggle.mobile-toggle input,
+  .panel-controls .raw-toggle.mobile-toggle input {
+    appearance: auto;
+    margin: 0 4px 0 0 !important;
+    padding: 0;
+    border: none;
+    background: none;
+    width: 16px !important;
+    height: 16px !important;
+  }
+  /* Remove hover effects on mobile */
+  .panel-controls .cache-toggle:hover,
+  .panel-controls .prompt-toggle.mobile-toggle:hover,
+  .panel-controls .raw-toggle.mobile-toggle:hover {
+    color: var(--google-grey-700) !important;
+    transition: none !important;
+    transform: none !important;
+  }
+  /* Mobile disclaimer styling */
+  .disclaimer-container {
+    margin-top: 24px;
+  }
+  .disclaimer-box {
+    font-size: 12px !important;
+    padding: 10px 14px !important;
+    background: #fafbfc !important;
+    border-left: 3px solid #e8c547 !important;
+    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.05) !important;
+    border-radius: 8px !important;
+    gap: 10px !important;
+  }
+  .disclaimer-icon {
+    font-size: 16px !important;
+    color: #e8c547 !important;
+  }
+  .disclaimer-text {
+    color: #6b7280 !important;
+    line-height: 1.4 !important;
+  }
+  /* Mobile Google Research logo */
+  .google-research-logo {
+    margin-top: 2px !important;
+    margin-bottom: 20px !important;
+  }
+  .attribution .google-research-logo img {
+    height: 1.4em !important; /* ~21-22px when subtitle is 15-16px */
+    opacity: 1 !important;
+  }
+}
+/* Small mobile devices - tighter logo scaling */
+@media (max-width: 480px) {
+  .attribution .google-research-logo img {
+    height: 1.3em !important; /* ~20px, prevents headline competition */
+  }
+}
+/* Print styles - ensure logo visibility */
+@media print {
+  .attribution .google-research-logo img {
+    opacity: 1 !important;
+  }
+}
+/* Position legend on the right - desktop only */
+@media (min-width: 769px) {
+  .clinical-significance-legend {
+    position: absolute;
+    right: 0;
+    font-size: 0.8em;
+  }
+}
+/* === MOBILE RESPONSIVE OPTIMIZATIONS === */
+/* Mobile breakpoint for tablets and phones */
+@media (max-width: 768px) {
+  /* Smaller citation note on mobile */
+  .citation-note {
+    font-size: 12px;
+    padding: 8px 14px;
+    margin: 10px 0 0 0;
+  }
+  /* Page wrapper adjustments for mobile */
+  .page-wrapper {
+    padding: 16px;
+    max-width: 100%;
+  }
+  /* Header optimization for mobile */
+  .header-container h1 {
+    margin-bottom: 8px;
+    margin-top: 1.2rem; /* Reduce top margin to keep content above fold */
+  }
+}
+/* Very narrow screens (<=430px): forced brand line break, plus the compact card, text-area, header and control layout rules below */
+@media (max-width: 430px) {
+  .brand-split {
+    display: block; /* Forces line break on very narrow screens */
+  }
+  .sub-header {
+    font-size: 16px;
+  }
+  /* Card padding optimization for mobile */
+  .card {
+    padding: 16px;
+    border-radius: 8px;
+  }
+  .banner.card {
+    padding: 16px;
+    border-radius: 8px;
+    margin-top: 16px;
+  }
+  /* Keep side-by-side but optimize for mobile */
+  .text-area-container {
+    grid-template-columns: 1fr 1fr !important;
+    gap: 12px !important;
+  }
+  .large-text-area {
+    height: clamp(300px, 55vh, 450px);
+    min-height: 300px;
+    font-size: 13px !important;
+  }
+  .output-container {
+    height: clamp(300px, 55vh, 450px);
+    min-height: 300px;
+  }
+  .output-text {
+    font-size: 13px !important;
+  }
+  #prompt-output {
+    height: clamp(300px, 55vh, 450px);
+    min-height: 300px;
+  }
+  /* Compact tip text for mobile */
+  .samples-tip {
+    font-size: 12px !important;
+    padding: 8px 12px !important;
+    margin-top: 12px !important;
+    line-height: 1.4 !important;
+  }
+  .output-container,
+  .large-text-area,
+  #prompt-output {
+    -webkit-overflow-scrolling: touch !important;
+    scroll-behavior: smooth !important;
+    transform: translateZ(0) !important;
+    -webkit-transform: translateZ(0) !important;
+  }
+  /* Mobile highlighting and touch interactions */
+  .text-span {
+    cursor: pointer !important;
+    padding: 2px 1px !important;
+    margin: 1px 0 !important;
+    border-radius: 3px !important;
+    transition: all 0.2s ease !important;
+    /* Increase touch target size slightly */
+    min-height: 18px !important;
+    display: inline !important;
+    white-space: normal !important; /* ensure wrapping inside span */
+    overflow-wrap: anywhere !important; /* long words like "FDG/CT" */
+    word-break: break-word !important; /* safety net */
+    position: relative !important;
+    touch-action: manipulation !important;
+  }
+  .text-span:hover,
+  .text-span:active {
+    background-color: rgba(66, 165, 245, 0.2) !important;
+    outline: 1px solid rgba(66, 165, 245, 0.3) !important;
+    transform: none !important; /* Disable transform on mobile */
+  }
+  /* On mobile, add subtle visual cue that elements are tappable */
+  @media (hover: none) and (pointer: coarse) {
+    .text-span {
+      position: relative;
+    }
+    .text-span:active {
+      transform: scale(0.98) !important;
+      transition: transform 0.1s ease !important;
+    }
+  }
+  .text-span.highlight {
+    background-color: rgba(66, 165, 245, 0.35) !important;
+    outline: 1px solid rgba(66, 165, 245, 0.5) !important;
+    font-weight: 500 !important;
+  }
+  /* Input textarea selection - exactly match output highlighting */
+  .large-text-area::selection {
+    background-color: rgba(66, 165, 245, 0.35) !important;
+    color: inherit !important;
+  }
+  .large-text-area::-moz-selection {
+    background-color: rgba(66, 165, 245, 0.35) !important;
+    color: inherit !important;
+  }
+  /* Header adjustments for mobile - make more compact */
+  .input-header,
+  .output-header {
+    flex-direction: row !important;
+    flex-wrap: wrap !important;
+    align-items: center !important;
+    gap: 6px !important;
+    margin-bottom: 6px !important;
+  }
+  .input-header h2,
+  .output-header h2 {
+    font-size: 1em !important;
+    margin: 0 !important;
+  }
+  /* Compact toggles for mobile headers */
+  .prompt-toggle,
+  .raw-toggle {
+    font-size: 10px !important;
+  }
+  .prompt-toggle label,
+  .raw-toggle label {
+    font-size: 10px !important;
+  }
+  /* Normalize toggle label color and weight */
+  .prompt-toggle,
+  .raw-toggle {
+    color: var(--google-grey-700) !important;
+    font-weight: 400 !important;
+  }
+  /* Control panels optimization for mobile */
+  .panel-controls {
+    flex-direction: row !important;
+    flex-wrap: wrap !important;
+    justify-content: center !important;
+    gap: 8px !important;
+    padding: 8px !important;
+  }
+  /* ========== Mobile layout for Process button + legend ========== */
+  .action-bar {
+    display: flex; /* already in your base CSS */
+    flex-direction: column; /* vertical stack */
+    align-items: center; /* centre both items */
+    gap: 32px;
+    margin-top: 24px;
+  }
+  /* Process button - REDUCED SIZE FOR MOBILE */
+  #predict-button {
+    /* full width on small screens, capped by the 180px max-width below */
+    width: 100%;
+    max-width: 180px;
+    padding: 10px 16px;
+    font-size: 14px;
+    /* no position, no z-index, no order change needed */
+  }
+  /* Legend – full-width, centred text */
+  .clinical-significance-legend {
+    display: flex; /* keep the flex row */
+    flex-wrap: wrap;
+    justify-content: center;
+    gap: 8px;
+    width: 100%;
+    max-width: 320px;
+    text-align: center; /* labels do not hug the left */
+    font-size: 0.75em; /* you already tested this */
+    /* Normal flow item – no position / top / left / z-index */
+  }
+  .legend-item {
+    font-size: 1em;
+  }
+  /* Interface options panel for mobile */
+  .interface-options-grid {
+    grid-template-columns: 1fr !important;
+    gap: 12px !important;
+  }
+  .interface-option {
+    padding: 12px !important;
+  }
+  /* Mobile toggle standardization */
+  .cache-toggle,
+  .raw-toggle,
+  .prompt-toggle,
+  .mobile-toggle {
+    font-size: 12px !important;
+    color: var(--google-grey-700) !important;
+    font-weight: normal !important;
+    transition: none !important;
+  }
+  .model-select-container {
+    text-align: center !important;
+  }
+  .model-select-container label {
+    font-size: 12px !important;
+  }
+  #model-select {
+    padding: 6px !important;
+    font-size: 12px !important;
+  }
+  /* Cache status text */
+  .cache-status {
+    font-size: 11px !important;
+  }
+}
+/* Smaller mobile phones */
+@media (max-width: 480px) {
+  /* Even smaller citation note on small phones */
+  .citation-note {
+    font-size: 11px;
+    padding: 6px 12px;
+  }
+  .page-wrapper {
+    padding: 8px;
+  }
+  .header-container h1 {
+    font-size: 22px;
+  }
+  .card {
+    padding: 8px;
+  }
+  /* Even more compact for very small screens */
+  .text-area-container {
+    gap: 8px !important;
+  }
+  .large-text-area,
+  .output-container,
+  #prompt-output {
+    height: clamp(250px, 50vh, 400px);
+    min-height: 250px;
+  }
+  .large-text-area {
+    font-size: 12px !important;
+  }
+  .output-text {
+    font-size: 12px !important;
+  }
+  /* Even more compact tip for very small screens */
+  .samples-tip {
+    font-size: 11px !important;
+    padding: 6px 10px !important;
+    margin-top: 8px !important;
+    line-height: 1.3 !important;
+  }
+  /* Smaller button for very small screens */
+  #predict-button {
+    max-width: 160px !important;
+    padding: 8px 14px !important;
+    font-size: 13px !important;
+  }
+  /* Tighter spacing for very small screens */
+  .action-bar {
+    gap: 28px; /* slightly smaller gap for tiny screens */
+    margin-top: 20px;
+  }
+  .clinical-significance-legend {
+    font-size: 0.65em; /* smaller text for tiny screens */
+    max-width: 300px; /* slightly narrower */
+  }
+  .input-header h2,
+  .output-header h2 {
+    font-size: 0.9em !important;
+  }
+  /* Extra small toggle labels for tiny screens */
+  .prompt-toggle,
+  .raw-toggle {
+    font-size: 9px !important;
+  }
+  .prompt-toggle label,
+  .raw-toggle label {
+    font-size: 9px !important;
+  }
+}
+/* Simple Error Message Styling */
+.error-message-simple {
+  background-color: #fef2f2;
+  border: 1px solid #fecaca;
+  border-radius: 8px;
+  padding: 24px;
+  margin: 16px 0;
+  text-align: center;
+  animation: fadeIn 0.3s ease-in;
+}
+.error-message-simple h3 {
+  color: #991b1b;
+  margin: 0 0 16px 0;
+  font-size: 1.35rem;
+  font-weight: 600;
+  font-family: 'Google Sans', sans-serif;
+}
+.error-message-simple p {
+  margin: 0 0 12px 0;
+  line-height: 1.6;
+  color: #7f1d1d;
+}
+.error-message-simple strong {
+  color: #991b1b;
+  font-weight: 600;
+}
+.error-message-simple .suggestion {
+  color: #92400e;
+  font-style: italic;
+  margin-bottom: 20px;
+}
+.error-message-simple .deploy-note {
+  background-color: #fef3c7;
+  border-radius: 6px;
+  padding: 12px;
+  margin-top: 16px;
+  font-size: 0.9rem;
+  color: #78350f;
+}
+.error-message-simple .deploy-note strong {
+  color: #92400e;
+}
+@keyframes fadeIn {
+  from {
+    opacity: 0;
+    transform: translateY(-10px);
+  }
+  to {
+    opacity: 1;
+    transform: translateY(0);
+  }
+}
+/* Copy and clear button overlays always use light mode styling */
+/* Error messages always use light mode styling */

structure_report.py ADDED Viewed

	@@ -0,0 +1,734 @@

+"""Core radiology report structuring functionality using LangExtract.
+This module provides the RadiologyReportStructurer class that processes raw
+radiology reports into structured segments categorized as prefix, body, or suffix sections with clinical significance annotations (normal, minor, significant).
+The structuring uses LangExtract with example-guided prompting to extract segments with character intervals that enable interactive hover-to-highlight functionality in the web frontend.
+Backend-Frontend Integration:
+- Backend generates segments with character intervals (startPos/endPos)
+- Frontend creates interactive spans that highlight corresponding input text on hover
+- Significance levels drive CSS styling for visual differentiation
+- Segment types organize content into structured sections (EXAMINATION, FINDINGS, IMPRESSION)
+Example usage:
+    structurer = RadiologyReportStructurer(
+        api_key="your_api_key",
+        model_id="gemini-2.5-flash"
+    )
+    result = structurer.predict("FINDINGS: Normal chest CT...")
+"""
+import collections
+import dataclasses
+import itertools
+from enum import Enum
+from functools import wraps
+from typing import Any, TypedDict
+import langextract as lx
+import langextract.data
+import prompt_instruction
+import prompt_lib
+import report_examples
+class FrontendIntervalDict(TypedDict):
+    """Character interval for frontend with startPos and endPos.
+    Keys are camelCase, unlike the ``start_pos``/``end_pos`` names read from
+    LangExtract's char_interval elsewhere in this module.
+    """
+    # Character offset where the span begins.
+    startPos: int
+    # Character offset where the span ends.
+    # NOTE(review): presumably exclusive, as with slice semantics - confirm.
+    endPos: int
+class SegmentDict(TypedDict):
+    """Segment dictionary for JSON response.
+    This is the shape produced by ``Segment.to_dict``.
+    """
+    # Lowercase section type name from ReportSectionType.display_name.
+    type: str
+    # Section label used to organize segments; may be None.
+    label: str | None
+    # Text content of the segment.
+    content: str
+    # Character intervals attached to this segment (may be empty).
+    intervals: list[FrontendIntervalDict]
+    # Clinical significance string, or None when not provided.
+    significance: str | None
+class SerializedExtractionDict(TypedDict):
+    """Serialized extraction for JSON response.
+    One entry per LangExtract extraction, as built by
+    ``RadiologyReportStructurer._serialize_single_extraction``.
+    """
+    # Extracted text as reported by LangExtract.
+    extraction_text: str | None
+    # Extraction class name (compared against ReportSectionType values).
+    extraction_class: str | None
+    # Attribute mapping attached to the extraction, if any.
+    attributes: dict[str, str] | None
+    # {"start_pos": ..., "end_pos": ...}, or None when no interval is present.
+    char_interval: dict[str, int | None] | None
+    # str() of the extraction's alignment status, or None when absent.
+    alignment_status: str | None
+class ResponseDict(TypedDict):
+    """Complete response dictionary structure.
+    Returned by ``RadiologyReportStructurer.predict`` for both successful and
+    failed processing.
+    """
+    # Structured segments; empty list in the error response.
+    segments: list[SegmentDict]
+    # Serialized extractions (or an error description); {} in the error response.
+    annotated_document_json: dict[str, Any]
+    # Formatted report text, or an "Error processing report: ..." message.
+    text: str
+    # Markdown-formatted prompt with examples; "" in the error response.
+    raw_prompt: str
+# Section header strings.
+# NOTE(review): presumably consumed by the text rendering helpers later in
+# this module - confirm against their definitions.
+FINDINGS_HEADER = "FINDINGS:"
+IMPRESSION_HEADER = "IMPRESSION:"
+EXAMINATION_HEADER = "EXAMINATION:"
+# Attribute key under which an extraction carries its section label.
+SECTION_ATTRIBUTE_KEY = "section"
+# Key names matching the fields of FrontendIntervalDict.
+START_POSITION = "startPos"
+END_POSITION = "endPos"
+# Header prefixes that mark an examination line (tuple form suits str.startswith).
+EXAM_PREFIXES = ("EXAMINATION:", "EXAM:", "STUDY:")
+# Label names for examination and prefix content.
+EXAMINATION_LABEL = "examination"
+PREFIX_LABEL = "prefix"
+# Clinical significance values (lowercase strings).
+SIGNIFICANCE_NORMAL = "normal"
+SIGNIFICANCE_MINOR = "minor"
+SIGNIFICANCE_SIGNIFICANT = "significant"
+SIGNIFICANCE_NOT_APPLICABLE = "not_applicable"
+def _initialize_langextract_patches():
+    """Initialize LangExtract patches for proper alignment behavior.
+    This function applies necessary patches to LangExtract's Resolver.align method to force accept_match_lesser=False and set fuzzy_alignment_threshold to 0.50. This should be called before using LangExtract functionality.
+    Note: This is a temporary workaround until LangExtract exposes
+    accept_match_lesser and fuzzy_alignment_threshold parameters via its public API.
+    """
+    # Store original method
+    original_align = lx.resolver.Resolver.align
+    @wraps(original_align)
+    def _align_patched(self, *args, **kwargs):
+        # Set default if not explicitly provided
+        kwargs.setdefault("accept_match_lesser", False)
+        # Set fuzzy matching threshold to 0.50
+        kwargs.setdefault("fuzzy_alignment_threshold", 0.50)
+        return original_align(self, *args, **kwargs)
+    # Apply the patch
+    lx.resolver.Resolver.align = _align_patched
+class ReportSectionType(Enum):
+    """Enum representing sections of a radiology report with their extraction class names."""
+    PREFIX = "findings_prefix"
+    BODY = "findings_body"
+    SUFFIX = "findings_suffix"
+    @property
+    def display_name(self) -> str:
+        """Returns the lowercase section type name for display purposes."""
+        return self.name.lower()
+@dataclasses.dataclass
+class Segment:
+    """Represents a single merged segment of text in the final structured report.
+    Attributes:
+        type: The section type (prefix, body, or suffix).
+        label: Optional section label for organization.
+        content: The text content of this segment.
+        intervals: List of character position intervals.
+        significance: Optional clinical significance indicator.
+    """
+    type: ReportSectionType
+    label: str | None
+    content: str
+    intervals: list[FrontendIntervalDict]
+    significance: str | None = None
+    def to_dict(self) -> SegmentDict:
+        """Converts the segment to a dictionary representation.
+        Returns:
+            A dictionary containing all segment data with type as display name.
+        """
+        return SegmentDict(
+            type=self.type.display_name,
+            label=self.label,
+            content=self.content,
+            intervals=self.intervals,
+            significance=self.significance,
+        )
+class RadiologyReportStructurer:
+    """Structures radiology reports using LangExtract and large language models.
+    This class processes raw radiology report text and converts it
+    into structured segments categorized as prefix, body, or suffix
+    sections with appropriate labeling and clinical significance annotations.
+    """
+    api_key: str | None
+    model_id: str
+    temperature: float
+    examples: list[langextract.data.ExampleData]
+    _patches_initialized: bool
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model_id: str = "gemini-2.5-flash",
+        temperature: float = 0.0,
+    ):
+        """Initializes the RadiologyReportStructurer.
+        Args:
+            api_key: API key for the language model service.
+            model_id: Identifier for the specific model to use.
+            temperature: Sampling temperature for model generation.
+        """
+        self.api_key = api_key
+        self.model_id = model_id
+        self.temperature = temperature
+        # Few-shot examples are loaded once here and reused for every request.
+        self.examples = report_examples.get_examples_for_model()
+        # The LangExtract patch is applied lazily, on the first extraction
+        # (see _ensure_patches_initialized), not at construction time.
+        self._patches_initialized = False
+    def _ensure_patches_initialized(self):
+        """Ensure LangExtract patches are initialized before use."""
+        if not self._patches_initialized:
+            _initialize_langextract_patches()
+            self._patches_initialized = True
+    def _generate_formatted_prompt_with_examples(
+        self, input_text: str | None = None
+    ) -> str:
+        """Generates a comprehensive, markdown-formatted prompt including examples.
+        Delegates to ``prompt_lib.generate_markdown_prompt`` with this
+        instance's examples.
+        Args:
+            input_text: Optional input text to include in the prompt display.
+        Returns:
+            A markdown-formatted string containing the full prompt and examples.
+        """
+        return prompt_lib.generate_markdown_prompt(self.examples, input_text)
+    def predict(self, report_text: str, max_char_buffer: int = 2000) -> ResponseDict:
+        """Processes a radiology report text into structured format.
+        Takes raw radiology report text and uses LangExtract with example-guided
+        prompting to extract structured segments with character intervals and
+        clinical significance annotations.
+        Args:
+            report_text: Raw radiology report text to be processed.
+            max_char_buffer: Maximum character buffer size for processing.
+        Returns:
+            A dictionary containing:
+                - segments: List of structured report segments
+                - annotated_document_json: Raw extraction results
+                - text: Formatted text representation
+        Raises:
+            ValueError: If report_text is empty or whitespace-only.
+        """
+        if not report_text.strip():
+            raise ValueError("Report text cannot be empty")
+        try:
+            result = self._perform_langextract(report_text, max_char_buffer)
+            return self._build_response(result, report_text)
+        except (ValueError, TypeError, AttributeError) as e:
+            return ResponseDict(
+                text=f"Error processing report: {str(e)}",
+                segments=[],
+                annotated_document_json={},
+                raw_prompt="",
+            )
+    def _perform_langextract(
+        self, report_text: str, max_char_buffer: int
+    ) -> langextract.data.AnnotatedDocument:
+        """Performs LangExtract processing on the input text.
+        Args:
+            report_text: Raw radiology report text to be processed.
+            max_char_buffer: Maximum character buffer size for processing.
+        Returns:
+            LangExtract result object containing extractions.
+        Raises:
+            ValueError: If LangExtract processing fails.
+            TypeError: If invalid parameters are provided.
+        """
+        self._ensure_patches_initialized()
+        return lx.extract(
+            text_or_documents=report_text,
+            prompt_description=prompt_instruction.PROMPT_INSTRUCTION.split(
+                "# Few-Shot Examples"
+            )[0],
+            examples=self.examples,
+            model_id=self.model_id,
+            api_key=self.api_key,
+            max_char_buffer=max_char_buffer,
+            temperature=self.temperature,
+            # accept_match_lesser handled via monkey-patch
+            # (Resolver.align patched at import time)
+        )
+    def _build_response(
+        self, result: langextract.data.AnnotatedDocument, report_text: str
+    ) -> ResponseDict:
+        """Builds the final response dictionary from LangExtract results.
+        Args:
+            result: LangExtract result object containing extractions.
+            report_text: Original input text for prompt generation.
+        Returns:
+            Dictionary containing structured segments and metadata.
+        """
+        segments = self._build_segments_from_langextract_result(result)
+        organized_segments = self._organize_segments_by_label(segments)
+        response: ResponseDict = {
+            "segments": [segment.to_dict() for segment in organized_segments],
+            "annotated_document_json": self._serialize_extraction_results(result),
+            "text": self._format_segments_to_text(organized_segments),
+            "raw_prompt": self._generate_formatted_prompt_with_examples(report_text),
+        }
+        return response
+    def _serialize_extraction_results(
+        self, result: langextract.data.AnnotatedDocument
+    ) -> dict[str, Any]:
+        """Serializes LangExtract results for JSON response.
+        Args:
+            result: LangExtract result object containing extractions.
+        Returns:
+            Dictionary containing serialized extraction data or error information.
+        """
+        try:
+            if not hasattr(result, "extractions"):
+                return {"error": "No extractions found in result"}
+            return {
+                "extractions": [
+                    self._serialize_single_extraction(extraction)
+                    for extraction in result.extractions
+                ]
+            }
+        except (AttributeError, TypeError, KeyError) as e:
+            return {
+                "error": "Failed to serialize extraction result",
+                "error_message": str(e),
+                "fallback_string": str(result),
+            }
+    def _serialize_single_extraction(
+        self, extraction: langextract.data.Extraction
+    ) -> SerializedExtractionDict:
+        """Serializes a single extraction to dictionary format."""
+        return {
+            "extraction_text": extraction.extraction_text,
+            "extraction_class": extraction.extraction_class,
+            "attributes": extraction.attributes,
+            "char_interval": self._extract_char_interval(extraction),
+            "alignment_status": self._get_alignment_status_string(extraction),
+        }
+    def _get_alignment_status_string(
+        self, extraction: langextract.data.Extraction
+    ) -> str | None:
+        """Extracts alignment status from extraction as string."""
+        status = getattr(extraction, "alignment_status", None)
+        return str(status) if status is not None else None
+    def _build_segments_from_langextract_result(
+        self, result: langextract.data.AnnotatedDocument
+    ) -> list[Segment]:
+        """Builds segments from LangExtract result data using one-segment-per-interval strategy.
+        Creates exactly one segment per character interval to enable precise
+        frontend hover-to-highlight functionality. Processes only
+        langextract.data.Extraction objects for consistent typing.
+        Args:
+            result: LangExtract result object containing extractions.
+        Returns:
+            List of Segment objects optimized for frontend rendering and interaction.
+        """
+        segments_list = []
+        for extraction in result.extractions:
+            section_type = self._map_section(extraction.extraction_class)
+            if section_type is None:
+                continue
+            section_label = self._determine_section_label(
+                extraction.attributes, section_type
+            )
+            significance_val = self._extract_clinical_significance(
+                extraction.attributes
+            )
+            intervals = self._get_intervals_from_extraction_dict(
+                extraction, extraction.char_interval
+            )
+            segments_list.extend(
+                self._create_segments_for_intervals(
+                    section_type,
+                    section_label,
+                    extraction.extraction_text,
+                    intervals,
+                    significance_val,
+                )
+            )
+        return segments_list
+    def _determine_section_label(
+        self,
+        attributes: dict[str, str] | None,
+        section_type: ReportSectionType,
+    ) -> str:
+        """Determines the appropriate section label for a segment."""
+        if attributes and isinstance(attributes, dict):
+            section_label = attributes.get(SECTION_ATTRIBUTE_KEY)
+            if section_label:
+                return section_label
+        return section_type.display_name
+    def _extract_clinical_significance(
+        self, attributes: dict[str, str] | None
+    ) -> str | None:
+        """Extracts clinical significance from attributes safely."""
+        if not attributes or not isinstance(attributes, dict):
+            return None
+        try:
+            sig_raw = attributes.get("clinical_significance")
+            if sig_raw is not None:
+                return getattr(sig_raw, "value", str(sig_raw)).lower()
+        except (AttributeError, TypeError):
+            pass
+        return None
+    def _create_segments_for_intervals(
+        self,
+        section_type: ReportSectionType,
+        section_label: str,
+        content: str,
+        intervals: list[FrontendIntervalDict],
+        significance: str | None,
+    ) -> list[Segment]:
+        """Creates segment objects for the given intervals."""
+        if not intervals:
+            return [
+                Segment(
+                    type=section_type,
+                    label=section_label,
+                    content=content,
+                    intervals=[],
+                    significance=significance,
+                )
+            ]
+        return [
+            Segment(
+                type=section_type,
+                label=section_label,
+                content=content,
+                intervals=[interval],
+                significance=significance,
+            )
+            for interval in intervals
+        ]
+    def _map_section(self, extraction_class: str) -> ReportSectionType | None:
+        """Maps extraction class string to ReportSectionType enum."""
+        extraction_class = extraction_class.lower().strip()
+        for section_type in ReportSectionType:
+            if section_type.value == extraction_class:
+                return section_type
+        return None
+    def _get_intervals_from_extraction_dict(
+        self,
+        extraction: langextract.data.Extraction,
+        char_interval: langextract.data.CharInterval | dict[str, int] | None = None,
+    ) -> list[FrontendIntervalDict]:
+        """Extracts character intervals from extraction data.
+        Returns a list of interval dictionaries from the extraction's
+        char_interval in the format expected by the frontend.
+        Args:
+            extraction: langextract.data.Extraction object containing interval data.
+            char_interval: Optional override for character interval data.
+        Returns:
+            List of dictionaries with startPos and endPos keys.
+        """
+        interval_list = []
+        try:
+            char_interval = (
+                char_interval if char_interval is not None else extraction.char_interval
+            )
+            if char_interval is not None:
+                # Handle both dict and object formats for char_interval (langextract.data.CharInterval object or dict override)
+                if isinstance(char_interval, dict):
+                    start_pos = char_interval.get("start_pos")
+                    end_pos = char_interval.get("end_pos")
+                else:
+                    start_pos = getattr(char_interval, "start_pos", None)
+                    end_pos = getattr(char_interval, "end_pos", None)
+                start_position, end_position = self._extract_positions(
+                    start_pos, end_pos
+                )
+                if start_position is not None and end_position is not None:
+                    interval_list.append(
+                        FrontendIntervalDict(
+                            startPos=start_position, endPos=end_position
+                        )
+                    )
+        except Exception:
+            pass
+        return interval_list
+    def _extract_positions(self, start_obj, end_obj) -> tuple[int | None, int | None]:
+        """Extracts position integers from potentially complex objects.
+        Handles possible slice objects or direct integers for start and end positions.
+        """
+        if hasattr(start_obj, "start"):
+            start_obj = start_obj.start
+        if hasattr(end_obj, "stop"):
+            end_obj = end_obj.stop
+        try:
+            start_position = int(start_obj) if start_obj is not None else None
+            end_position = int(end_obj) if end_obj is not None else None
+            if start_position is not None and end_position is not None:
+                return (start_position, end_position)
+        except Exception:
+            pass
+        return (None, None)
+    def _extract_char_interval(
+        self, extraction: langextract.data.Extraction
+    ) -> dict[str, int | None] | None:
+        """Extracts character interval information from an extraction."""
+        char_interval = extraction.char_interval
+        if char_interval is None:
+            return None
+        return {
+            "start_pos": getattr(char_interval, "start_pos", None),
+            "end_pos": getattr(char_interval, "end_pos", None),
+        }
+    def _format_segments_to_text(self, segments: list[Segment]) -> str:
+        """Formats segments into a readable text representation.
+        Merges segments with the same label into coherent paragraphs
+        while preserving the original order of labels as they appear
+        in the document.
+        Args:
+            segments: Segments to render.
+        Returns:
+            The rendered parts joined with newlines, with trailing whitespace
+            removed.
+        """
+        grouped = self._group_segments_by_type_and_label(segments)
+        # Each render helper appends its lines to this shared list in place.
+        formatted_parts: list[str] = []
+        # Output order is fixed: prefix sections, then body, then suffix.
+        self._render_prefix_sections(grouped, segments, formatted_parts)
+        self._render_body_sections(grouped, formatted_parts)
+        self._render_suffix_sections(grouped, formatted_parts)
+        return "\n".join(formatted_parts).rstrip()
+    def _group_segments_by_type_and_label(
+        self, segments: list[Segment]
+    ) -> collections.OrderedDict[tuple[ReportSectionType, str | None], list[str]]:
+        """Groups segments by (type, label) preserving insertion order.
+        Creates a dictionary keyed by (ReportSectionType, label) tuples
+        that maintains the order segments are first encountered.
+        Deduplicates content within each group while preserving
+        the original sequence of unique content items.
+        Args:
+            segments: List of Segment objects to group.
+        Returns:
+            OrderedDict mapping (type, label) tuples to lists of unique content strings.
+        """
+        grouped: collections.OrderedDict[
+            tuple[ReportSectionType, str | None], list[str]
+        ] = collections.OrderedDict()
+        for seg in segments:
+            key = (seg.type, seg.label)
+            grouped.setdefault(key, [])
+            if seg.content not in grouped[key]:
+                grouped[key].append(seg.content.strip())
+        return grouped
+    def _render_prefix_sections(
+        self,
+        grouped: collections.OrderedDict[
+            tuple[ReportSectionType, str | None], list[str]
+        ],
+        segments: list[Segment],
+        formatted_parts: list[str],
+    ) -> None:
+        """Renders PREFIX sections with appropriate headers."""
+        add = formatted_parts.append
+        def blank() -> None:
+            formatted_parts.append("")
+        structured_prefix_exists = any(
+            seg.type == ReportSectionType.PREFIX
+            and seg.label
+            and seg.label.lower() != PREFIX_LABEL
+            for seg in segments
+        )
+        if structured_prefix_exists:
+            for (stype, label), contents in grouped.items():
+                if stype is not ReportSectionType.PREFIX:
+                    continue
+                if label and label.lower() == EXAMINATION_LABEL:
+                    add(EXAMINATION_HEADER)
+                    blank()
+                    for c in contents:
+                        stripped = self._strip_exam_prefix(c)
+                        if stripped:
+                            add(stripped)
+                    blank()
+                elif label and label.lower() != PREFIX_LABEL:
+                    for c in contents:
+                        if c:
+                            add(c)
+                    blank()
+                else:
+                    for c in contents:
+                        if c:
+                            add(c)
+                    blank()
+        else:
+            plain_prefix = []
+            for (stype, _), contents in grouped.items():
+                if stype is ReportSectionType.PREFIX:
+                    plain_prefix.extend(contents)
+            if plain_prefix:
+                add("\n\n".join(plain_prefix).rstrip())
+    def _render_body_sections(
+        self,
+        grouped: collections.OrderedDict[
+            tuple[ReportSectionType, str | None], list[str]
+        ],
+        formatted_parts: list[str],
+    ) -> None:
+        """Renders BODY (FINDINGS) sections."""
+        add = formatted_parts.append
+        def blank() -> None:
+            formatted_parts.append("")
+        body_items = [
+            (k, v) for k, v in grouped.items() if k[0] is ReportSectionType.BODY
+        ]
+        if body_items:
+            if formatted_parts:
+                blank()
+            add(FINDINGS_HEADER)
+            blank()
+            for (_, label), contents in body_items:
+                combined = " ".join(contents).strip()
+                if combined:
+                    add(f"{label}: {combined}")
+                    blank()
+    def _render_suffix_sections(
+        self,
+        grouped: collections.OrderedDict[
+            tuple[ReportSectionType, str | None], list[str]
+        ],
+        formatted_parts: list[str],
+    ) -> None:
+        """Renders SUFFIX (IMPRESSION) sections."""
+        add = formatted_parts.append
+        def blank() -> None:
+            formatted_parts.append("")
+        suffix_items = [
+            (k, v) for k, v in grouped.items() if k[0] is ReportSectionType.SUFFIX
+        ]
+        if suffix_items:
+            if formatted_parts and formatted_parts[-1].strip():
+                blank()
+            add(IMPRESSION_HEADER)
+            blank()
+            suffix_block = "\n".join(
+                itertools.chain.from_iterable(v for _, v in suffix_items)
+            ).rstrip()
+            add(suffix_block)
+    def _organize_segments_by_label(self, segments: list[Segment]) -> list[Segment]:
+        """Organizes segments into the correct order for presentation.
+        Orders segments by section type (prefix → body → suffix), groups
+        body segments by label while preserving original appearance order,
+        and maintains extraction order for segments with the same label.
+        Args:
+            segments: List of Segment objects to organize.
+        Returns:
+            List of segments in proper presentation order.
+        """
+        prefix_segments = [
+            segment for segment in segments if segment.type == ReportSectionType.PREFIX
+        ]
+        body_segments = [
+            segment for segment in segments if segment.type == ReportSectionType.BODY
+        ]
+        suffix_segments = [
+            segment for segment in segments if segment.type == ReportSectionType.SUFFIX
+        ]
+        body_segments_by_label: dict[str, list[Segment]] = {}
+        labels_in_order: list[str] = []
+        for segment in body_segments:
+            if segment.label:
+                if segment.label not in body_segments_by_label:
+                    body_segments_by_label[segment.label] = []
+                    labels_in_order.append(segment.label)
+                body_segments_by_label[segment.label].append(segment)
+        organized_segments = []
+        organized_segments.extend(prefix_segments)
+        for label in labels_in_order:
+            organized_segments.extend(body_segments_by_label[label])
+        organized_segments.extend(suffix_segments)
+        return organized_segments
+    def _strip_exam_prefix(self, text: str) -> str:
+        """Removes common examination prefixes from a string."""
+        upper = text.upper()
+        for prefix in EXAM_PREFIXES:
+            if upper.startswith(prefix):
+                return text[len(prefix) :].lstrip()
+        return text.strip()

templates/index.html ADDED Viewed

	@@ -0,0 +1,524 @@

+<!doctype html>
+<html>
+  <head>
+    <title>Radiology Report Structuring</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <!-- Open Graph / Twitter Card meta tags for rich link previews -->
+    <meta
+      property="og:title"
+      content="RadExtract – Radiology Report Structuring Demo"
+    />
+    <meta
+      property="og:description"
+      content="Transform unstructured radiology reports into actionable, structured data instantly. See how Google's Gemini AI + LangExtract revolutionize medical documentation with real-time extraction of findings, impressions, and clinical insights."
+    />
+    <meta property="og:url" content="{{ share_url_for_sharing }}" />
+    <meta
+      property="og:image"
+      content="{{ share_url_for_sharing }}/static/radextract-preview.jpg"
+    />
+    <meta property="og:type" content="website" />
+    <meta property="og:video" content="{{ share_url_for_sharing }}/static/radextract-preview.mp4" />
+    <meta property="og:video:secure_url" content="{{ share_url_for_sharing }}/static/radextract-preview.mp4" />
+    <meta property="og:video:type" content="video/mp4" />
+    <meta property="og:video:width" content="1920" />
+    <meta property="og:video:height" content="1080" />
+    <meta name="twitter:card" content="player" />
+    <meta
+      name="twitter:title"
+      content="RadExtract – Radiology Report Structuring Demo"
+    />
+    <meta
+      name="twitter:description"
+      content="Transform unstructured radiology reports into actionable, structured data instantly. See how Google's Gemini AI + LangExtract revolutionize medical documentation with real-time extraction of findings, impressions, and clinical insights."
+    />
+    <meta
+      name="twitter:image"
+      content="{{ share_url_for_sharing }}/static/radextract-preview.jpg"
+    />
+    <meta name="twitter:player" content="{{ share_url_for_sharing }}/static/radextract-preview.mp4" />
+    <meta name="twitter:player:width" content="1920" />
+    <meta name="twitter:player:height" content="1080" />
+    <meta name="twitter:player:stream" content="{{ share_url_for_sharing }}/static/radextract-preview.mp4" />
+    <meta name="twitter:player:stream:content_type" content="video/mp4" />
+    <link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
+    <link rel="shortcut icon" href="/static/favicon.svg" />
+    <link rel="apple-touch-icon" href="/static/favicon.svg" />
+    <link rel="stylesheet" href="/static/style.css?v=20250129-video-preview" />
+    <link rel="preconnect" href="https://fonts.googleapis.com" />
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+    <link
+      href="https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500;700&family=Google+Sans+Text:wght@400;500&display=swap"
+      rel="stylesheet"
+    />
+    <link
+      href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght@300;400;500"
+      rel="stylesheet"
+    />
+    <link
+      rel="stylesheet"
+      href="https://cdn.jsdelivr.net/npm/[email protected]/dist/json-formatter.min.css"
+    />
+  </head>
+  <body class="page-wrapper">
+    <div class="header-container">
+      <h1>
+        <strong>RadExtract</strong>:
+        <span class="brand-split">Radiology Report Structuring&nbsp;Demo</span>
+      </h1>
+      <!-- Top share buttons -->
+      <div class="share-top">
+        <span>Share →</span>
+        <a
+          class="shr-btn shr-x"
+          href="https://twitter.com/intent/tweet?text={{ share_text }}"
+          target="_blank"
+          rel="noopener"
+          aria-label="Share on X"
+        >
+          <svg viewBox="0 0 24 24" width="20" aria-hidden="true">
+            <path
+              fill="currentColor"
+              d="M23 2.999a9.05 9.05 0 0 1-2.588.71A4.516 4.516 0 0 0 22.36.365a9.04 9.04 0 0 1-2.867 1.096 4.505 4.505 0 0 0-7.67 4.107A12.79 12.79 0 0 1 1.64.896a4.505 4.505 0 0 0 1.396 6.01 4.47 4.47 0 0 1-2.04-.563v.057a4.507 4.507 0 0 0 3.614 4.417 4.522 4.522 0 0 1-2.034.077 4.508 4.508 0 0 0 4.207 3.128A9.03 9.03 0 0 1 0 19.54a12.75 12.75 0 0 0 6.92 2.026c8.304 0 12.846-6.877 12.846-12.837 0-.196-.004-.392-.013-.586A9.17 9.17 0 0 0 23 2.999z"
+            />
+          </svg>
+        </a>
+        <a
+          class="shr-btn shr-li"
+          href="https://www.linkedin.com/shareArticle?mini=true&url={{ share_url_encoded }}&title={{ linkedin_title }}&summary={{ linkedin_summary }}&source=RadExtract"
+          target="_blank"
+          rel="noopener"
+          aria-label="Share on LinkedIn"
+        >
+          <svg viewBox="0 0 24 24" width="20" aria-hidden="true">
+            <path
+              fill="currentColor"
+              d="M4.98 3.5C4.98 5.43 3.43 7 1.5 7S-1.98 5.43-1.98 3.5 0.57 0 2.5 0 4.98 1.57 4.98 3.5zM.02 8h5V24h-5V8zM7.98 8h4.8v2.2h.07c.67-1.27 2.31-2.6 4.76-2.6 5.09 0 6.04 3.35 6.04 7.7V24h-5v-7.7c0-1.84-.03-4.21-2.57-4.21-2.57 0-2.96 1.99-2.96 4.07V24h-5V8z"
+            />
+          </svg>
+        </a>
+      </div>
+      <!-- Attribution block: subtitle + logo -->
+      <div class="attribution">
+        <p class="sub-header">
+          <strong>Powered by LangExtract + Gemini 2.5</strong>
+        </p>
+        <!-- Google Research logo -->
+        <div class="google-research-logo">
+          <img
+            src="/static/google-research-logo.svg"
+            alt="Google Research"
+            width="174"
+            height="25"
+            loading="lazy"
+            tabindex="-1"
+          />
+        </div>
+      </div>
+      <div class="disclaimer-container">
+        <div class="disclaimer-box">
+          <span class="material-symbols-outlined disclaimer-icon">warning</span>
+          <span class="disclaimer-text"
+            >This demonstration is for illustrative purposes only to show the
+            baseline capabilities of LangExtract, the library that powers this
+            demo. It does not represent a finished or approved product, is not
+            intended to diagnose or suggest treatment for any disease or
+            condition, and should not be used for medical advice.</span
+          >
+        </div>
+        <div class="citation-note">
+          <strong>License & Citation:</strong> If you use
+          RadExtract or LangExtract in production or
+          publications, please cite accordingly and acknowledge usage. Use is
+          subject to the Apache 2.0 License. See
+          <a
+            class="banner-link"
+            href="https://huggingface.co/spaces/google/radextract/blob/main/README.md#disclaimer"
+            target="_blank"
+            rel="noopener noreferrer"
+            >README</a
+          >&nbsp;for&nbsp;details.
+        </div>
+      </div>
+      <div class="banner card">
+        <p class="banner-description">
+          <a
+            class="banner-link"
+            href="https://github.com/google/langextract"
+            target="_blank"
+            rel="noopener noreferrer"
+            ><strong>LangExtract (LX)</strong></a
+          >
+          is a multi-purpose NLP extraction library that uses large language
+          models such as Gemini to convert free-text into schema-controlled
+          data. It learns from your few-shot examples (structured using
+          <strong>LX</strong>'s extraction schema) to identify and extract
+          information, with every datum linked back to its exact words in the
+          source.
+        </p>
+        <hr class="banner-divider" />
+        <h3 class="banner-section-title">Demo Overview</h3>
+        <p class="banner-description">
+          <strong>RadExtract</strong> uses
+          <a
+            class="banner-link"
+            href="https://github.com/google/langextract"
+            target="_blank"
+            rel="noopener noreferrer"
+            ><strong>LangExtract (LX)</strong></a
+          >
+          powered by
+          <a
+            id="model-link"
+            class="banner-link"
+            href="https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash"
+            target="_blank"
+            rel="noopener noreferrer"
+            ><span id="model-name">Gemini 2.5 Flash</span></a
+          >
+          or
+          <a
+            class="banner-link"
+            href="https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro"
+            target="_blank"
+            rel="noopener noreferrer"
+            >Gemini 2.5 Pro</a
+          >
+          to convert radiology report findings into structured, optimized
+          radiology reports with highlighted significant findings. By leveraging
+          a
+          <a
+            class="banner-link"
+            href="https://huggingface.co/spaces/google/radextract/blob/main/prompt_instruction.py"
+            target="_blank"
+            rel="noopener noreferrer"
+            >prompt</a
+          >
+          that describes the structuring task with <strong>LX</strong>'s schema
+          and a
+          <a
+            class="banner-link"
+            href="https://huggingface.co/spaces/google/radextract/blob/main/report_examples.py"
+            target="_blank"
+            rel="noopener noreferrer"
+            >few select examples</a
+          >, <strong>LX</strong> processes free text into the structured output
+          shown below. Leveraging Gemini's foundational knowledge,
+          <strong>RadExtract</strong> can also process imaging modalities beyond
+          those included in the prompt examples, such as the X-ray and
+          ultrasound samples available below.
+        </p>
+        <p class="banner-description">
+          <strong>Interactive Features</strong><br />
+          Each extracted finding is directly grounded by
+          <strong>LX</strong> (linked precisely back to its original words in
+          the source text); hover over any structured item to see this exact
+          textual origin highlighted. <strong>Clinical significance</strong> is
+          visually highlighted: general findings are marked with yellow
+          underlines, while significant findings have red&nbsp;underlines.
+        </p>
+        <p class="banner-description">
+          <strong>Clinical Background</strong><br />
+          Structured reporting helps ensure completeness, reduces ambiguity, and
+          facilitates data sharing in radiology. For background on the value of
+          structured radiology reports, see
+          <a
+            class="banner-link"
+            href="https://link.springer.com/article/10.1007/s13244-017-0588-8"
+            target="_blank"
+            rel="noopener noreferrer"
+            >this European Society of Radiology paper</a
+          >.
+        </p>
+      </div>
+    </div>
+    <div class="samples-container card">
+      <h3>Select a Report</h3>
+      <div class="samples-description">
+        <div class="instruction-step">
+          <span class="step-number">1</span> Select a sample or paste your
+          report
+        </div>
+        <div class="instruction-step">
+          <span class="step-number">2</span> For pasted reports, click
+          "Process" to start
+        </div>
+        <div class="instruction-step">
+          <span class="step-number">3</span> Hover over output findings to
+          highlight source text
+        </div>
+      </div>
+      <div class="sample-buttons"></div>
+      <p class="samples-tip tip-desktop">
+        💡 Try tweaking a sample (remove sections, add extra findings, or paste
+        your own report) to see how the demo responds.
+      </p>
+      <p class="mobile-tip" role="note">
+        <span class="icon">💡</span>
+        Tap any sample report to explore the structuring features. The keyboard
+        stays closed so you can easily scroll and interact with highlighted findings.<br><br>
+        <strong>Tip:</strong> Custom input is available only on desktop or laptop computers.
+        If you're viewing this on a mobile device, please switch to a computer to enter your own reports.
+      </p>
+    </div>
+    <div class="interface-options-panel card">
+      <div class="interface-options-header" data-action="toggle-interface">
+        <h4 class="interface-options-title">Interface Controls</h4>
+        <div class="interface-options-summary">
+          <span>LX Generated Prompt • LX Structured Output • Use Cache</span>
+          <span
+            class="material-symbols-outlined expand-icon"
+            id="interface-expand-icon"
+            >expand_more</span
+          >
+        </div>
+      </div>
+      <div
+        class="interface-options-content"
+        id="interface-options-content"
+        style="display: none"
+      >
+        <div class="interface-options-grid">
+          <div class="interface-option">
+            <div class="option-header">
+              <span class="material-symbols-outlined option-icon"
+                >visibility</span
+              >
+              <strong>LX Generated Prompt</strong>
+            </div>
+            <p class="option-description">
+              View the complete prompt sent to the model, including task
+              description, examples, and your input text
+            </p>
+          </div>
+          <div class="interface-option">
+            <div class="option-header">
+              <span class="material-symbols-outlined option-icon">code</span>
+              <strong>LX Structured Output</strong>
+            </div>
+            <p class="option-description">
+              Toggle between the formatted text view and raw LangExtract JSON
+              data with extraction details
+            </p>
+          </div>
+          <div class="interface-option">
+            <div class="option-header">
+              <span class="material-symbols-outlined option-icon">cached</span>
+              <strong>Use Cache</strong>
+            </div>
+            <p class="option-description">
+              Switch between live model inference and pre-generated Gemini 2.5
+              Pro cached results for faster testing
+            </p>
+          </div>
+        </div>
+        <div class="interface-options-note">
+          <span class="material-symbols-outlined note-icon">info</span>
+          <span
+            >These controls are located in the Input and Output headers below
+            for easy access during interaction.</span
+          >
+        </div>
+      </div>
+    </div>
+    <div class="text-area-container card">
+      <div class="text-area-wrapper input-wrapper">
+        <div class="input-header">
+          <h2>Input</h2>
+          <label class="prompt-toggle"
+            ><input type="checkbox" id="prompt-toggle" /> LX Prompt</label
+          >
+        </div>
+        <div id="input-text-container" class="input-container">
+          <textarea
+            id="input-text"
+            class="large-text-area"
+            placeholder="Enter radiology report here or load a sample from above..."
+            spellcheck="false"
+            autocomplete="off"
+            autocorrect="off"
+            autocapitalize="off"
+            inputmode="none"
+          ></textarea>
+          <button
+            id="clear-input"
+            class="clear-button-overlay"
+            title="Clear input"
+          >
+            <svg
+              width="20"
+              height="20"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="2"
+            >
+              <line x1="18" y1="6" x2="6" y2="18"></line>
+              <line x1="6" y1="6" x2="18" y2="18"></line>
+            </svg>
+          </button>
+        </div>
+        <div id="prompt-output" class="raw-json" style="display: none"></div>
+      </div>
+      <div class="text-area-wrapper" id="output-container">
+        <div class="output-header">
+          <h2>Output</h2>
+          <label class="raw-toggle"
+            ><input type="checkbox" id="raw-toggle" /> LX Data</label
+          >
+        </div>
+        <div id="output-text-container" class="output-container">
+          <pre
+            id="output-text"
+            class="large-text-area output-text"
+            placeholder="Structured output will appear here..."
+          ></pre>
+          <div
+            id="raw-output"
+            class="raw-json output-text"
+            style="display: none"
+          ></div>
+          <button
+            id="copy-output"
+            class="copy-button-overlay"
+            title="Copy output to clipboard"
+          >
+            <svg
+              width="20"
+              height="20"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="2"
+            >
+              <rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect>
+              <path
+                d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"
+              ></path>
+            </svg>
+          </button>
+          <div
+            id="loading-overlay"
+            class="loading-overlay"
+            style="display: none"
+          >
+            <div class="spinner"></div>
+            <div class="loader-text">
+              <span class="loader-message"
+                >Running LangExtract with Gemini 2.5 Flash</span
+              >
+            </div>
+          </div>
+        </div>
+      </div>
+      <div class="panel-controls">
+        <!-- ROW 1 -->
+        <div class="model-select-container">
+          <label for="model-select">Model:</label>
+          <select id="model-select">
+            <option value="gemini-2.5-flash" selected>Gemini 2.5 Flash</option>
+            <option value="gemini-2.5-pro">Gemini 2.5 Pro</option>
+          </select>
+        </div>
+        <label class="cache-toggle">
+          <input type="checkbox" id="cache-toggle" checked />
+          Use Cache
+          <span class="cache-status" id="cache-status"></span>
+        </label>
+        <!-- ROW 2 (mobile only) -->
+        <label class="prompt-toggle mobile-toggle">
+          <input type="checkbox" id="prompt-toggle-mobile" /> LX Prompt
+        </label>
+        <label class="raw-toggle mobile-toggle">
+          <input type="checkbox" id="raw-toggle-mobile" /> LX Data
+        </label>
+      </div>
+    </div>
+    <div class="action-bar">
+      <button id="predict-button">Process</button>
+      <div class="clinical-significance-legend">
+        <span class="legend-title">Findings:</span>
+        <span class="legend-item"
+          ><span class="legend-line minor"></span>General</span
+        >
+        <span class="legend-item"
+          ><span class="legend-line major"></span>Significant</span
+        >
+        <span class="legend-item"
+          ><span class="legend-line grounding"></span>Grounding</span
+        >
+      </div>
+    </div>
+    <div class="instructions"></div>
+    <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/json-formatter.umd.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/[email protected]/marked.min.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/gsap/3.12.2/gsap.min.js"></script>
+    <script
+      type="module"
+      src="/static/script.js?v=20250125-refactored"
+    ></script>
+    <!-- Bottom share placement (minimal space above footer) -->
+    <div class="share-bottom">
+      <span>Share&nbsp;this&nbsp;demo →</span>
+      <a
+        class="shr-btn shr-x"
+        href="https://twitter.com/intent/tweet?text={{ share_text }}"
+        target="_blank"
+        rel="noopener"
+        aria-label="Share on X"
+      >
+        <svg viewBox="0 0 24 24" width="24" aria-hidden="true">
+          <path
+            fill="currentColor"
+            d="M23 2.999a9.05 9.05 0 0 1-2.588.71A4.516 4.516 0 0 0 22.36.365a9.04 9.04 0 0 1-2.867 1.096 4.505 4.505 0 0 0-7.67 4.107A12.79 12.79 0 0 1 1.64.896a4.505 4.505 0 0 0 1.396 6.01 4.47 4.47 0 0 1-2.04-.563v.057a4.507 4.507 0 0 0 3.614 4.417 4.522 4.522 0 0 1-2.034.077 4.508 4.508 0 0 0 4.207 3.128A9.03 9.03 0 0 1 0 19.54a12.75 12.75 0 0 0 6.92 2.026c8.304 0 12.846-6.877 12.846-12.837 0-.196-.004-.392-.013-.586A9.17 9.17 0 0 0 23 2.999z"
+          />
+        </svg>
+      </a>
+      <a
+        class="shr-btn shr-li"
+        href="https://www.linkedin.com/shareArticle?mini=true&url={{ share_url_encoded }}&title={{ linkedin_title }}&summary={{ linkedin_summary }}&source=RadExtract"
+        target="_blank"
+        rel="noopener"
+        aria-label="Share on LinkedIn"
+      >
+        <svg viewBox="0 0 24 24" width="24" aria-hidden="true">
+          <path
+            fill="currentColor"
+            d="M4.98 3.5C4.98 5.43 3.43 7 1.5 7S-1.98 5.43-1.98 3.5 0.57 0 2.5 0 4.98 1.57 4.98 3.5zM.02 8h5V24h-5V8zM7.98 8h4.8v2.2h.07c.67-1.27 2.31-2.6 4.76-2.6 5.09 0 6.04 3.35 6.04 7.7V24h-5v-7.7c0-1.84-.03-4.21-2.57-4.21-2.57 0-2.96 1.99-2.96 4.07V24h-5V8z"
+          />
+        </svg>
+      </a>
+    </div>
+    <div class="footer-note">
+      View this demo’s source on&nbsp;<a
+        class="banner-link"
+        href="https://huggingface.co/spaces/google/radextract/tree/main"
+        target="_blank"
+        rel="noopener noreferrer"
+        >Hugging&nbsp;Face&nbsp;Spaces</a
+      >&nbsp;🤗
+    </div>
+  </body>
+</html>

test_app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+"""Test suite for Flask application endpoints and integration.
+This module provides comprehensive tests for the Flask application including
+route testing, model integration, caching behavior, and error handling.
+Run with: python test_app.py or pytest test_app.py
+"""
+import json
+import os
+import unittest
+from unittest import mock
+# Mock the environment before importing app to avoid initialization errors
+with mock.patch.dict(os.environ, {'KEY': 'test_api_key_for_import'}):
+    from app import Model, app, setup_cache
+class TestFlaskApplication(unittest.TestCase):
+    """Tests for the Flask routes, driven through the app's test client."""
+    @classmethod
+    def setUpClass(cls):
+        """Creates one test client shared by every test in the class."""
+        cls.test_client = app.test_client()
+        app.config['TESTING'] = True
+    def test_index_route_returns_html(self):
+        """GET / responds with 200 and an HTML content type."""
+        response = self.test_client.get('/')
+        self.assertEqual(response.status_code, 200)
+        self.assertIn('text/html', response.content_type)
+    def test_cache_stats_route(self):
+        """GET /cache/stats responds with 200 and a JSON object."""
+        response = self.test_client.get('/cache/stats')
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content_type, 'application/json')
+        data = json.loads(response.data)
+        self.assertIsInstance(data, dict)
+    @mock.patch('app.model.predict')
+    def test_predict_route_with_valid_data(self, mock_predict):
+        """POST /predict with text returns the mocked prediction's keys."""
+        mock_predict.return_value = {
+            'segments': [{'type': 'body', 'content': 'test'}],
+            'text': 'test output',
+        }
+        response = self.test_client.post('/predict', data='FINDINGS: Normal chest CT')
+        self.assertEqual(response.status_code, 200)
+        data = json.loads(response.data)
+        self.assertIn('segments', data)
+        self.assertIn('text', data)
+    def test_predict_route_with_empty_data(self):
+        """POST /predict with an empty body is rejected with a 400 payload."""
+        response = self.test_client.post('/predict', data='')
+        self.assertEqual(response.status_code, 400)
+        data = json.loads(response.data)
+        self.assertIn('error', data)
+        self.assertEqual(data['error'], 'Empty input')
+        self.assertIn('message', data)
+        self.assertEqual(data['message'], 'Input text is required')
+        self.assertIn('max_length', data)
+    @mock.patch('app.model.predict')
+    def test_predict_with_custom_headers(self, mock_predict):
+        """The X-Model-ID header value is passed to predict as model_id."""
+        mock_predict.return_value = {'segments': [], 'text': 'test'}
+        headers = {
+            'X-Use-Cache': 'false',
+            'X-Sample-ID': 'test_sample',
+            'X-Model-ID': 'gemini-2.5-flash',
+        }
+        response = self.test_client.post(
+            '/predict', data='Test report', headers=headers
+        )
+        self.assertEqual(response.status_code, 200)
+        mock_predict.assert_called_once_with('Test report', model_id='gemini-2.5-flash')
+    @mock.patch('app.cache_manager.get_cached_result')
+    def test_predict_with_cache_hit(self, mock_get_cached):
+        """With X-Use-Cache true and a cached result, from_cache is truthy."""
+        cached_response = {
+            'segments': [{'type': 'body', 'content': 'cached'}],
+            'text': 'cached result',
+        }
+        mock_get_cached.return_value = cached_response
+        response = self.test_client.post(
+            '/predict', data='Test report', headers={'X-Use-Cache': 'true'}
+        )
+        data = json.loads(response.data)
+        self.assertTrue(data.get('from_cache'))
+        self.assertIn('segments', data)
+class TestModelClass(unittest.TestCase):
+    @mock.patch.dict(os.environ, {'KEY': 'test_api_key'})
+    def test_model_initialization_with_api_key(self):
+        model = Model()
+        self.assertEqual(model.gemini_api_key, 'test_api_key')
+        self.assertIn('gemini-2.5-flash', model._structurers)
+    @mock.patch.dict(os.environ, {}, clear=True)
+    def test_model_initialization_without_api_key(self):
+        with self.assertRaises(ValueError) as context:
+            Model()
+        self.assertIn('KEY environment variable not set', str(context.exception))
+    @mock.patch.dict(os.environ, {'KEY': 'test_key', 'MODEL_ID': 'custom-model'})
+    def test_model_initialization_with_custom_model(self):
+        model = Model()
+        self.assertIn('custom-model', model._structurers)
+    @mock.patch.dict(os.environ, {'KEY': 'test_key'})
+    @mock.patch('app.RadiologyReportStructurer')
+    def test_get_structurer_creates_new_instance(self, mock_structurer_class):
+        model = Model()
+        model._get_structurer('new-model')
+        # Should be called twice: once for default, once for new model
+        self.assertEqual(mock_structurer_class.call_count, 2)
+    @mock.patch.dict(os.environ, {'KEY': 'test_key'})
+    @mock.patch('app.RadiologyReportStructurer')
+    def test_predict_calls_structurer(self, mock_structurer_class):
+        mock_instance = mock.Mock()
+        mock_instance.predict.return_value = {'result': 'test'}
+        mock_structurer_class.return_value = mock_instance
+        model = Model()
+        result = model.predict('test data', 'test-model')
+        mock_instance.predict.assert_called_once_with('test data')
+        self.assertEqual(result, {'result': 'test'})
+class TestCacheSetup(unittest.TestCase):
+    @mock.patch('os.path.exists')
+    @mock.patch('shutil.copy2')
+    @mock.patch('os.makedirs')
+    def test_setup_cache_copies_existing_file(
+        self, mock_makedirs, mock_copy, mock_exists
+    ):
+        mock_exists.return_value = True
+        cache_dir = setup_cache()
+        mock_makedirs.assert_called_once_with('/tmp/cache', exist_ok=True)
+        mock_copy.assert_called_once()
+        self.assertEqual(cache_dir, '/tmp/cache')
+    @mock.patch('os.path.exists')
+    @mock.patch('os.makedirs')
+    def test_setup_cache_handles_missing_source(self, mock_makedirs, mock_exists):
+        mock_exists.return_value = False
+        cache_dir = setup_cache()
+        mock_makedirs.assert_called_once_with('/tmp/cache', exist_ok=True)
+        self.assertEqual(cache_dir, '/tmp/cache')
+class TestErrorHandling(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.test_client = app.test_client()
+        app.config['TESTING'] = True
+    def setUp(self):
+        # Suppress all logging during error tests to reduce noise
+        import logging
+        logging.disable(logging.CRITICAL)
+    def tearDown(self):
+        # Re-enable logging
+        import logging
+        logging.disable(logging.NOTSET)
+    @mock.patch('app.model.predict')
+    @mock.patch('app.logger')
+    def test_predict_handles_type_error(self, mock_logger, mock_predict):
+        mock_predict.side_effect = TypeError('Invalid type')
+        response = self.test_client.post('/predict', data='Test data')
+        self.assertEqual(response.status_code, 500)
+        data = json.loads(response.data)
+        self.assertIn('Processing error', data['error'])
+    @mock.patch('app.model.predict')
+    @mock.patch('app.logger')
+    def test_predict_handles_general_exception(self, mock_logger, mock_predict):
+        mock_predict.side_effect = Exception('General error')
+        response = self.test_client.post('/predict', data='Test data')
+        self.assertEqual(response.status_code, 500)
+        data = json.loads(response.data)
+        self.assertIn('General error', data['error'])
+# Allow running this test module directly as a script.
+if __name__ == '__main__':
+    unittest.main()

test_validation.py ADDED Viewed

	@@ -0,0 +1,152 @@

+#!/usr/bin/env python3
+"""End-to-end validation tests for radiology report structuring.
+This module provides focused validation tests that verify the complete
+RadiologyReportStructurer pipeline by comparing actual processing
+results against known good cached outputs.
+Typical usage example:
+  # Run with unittest (built-in)
+  python test_validation.py
+  python -m unittest test_validation.py -v
+  # Run with pytest (recommended for CI/CD)
+  pytest test_validation.py -v
+"""
+import json
+import os
+import sys
+import unittest
+from typing import Any
+from unittest import mock
+# Add the current directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from structure_report import RadiologyReportStructurer
+class TestRadiologyReportEndToEnd(unittest.TestCase):
+    """End-to-end tests for complete RadiologyReportStructurer pipeline."""
+    cache_file: str
+    sample_data: dict[str, Any]
+    structurer: RadiologyReportStructurer
+    @classmethod
+    def setUpClass(cls):
+        cls.cache_file = 'cache/sample_cache.json'
+        cls.sample_data = cls._load_sample_cache()
+        cls.structurer = RadiologyReportStructurer(
+            api_key='test_key', model_id='gemini-2.5-flash'
+        )
+    @classmethod
+    def _load_sample_cache(cls) -> dict[str, Any]:
+        if not os.path.exists(cls.cache_file):
+            raise FileNotFoundError(f'Sample cache file not found: {cls.cache_file}')
+        with open(cls.cache_file, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    def _validate_response_structure(self, response: dict[str, Any]) -> None:
+        self.assertIn('segments', response)
+        self.assertIn('text', response)
+        self.assertIsInstance(response['segments'], list)
+        self.assertIsInstance(response['text'], str)
+    def _validate_successful_response(self, response: dict[str, Any]) -> None:
+        self._validate_response_structure(response)
+        self.assertGreater(len(response['segments']), 0)
+        self.assertGreater(len(response['text']), 0)
+        for segment in response['segments']:
+            self._validate_segment_structure(segment)
+    def _validate_segment_structure(self, segment: dict[str, Any]) -> None:
+        required_fields = ['type', 'label', 'content', 'intervals']
+        for field in required_fields:
+            self.assertIn(field, segment)
+        valid_types = ['prefix', 'body', 'suffix']
+        self.assertIn(segment['type'], valid_types)
+        if segment['intervals']:
+            for interval in segment['intervals']:
+                self.assertIn('startPos', interval)
+                self.assertIn('endPos', interval)
+                self.assertGreaterEqual(interval['startPos'], 0)
+                self.assertGreater(interval['endPos'], interval['startPos'])
+    @mock.patch('structure_report.lx.extract')
+    def test_end_to_end_processing_pipeline(self, mock_extract):
+        mock_result = mock.MagicMock()
+        mock_result.extractions = []
+        mock_extract.return_value = mock_result
+        input_text = 'EXAMINATION: Chest CT\n\nFINDINGS: Normal lungs.\n\nIMPRESSION: No acute findings.'
+        response = self.structurer.predict(input_text)
+        self._validate_response_structure(response)
+        mock_extract.assert_called_once()
+        call_args = mock_extract.call_args
+        self.assertEqual(call_args[1]['text_or_documents'], input_text)
+        self.assertEqual(call_args[1]['model_id'], 'gemini-2.5-flash')
+    def test_all_cached_samples_validation(self):
+        self.assertGreater(len(self.sample_data), 0, 'No samples found in cache')
+        for sample_key, sample in self.sample_data.items():
+            with self.subTest(sample=sample_key):
+                self._validate_successful_response(sample)
+    def test_error_handling_with_invalid_input(self):
+        with self.assertRaises(ValueError) as context:
+            self.structurer.predict('')
+        self.assertIn('Report text cannot be empty', str(context.exception))
+        with self.assertRaises(ValueError):
+            self.structurer.predict('   \n\t  ')
+    def test_error_handling_with_no_api_key(self):
+        error_structurer = RadiologyReportStructurer(api_key=None)
+        response = error_structurer.predict('EXAMINATION: Test')
+        self._validate_response_structure(response)
+        self.assertEqual(len(response['segments']), 0)
+        self.assertIn('Error processing report', response['text'])
+    def test_patch_initialization_on_first_use(self):
+        new_structurer = RadiologyReportStructurer()
+        self.assertFalse(new_structurer._patches_initialized)
+        new_structurer._ensure_patches_initialized()
+        self.assertTrue(new_structurer._patches_initialized)
+    def test_section_mapping_core_functionality(self):
+        self.assertEqual(
+            self.structurer._map_section('findings_prefix'),
+            self.structurer._map_section('findings_prefix'),
+        )
+        self.assertIsNone(self.structurer._map_section('invalid_section'))
+        self.assertIsNone(self.structurer._map_section(''))
+    def test_exam_prefix_stripping(self):
+        self.assertEqual(
+            self.structurer._strip_exam_prefix('EXAMINATION: Chest CT'), 'Chest CT'
+        )
+        self.assertEqual(
+            self.structurer._strip_exam_prefix('Normal findings'), 'Normal findings'
+        )
+# Allow running this module directly; verbosity=2 prints each test name.
+if __name__ == '__main__':
+    unittest.main(verbosity=2)

tools/rebuild_cache.py ADDED Viewed

	@@ -0,0 +1,70 @@

+#!/usr/bin/env python3
+"""Utility script to rebuild the demonstration cache with current structurer output.
+This development tool rebuilds the cache using the current
+RadiologyReportStructurer implementation, ensuring that cached results
+include the latest features such as raw_prompt data. The script processes
+all sample reports from the static JSON file and caches their structured
+results for improved demo performance.
+The script requires the KEY environment variable to be set with a valid
+Gemini API key and optionally accepts MODEL_ID to specify which model
+to use for processing.
+Usage:
+    export KEY=your_gemini_api_key_here
+    export MODEL_ID=gemini-2.5-pro  # optional, defaults to gemini-2.5-pro
+    python tools/rebuild_cache.py
+"""
+import json
+import os
+import sys
+from pathlib import Path
+# Add parent directory to path to import modules
+sys.path.append(str(Path(__file__).parent.parent))
+from cache_manager import CacheManager
+from structure_report import RadiologyReportStructurer
+API_KEY = os.environ.get("KEY")
+if not API_KEY:
+    sys.exit("KEY environment variable not set. Export KEY before running.")
+SAMPLES_PATH = Path("static/sample_reports.json")
+if not SAMPLES_PATH.exists():
+    sys.exit("static/sample_reports.json not found")
+samples = json.loads(SAMPLES_PATH.read_text())["samples"]
+MODEL_ID = os.environ.get("MODEL_ID", "gemini-2.5-pro")
+structurer = RadiologyReportStructurer(api_key=API_KEY, model_id=MODEL_ID)
+import time
+cache = CacheManager(cache_dir="cache")
+print("Clearing existing cache...")
+cache.clear_cache()
+print(f"Processing {len(samples)} samples with {MODEL_ID}...")
+for s in samples:
+    sid = s["id"]
+    text = s["text"]
+    print(f"  Processing {sid}...")
+    retries = 0
+    while retries < 5:
+        try:
+            result = structurer.predict(text)
+            cache.cache_result(text, result, sample_id=sid)
+            break
+        except Exception as e:
+            retries += 1
+            print(f"    Warning: {e}. Retry {retries}/5...")
+            time.sleep(5)
+    else:
+        print(f"    Error: Failed to process {sid} after 5 retries, skipping.")
+    time.sleep(3)  # base throttle
+print("Cache rebuild completed successfully.")

view_logs_endpoint.py ADDED Viewed

	@@ -0,0 +1,44 @@

+# Example endpoint to add to app.py for viewing logs
+# Only add this if you need remote log access
+@app.route("/logs/recent")
+def view_recent_logs():
+    """View recent log entries (protected endpoint)."""
+    # Check for authentication
+    auth_token = request.args.get('token') or request.headers.get('X-Log-Token')
+    expected_token = os.environ.get('LOG_ACCESS_TOKEN')
+    if not expected_token or auth_token != expected_token:
+        return jsonify({"error": "Unauthorized"}), 401
+    try:
+        # Check if persistent storage exists
+        if not os.path.exists("/data/logs"):
+            return jsonify({"error": "No persistent storage available"}), 404
+        # Get today's log file
+        today = datetime.now().strftime("%Y-%m-%d")
+        log_file = f"/data/logs/radextract-{today}.log"
+        if not os.path.exists(log_file):
+            return jsonify({"error": "No logs for today"}), 404
+        # Read last 100 lines
+        with open(log_file, 'r') as f:
+            lines = f.readlines()
+            recent_lines = lines[-100:] if len(lines) > 100 else lines
+        # Filter for request logs
+        request_logs = [
+            line.strip() for line in recent_lines
+            if "[Req " in line and ("🔴" in line or "🟢" in line)
+        ]
+        return jsonify({
+            "log_file": log_file,
+            "total_lines": len(lines),
+            "recent_requests": request_logs
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500