Spaces:

DataQuests
/

DeepCritical

Running

File size: 8,814 Bytes

"""Modal-based secure code execution tool for statistical analysis.

This module provides sandboxed Python code execution using Modal's serverless infrastructure.
It's designed for running LLM-generated statistical analysis code safely.
"""

import os
from functools import lru_cache
from typing import Any

import structlog

logger = structlog.get_logger(__name__)

# Shared library versions for Modal sandbox - used by both executor and LLM prompts
# Keep these in sync to avoid version mismatch between generated code and execution
SANDBOX_LIBRARIES: dict[str, str] = {
    "pandas": "2.2.0",
    "numpy": "1.26.4",
    "scipy": "1.11.4",
    "matplotlib": "3.8.2",
    "scikit-learn": "1.4.0",
    "statsmodels": "0.14.1",
}


def get_sandbox_library_list() -> list[str]:
    """Get list of library==version strings for Modal image."""
    return [f"{lib}=={ver}" for lib, ver in SANDBOX_LIBRARIES.items()]


def get_sandbox_library_prompt() -> str:
    """Get formatted library versions for LLM prompts."""
    return "\n".join(f"- {lib}=={ver}" for lib, ver in SANDBOX_LIBRARIES.items())


class CodeExecutionError(Exception):
    """Raised when code execution fails."""

    pass


class ModalCodeExecutor:
    """Execute Python code securely using Modal sandboxes.

    This class provides a safe environment for executing LLM-generated code,
    particularly for scientific computing and statistical analysis tasks.

    Features:
    - Sandboxed execution (isolated from host system)
    - Pre-installed scientific libraries (numpy, scipy, pandas, matplotlib)
    - Network isolation for security
    - Timeout protection
    - Stdout/stderr capture

    Example:
        >>> executor = ModalCodeExecutor()
        >>> result = executor.execute('''
        ... import pandas as pd
        ... df = pd.DataFrame({'a': [1, 2, 3]})
        ... result = df['a'].sum()
        ... ''')
        >>> print(result['stdout'])
        6
    """

    def __init__(self) -> None:
        """Initialize Modal code executor.

        Note:
            Logs a warning if Modal credentials are not configured.
            Execution will fail at runtime without valid credentials.
        """
        # Check for Modal credentials
        self.modal_token_id = os.getenv("MODAL_TOKEN_ID")
        self.modal_token_secret = os.getenv("MODAL_TOKEN_SECRET")

        if not self.modal_token_id or not self.modal_token_secret:
            logger.warning(
                "Modal credentials not found. Code execution will fail unless modal setup is run."
            )

    def execute(self, code: str, timeout: int = 60, allow_network: bool = False) -> dict[str, Any]:
        """Execute Python code in a Modal sandbox.

        Args:
            code: Python code to execute
            timeout: Maximum execution time in seconds (default: 60)
            allow_network: Whether to allow network access (default: False for security)

        Returns:
            Dictionary containing:
                - stdout: Standard output from code execution
                - stderr: Standard error from code execution
                - success: Boolean indicating if execution succeeded
                - error: Error message if execution failed

        Raises:
            CodeExecutionError: If execution fails or times out
        """
        try:
            import modal
        except ImportError as e:
            raise CodeExecutionError(
                "Modal SDK not installed. Run: uv sync or pip install modal>=0.63.0"
            ) from e

        logger.info("executing_code", code_length=len(code), timeout=timeout)

        try:
            # Create or lookup Modal app
            app = modal.App.lookup("deepcritical-code-execution", create_if_missing=True)

            # Define scientific computing image with common libraries
            scientific_image = modal.Image.debian_slim(python_version="3.11").uv_pip_install(
                *get_sandbox_library_list()
            )

            # Create sandbox with security restrictions
            sandbox = modal.Sandbox.create(
                app=app,
                image=scientific_image,
                timeout=timeout,
                block_network=not allow_network,  # Wire the network control
            )

            try:
                # Execute the code
                # Wrap code to capture result
                wrapped_code = f"""
import sys
import io
from contextlib import redirect_stdout, redirect_stderr

stdout_io = io.StringIO()
stderr_io = io.StringIO()

try:
    with redirect_stdout(stdout_io), redirect_stderr(stderr_io):
        {self._indent_code(code, 8)}
    print("__EXECUTION_SUCCESS__")
except Exception as e:
    print(f"__EXECUTION_ERROR__: {{type(e).__name__}}: {{e}}", file=sys.stderr)

print("__STDOUT_START__")
print(stdout_io.getvalue())
print("__STDOUT_END__")
print("__STDERR_START__")
print(stderr_io.getvalue(), file=sys.stderr)
print("__STDERR_END__", file=sys.stderr)
"""

                # Run the wrapped code
                process = sandbox.exec("python", "-c", wrapped_code, timeout=timeout)

                # Read output
                stdout_raw = process.stdout.read()
                stderr_raw = process.stderr.read()
            finally:
                # Always clean up sandbox to prevent resource leaks
                sandbox.terminate()

            # Parse output
            success = "__EXECUTION_SUCCESS__" in stdout_raw

            # Extract actual stdout/stderr
            stdout = self._extract_output(stdout_raw, "__STDOUT_START__", "__STDOUT_END__")
            stderr = self._extract_output(stderr_raw, "__STDERR_START__", "__STDERR_END__")

            result = {
                "stdout": stdout,
                "stderr": stderr,
                "success": success,
                "error": stderr if not success else None,
            }

            logger.info(
                "code_execution_completed",
                success=success,
                stdout_length=len(stdout),
                stderr_length=len(stderr),
            )

            return result

        except Exception as e:
            logger.error("code_execution_failed", error=str(e), error_type=type(e).__name__)
            raise CodeExecutionError(f"Code execution failed: {e}") from e

    def execute_with_return(self, code: str, timeout: int = 60) -> Any:
        """Execute code and return the value of the 'result' variable.

        Convenience method that executes code and extracts a return value.
        The code should assign its final result to a variable named 'result'.

        Args:
            code: Python code to execute (must set 'result' variable)
            timeout: Maximum execution time in seconds

        Returns:
            The value of the 'result' variable from the executed code

        Example:
            >>> executor.execute_with_return("result = 2 + 2")
            4
        """
        # Modify code to print result as JSON
        wrapped = f"""
import json
{code}
print(json.dumps({{"__RESULT__": result}}))
"""

        execution_result = self.execute(wrapped, timeout=timeout)

        if not execution_result["success"]:
            raise CodeExecutionError(f"Execution failed: {execution_result['error']}")

        # Parse result from stdout
        import json

        try:
            output = execution_result["stdout"].strip()
            if "__RESULT__" in output:
                # Extract JSON line
                for line in output.split("\n"):
                    if "__RESULT__" in line:
                        data = json.loads(line)
                        return data["__RESULT__"]
            raise ValueError("Result not found in output")
        except (json.JSONDecodeError, ValueError) as e:
            logger.warning(
                "failed_to_parse_result", error=str(e), stdout=execution_result["stdout"]
            )
            return execution_result["stdout"]

    def _indent_code(self, code: str, spaces: int) -> str:
        """Indent code by specified number of spaces."""
        indent = " " * spaces
        return "\n".join(indent + line if line.strip() else line for line in code.split("\n"))

    def _extract_output(self, text: str, start_marker: str, end_marker: str) -> str:
        """Extract content between markers."""
        try:
            start_idx = text.index(start_marker) + len(start_marker)
            end_idx = text.index(end_marker)
            return text[start_idx:end_idx].strip()
        except ValueError:
            # Markers not found, return original text
            return text.strip()


@lru_cache(maxsize=1)
def get_code_executor() -> ModalCodeExecutor:
    """Get or create singleton code executor instance (thread-safe via lru_cache)."""
    return ModalCodeExecutor()