Spaces:

DataQuests
/

DeepCritical

Running

App Files Files Community

DeepCritical / src /tools /code_execution.py

VibecoderMcSwaggins

refactor: address final CodeRabbit feedback

b50fffd 14 days ago

raw

history blame

8.81 kB

	"""Modal-based secure code execution tool for statistical analysis.

	This module provides sandboxed Python code execution using Modal's serverless infrastructure.
	It's designed for running LLM-generated statistical analysis code safely.
	"""

	import os
	from functools import lru_cache
	from typing import Any

	import structlog

	logger = structlog.get_logger(__name__)

	# Pinned library versions for the Modal sandbox image.
	# The same mapping feeds both the image build (get_sandbox_library_list) and the
	# LLM prompt text (get_sandbox_library_prompt), so generated code and the
	# execution environment always agree on versions. Insertion order is preserved
	# in both outputs.
	SANDBOX_LIBRARIES: dict[str, str] = {
	    "pandas": "2.2.0",
	    "numpy": "1.26.4",
	    "scipy": "1.11.4",
	    "matplotlib": "3.8.2",
	    "scikit-learn": "1.4.0",
	    "statsmodels": "0.14.1",
	}


	def get_sandbox_library_list() -> list[str]:
	    """Return the sandbox libraries as pip requirement pins.

	    Returns:
	        One ``name==version`` string per entry of ``SANDBOX_LIBRARIES``,
	        in the mapping's insertion order.
	    """
	    pinned = (f"{name}=={version}" for name, version in SANDBOX_LIBRARIES.items())
	    return list(pinned)


	def get_sandbox_library_prompt() -> str:
	    """Render the sandbox library pins as a bullet list for LLM prompts.

	    Returns:
	        A newline-separated string with one ``- name==version`` bullet per
	        entry of ``SANDBOX_LIBRARIES`` (no trailing newline).
	    """
	    bullet_lines = [
	        f"- {name}=={version}" for name, version in SANDBOX_LIBRARIES.items()
	    ]
	    return "\n".join(bullet_lines)


	class CodeExecutionError(Exception):
	    """Error signalling that sandboxed code execution could not be completed."""


	class ModalCodeExecutor:
	    """Execute Python code securely using Modal sandboxes.

	    This class provides a safe environment for executing LLM-generated code,
	    particularly for scientific computing and statistical analysis tasks.

	    Features:
	        - Sandboxed execution (isolated from host system)
	        - Pre-installed scientific libraries (the pinned set in SANDBOX_LIBRARIES)
	        - Network access blocked unless ``allow_network=True`` is passed
	        - Timeout protection
	        - Stdout/stderr capture

	    Example:
	        >>> executor = ModalCodeExecutor()
	        >>> result = executor.execute('''
	        ... import pandas as pd
	        ... df = pd.DataFrame({'a': [1, 2, 3]})
	        ... print(df['a'].sum())
	        ... ''')
	        >>> print(result['stdout'])
	        6
	    """

	    # Markers printed by the in-sandbox wrapper so the host can separate the
	    # wrapper's own bookkeeping from the output produced by the user code.
	    _SUCCESS_MARKER = "__EXECUTION_SUCCESS__"
	    _ERROR_MARKER = "__EXECUTION_ERROR__"
	    _STDOUT_START = "__STDOUT_START__"
	    _STDOUT_END = "__STDOUT_END__"
	    _STDERR_START = "__STDERR_START__"
	    _STDERR_END = "__STDERR_END__"
	    _RESULT_KEY = "__RESULT__"

	    def __init__(self) -> None:
	        """Initialize Modal code executor.

	        Note:
	            Logs a warning if Modal credentials are not configured.
	            Execution will fail at runtime without valid credentials.
	        """
	        # Credentials are only read here; they are not validated against Modal.
	        self.modal_token_id = os.getenv("MODAL_TOKEN_ID")
	        self.modal_token_secret = os.getenv("MODAL_TOKEN_SECRET")

	        if not self.modal_token_id or not self.modal_token_secret:
	            logger.warning(
	                "Modal credentials not found. Code execution will fail unless modal setup is run."
	            )

	    def execute(self, code: str, timeout: int = 60, allow_network: bool = False) -> dict[str, Any]:
	        """Execute Python code in a Modal sandbox.

	        Args:
	            code: Python code to execute
	            timeout: Maximum execution time in seconds (default: 60)
	            allow_network: Whether to allow network access (default: False for security)

	        Returns:
	            Dictionary containing:
	            - stdout: Standard output captured from the executed code
	            - stderr: Standard error captured from the executed code
	            - success: Boolean indicating if the code ran to completion
	            - error: Error description if execution failed, otherwise None

	        Raises:
	            CodeExecutionError: If the Modal SDK is missing or any step of
	                creating/running the sandbox raises. Exceptions raised by the
	                executed code itself are NOT raised here; they are reported
	                through ``success=False`` and ``error``.
	        """
	        try:
	            import modal
	        except ImportError as e:
	            raise CodeExecutionError(
	                "Modal SDK not installed. Run: uv sync or pip install modal>=0.63.0"
	            ) from e

	        logger.info("executing_code", code_length=len(code), timeout=timeout)

	        try:
	            # Create or lookup Modal app
	            app = modal.App.lookup("deepcritical-code-execution", create_if_missing=True)

	            # Define scientific computing image with the pinned libraries
	            scientific_image = modal.Image.debian_slim(python_version="3.11").uv_pip_install(
	                *get_sandbox_library_list()
	            )

	            # Create sandbox with security restrictions
	            sandbox = modal.Sandbox.create(
	                app=app,
	                image=scientific_image,
	                timeout=timeout,
	                block_network=not allow_network,  # network is blocked unless explicitly allowed
	            )

	            try:
	                process = sandbox.exec(
	                    "python", "-c", self._build_wrapper_script(code), timeout=timeout
	                )
	                stdout_raw = process.stdout.read()
	                stderr_raw = process.stderr.read()
	            finally:
	                # Always clean up sandbox to prevent resource leaks
	                sandbox.terminate()

	            result = self._parse_output(stdout_raw, stderr_raw)

	            logger.info(
	                "code_execution_completed",
	                success=result["success"],
	                stdout_length=len(result["stdout"]),
	                stderr_length=len(result["stderr"]),
	            )

	            return result

	        except Exception as e:
	            logger.error("code_execution_failed", error=str(e), error_type=type(e).__name__)
	            raise CodeExecutionError(f"Code execution failed: {e}") from e

	    def execute_with_return(self, code: str, timeout: int = 60) -> Any:
	        """Execute code and return the value of the 'result' variable.

	        Convenience method that executes code and extracts a return value.
	        The code should assign its final result to a variable named 'result',
	        and that value must be JSON-serializable.

	        Args:
	            code: Python code to execute (must set 'result' variable)
	            timeout: Maximum execution time in seconds

	        Returns:
	            The JSON-decoded value of the 'result' variable. If no result line
	            can be parsed from the output, the captured stdout is returned
	            instead (and a warning is logged).

	        Raises:
	            CodeExecutionError: If execution fails.

	        Example:
	            >>> executor.execute_with_return("result = 2 + 2")
	            4
	        """
	        # Append a final statement that prints the result as a JSON object.
	        # Built with join() so the script text does not depend on how this
	        # source file happens to be indented.
	        wrapped = "\n".join(
	            (
	                "import json",
	                code,
	                'print(json.dumps({"__RESULT__": result}))',
	            )
	        )

	        execution_result = self.execute(wrapped, timeout=timeout)

	        if not execution_result["success"]:
	            raise CodeExecutionError(f"Execution failed: {execution_result['error']}")

	        found, value = self._parse_result(execution_result["stdout"])
	        if found:
	            return value

	        logger.warning(
	            "failed_to_parse_result",
	            error="Result not found in output",
	            stdout=execution_result["stdout"],
	        )
	        return execution_result["stdout"]

	    def _build_wrapper_script(self, code: str) -> str:
	        """Build the script run inside the sandbox around the user's code.

	        The user code is embedded as a string literal and run with ``exec`` in
	        a fresh namespace rather than being pasted in with extra indentation.
	        This keeps multi-line string literals intact and lets a SyntaxError in
	        the user code be reported like any other exception instead of
	        preventing the wrapper itself from compiling.
	        """
	        script_lines = [
	            "import sys",
	            "import io",
	            "from contextlib import redirect_stdout, redirect_stderr",
	            "",
	            f"source = {code!r}",
	            "stdout_io = io.StringIO()",
	            "stderr_io = io.StringIO()",
	            "",
	            "try:",
	            "    with redirect_stdout(stdout_io), redirect_stderr(stderr_io):",
	            "        exec(compile(source, '<sandbox>', 'exec'), {'__name__': '__main__'})",
	            f"    print({self._SUCCESS_MARKER!r})",
	            "except Exception as e:",
	            f"    print(f'{self._ERROR_MARKER}: {{type(e).__name__}}: {{e}}', file=sys.stderr)",
	            "",
	            f"print({self._STDOUT_START!r})",
	            "print(stdout_io.getvalue())",
	            f"print({self._STDOUT_END!r})",
	            # Both stderr markers must go to stderr, otherwise the host cannot
	            # find the start marker in the stderr stream.
	            f"print({self._STDERR_START!r}, file=sys.stderr)",
	            "print(stderr_io.getvalue(), file=sys.stderr)",
	            f"print({self._STDERR_END!r}, file=sys.stderr)",
	        ]
	        return "\n".join(script_lines) + "\n"

	    def _parse_output(self, stdout_raw: str, stderr_raw: str) -> dict[str, Any]:
	        """Turn the raw wrapper streams into the result dict returned by execute()."""
	        # The wrapper prints the success marker before the stdout section, so
	        # only that preamble is checked; captured user output cannot fake it.
	        stdout_head, _, _ = stdout_raw.partition(self._STDOUT_START)
	        success = self._SUCCESS_MARKER in stdout_head

	        stdout = self._extract_output(stdout_raw, self._STDOUT_START, self._STDOUT_END)
	        stderr = self._extract_output(stderr_raw, self._STDERR_START, self._STDERR_END)

	        error = None
	        if not success:
	            # The wrapper's "__EXECUTION_ERROR__: ..." line precedes the
	            # stderr section. If the start marker is absent, `stderr` already
	            # holds the whole raw stream, so no separate preamble is added.
	            stderr_head, marker_found, _ = stderr_raw.partition(self._STDERR_START)
	            pieces = [stderr_head.strip() if marker_found else "", stderr]
	            error = "\n".join(piece for piece in pieces if piece)
	            if not error:
	                error = "Execution did not complete: no output was captured"

	        return {
	            "stdout": stdout,
	            "stderr": stderr,
	            "success": success,
	            "error": error,
	        }

	    def _parse_result(self, stdout: str) -> tuple[bool, Any]:
	        """Find the JSON result line emitted by execute_with_return's wrapper.

	        Lines are scanned from the end because the wrapper prints its result
	        last; earlier user output mentioning the key is skipped if it is not a
	        JSON object containing it.

	        Returns:
	            ``(True, value)`` when a result line was decoded, else ``(False, None)``.
	        """
	        import json

	        for line in reversed(stdout.splitlines()):
	            if self._RESULT_KEY not in line:
	                continue
	            try:
	                payload = json.loads(line)
	            except json.JSONDecodeError:
	                continue
	            if isinstance(payload, dict) and self._RESULT_KEY in payload:
	                return True, payload[self._RESULT_KEY]
	        return False, None

	    def _indent_code(self, code: str, spaces: int) -> str:
	        """Indent every non-blank line of code by the given number of spaces.

	        No longer used by execute(); kept so existing callers keep working.
	        """
	        indent = " " * spaces
	        return "\n".join(indent + line if line.strip() else line for line in code.split("\n"))

	    def _extract_output(self, text: str, start_marker: str, end_marker: str) -> str:
	        """Extract the stripped content between two markers.

	        The first start marker and the last end marker are used, and the end
	        marker must come after the start marker. If either marker is missing
	        (or they are out of order) the whole text is returned, stripped.
	        """
	        start_pos = text.find(start_marker)
	        if start_pos == -1:
	            return text.strip()

	        content_start = start_pos + len(start_marker)
	        content_end = text.rfind(end_marker)
	        if content_end < content_start:
	            # End marker missing or only present before the start marker
	            return text.strip()

	        return text[content_start:content_end].strip()


	@lru_cache(maxsize=1)
	def get_code_executor() -> ModalCodeExecutor:
	    """Return the shared ModalCodeExecutor, creating it on first use.

	    ``lru_cache(maxsize=1)`` memoizes this zero-argument call, so once a call
	    has completed, later calls return that same cached instance.
	    """
	    executor = ModalCodeExecutor()
	    return executor