Spaces:
Running
Running
Commit
·
901acc3
1
Parent(s):
f32a2ca
refactor: address CodeRabbit nitpick suggestions
Browse files- Use Literal type for verdict field (type safety)
- Use lru_cache for thread-safe singleton pattern
- Fix ellipsis only appended when content is truncated
- Clarify docstrings for enable_modal_analysis vs modal_available
- Surface execution failures in verify_sandbox.py demo
- Gate StatisticalAnalyzer integration test on LLM keys
examples/modal_demo/verify_sandbox.py
CHANGED
|
@@ -15,6 +15,15 @@ from src.tools.code_execution import get_code_executor
|
|
| 15 |
from src.utils.config import settings
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
async def main() -> None:
|
| 19 |
"""Verify Modal sandbox isolation."""
|
| 20 |
if not settings.modal_available:
|
|
@@ -33,7 +42,7 @@ async def main() -> None:
|
|
| 33 |
print("Test 1: Check hostname (should NOT be your machine)")
|
| 34 |
code1 = "import socket; print(f'Hostname: {socket.gethostname()}')"
|
| 35 |
result1 = await loop.run_in_executor(None, partial(executor.execute, code1))
|
| 36 |
-
|
| 37 |
|
| 38 |
# Test 2: Scientific libraries
|
| 39 |
print("Test 2: Verify scientific libraries")
|
|
@@ -46,7 +55,7 @@ print(f"numpy: {np.__version__}")
|
|
| 46 |
print(f"scipy: {scipy.__version__}")
|
| 47 |
"""
|
| 48 |
result2 = await loop.run_in_executor(None, partial(executor.execute, code2))
|
| 49 |
-
|
| 50 |
|
| 51 |
# Test 3: Network blocked
|
| 52 |
print("Test 3: Verify network isolation")
|
|
@@ -59,7 +68,7 @@ except Exception:
|
|
| 59 |
print("Network: BLOCKED (as expected)")
|
| 60 |
"""
|
| 61 |
result3 = await loop.run_in_executor(None, partial(executor.execute, code3))
|
| 62 |
-
|
| 63 |
|
| 64 |
# Test 4: Real statistics
|
| 65 |
print("Test 4: Execute statistical analysis")
|
|
@@ -76,7 +85,7 @@ print(f"P-value: {p_val:.4f}")
|
|
| 76 |
print(f"Verdict: {'SUPPORTED' if p_val < 0.05 else 'INCONCLUSIVE'}")
|
| 77 |
"""
|
| 78 |
result4 = await loop.run_in_executor(None, partial(executor.execute, code4))
|
| 79 |
-
|
| 80 |
|
| 81 |
print("=" * 60)
|
| 82 |
print("All tests complete - Modal sandbox verified!")
|
|
|
|
| 15 |
from src.utils.config import settings
|
| 16 |
|
| 17 |
|
| 18 |
+
def print_result(result: dict) -> None:
|
| 19 |
+
"""Print execution result, surfacing errors when they occur."""
|
| 20 |
+
if result.get("success"):
|
| 21 |
+
print(f" {result['stdout'].strip()}\n")
|
| 22 |
+
else:
|
| 23 |
+
error = result.get("error") or result.get("stderr", "").strip() or "Unknown error"
|
| 24 |
+
print(f" ERROR: {error}\n")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
async def main() -> None:
|
| 28 |
"""Verify Modal sandbox isolation."""
|
| 29 |
if not settings.modal_available:
|
|
|
|
| 42 |
print("Test 1: Check hostname (should NOT be your machine)")
|
| 43 |
code1 = "import socket; print(f'Hostname: {socket.gethostname()}')"
|
| 44 |
result1 = await loop.run_in_executor(None, partial(executor.execute, code1))
|
| 45 |
+
print_result(result1)
|
| 46 |
|
| 47 |
# Test 2: Scientific libraries
|
| 48 |
print("Test 2: Verify scientific libraries")
|
|
|
|
| 55 |
print(f"scipy: {scipy.__version__}")
|
| 56 |
"""
|
| 57 |
result2 = await loop.run_in_executor(None, partial(executor.execute, code2))
|
| 58 |
+
print_result(result2)
|
| 59 |
|
| 60 |
# Test 3: Network blocked
|
| 61 |
print("Test 3: Verify network isolation")
|
|
|
|
| 68 |
print("Network: BLOCKED (as expected)")
|
| 69 |
"""
|
| 70 |
result3 = await loop.run_in_executor(None, partial(executor.execute, code3))
|
| 71 |
+
print_result(result3)
|
| 72 |
|
| 73 |
# Test 4: Real statistics
|
| 74 |
print("Test 4: Execute statistical analysis")
|
|
|
|
| 85 |
print(f"Verdict: {'SUPPORTED' if p_val < 0.05 else 'INCONCLUSIVE'}")
|
| 86 |
"""
|
| 87 |
result4 = await loop.run_in_executor(None, partial(executor.execute, code4))
|
| 88 |
+
print_result(result4)
|
| 89 |
|
| 90 |
print("=" * 60)
|
| 91 |
print("All tests complete - Modal sandbox verified!")
|
src/services/statistical_analyzer.py
CHANGED
|
@@ -9,8 +9,11 @@ The AnalysisAgent (in src/agents/) wraps this service for magentic mode.
|
|
| 9 |
|
| 10 |
import asyncio
|
| 11 |
import re
|
| 12 |
-
from functools import partial
|
| 13 |
-
from typing import Any
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
from pydantic import BaseModel, Field
|
| 16 |
from pydantic_ai import Agent
|
|
@@ -27,7 +30,7 @@ from src.utils.models import Evidence
|
|
| 27 |
class AnalysisResult(BaseModel):
|
| 28 |
"""Result of statistical analysis."""
|
| 29 |
|
| 30 |
-
verdict:
|
| 31 |
description="SUPPORTED, REFUTED, or INCONCLUSIVE",
|
| 32 |
)
|
| 33 |
confidence: float = Field(ge=0.0, le=1.0, description="Confidence in verdict (0-1)")
|
|
@@ -175,7 +178,9 @@ Generate executable Python code to analyze this evidence."""
|
|
| 175 |
|
| 176 |
lines = []
|
| 177 |
for i, ev in enumerate(evidence[:5], 1):
|
| 178 |
-
|
|
|
|
|
|
|
| 179 |
lines.append(f" Source: {ev.citation.title}")
|
| 180 |
lines.append(f" Relevance: {ev.relevance:.0%}\n")
|
| 181 |
|
|
@@ -191,7 +196,7 @@ Generate executable Python code to analyze this evidence."""
|
|
| 191 |
stdout_upper = stdout.upper()
|
| 192 |
|
| 193 |
# Extract verdict with robust word-boundary matching
|
| 194 |
-
verdict = "INCONCLUSIVE"
|
| 195 |
if re.search(r"\bSUPPORTED\b", stdout_upper) and not re.search(
|
| 196 |
r"\b(?:NOT|UN)SUPPORTED\b", stdout_upper
|
| 197 |
):
|
|
@@ -244,13 +249,7 @@ Generate executable Python code to analyze this evidence."""
|
|
| 244 |
return 0.70 # Default
|
| 245 |
|
| 246 |
|
| 247 |
-
|
| 248 |
-
_analyzer: StatisticalAnalyzer | None = None
|
| 249 |
-
|
| 250 |
-
|
| 251 |
def get_statistical_analyzer() -> StatisticalAnalyzer:
|
| 252 |
-
"""Get or create singleton StatisticalAnalyzer instance."""
|
| 253 |
-
|
| 254 |
-
if _analyzer is None:
|
| 255 |
-
_analyzer = StatisticalAnalyzer()
|
| 256 |
-
return _analyzer
|
|
|
|
| 9 |
|
| 10 |
import asyncio
|
| 11 |
import re
|
| 12 |
+
from functools import lru_cache, partial
|
| 13 |
+
from typing import Any, Literal
|
| 14 |
+
|
| 15 |
+
# Type alias for verdict values
|
| 16 |
+
VerdictType = Literal["SUPPORTED", "REFUTED", "INCONCLUSIVE"]
|
| 17 |
|
| 18 |
from pydantic import BaseModel, Field
|
| 19 |
from pydantic_ai import Agent
|
|
|
|
| 30 |
class AnalysisResult(BaseModel):
|
| 31 |
"""Result of statistical analysis."""
|
| 32 |
|
| 33 |
+
verdict: VerdictType = Field(
|
| 34 |
description="SUPPORTED, REFUTED, or INCONCLUSIVE",
|
| 35 |
)
|
| 36 |
confidence: float = Field(ge=0.0, le=1.0, description="Confidence in verdict (0-1)")
|
|
|
|
| 178 |
|
| 179 |
lines = []
|
| 180 |
for i, ev in enumerate(evidence[:5], 1):
|
| 181 |
+
content = ev.content
|
| 182 |
+
truncated = content[:200] + ("..." if len(content) > 200 else "")
|
| 183 |
+
lines.append(f"{i}. {truncated}")
|
| 184 |
lines.append(f" Source: {ev.citation.title}")
|
| 185 |
lines.append(f" Relevance: {ev.relevance:.0%}\n")
|
| 186 |
|
|
|
|
| 196 |
stdout_upper = stdout.upper()
|
| 197 |
|
| 198 |
# Extract verdict with robust word-boundary matching
|
| 199 |
+
verdict: VerdictType = "INCONCLUSIVE"
|
| 200 |
if re.search(r"\bSUPPORTED\b", stdout_upper) and not re.search(
|
| 201 |
r"\b(?:NOT|UN)SUPPORTED\b", stdout_upper
|
| 202 |
):
|
|
|
|
| 249 |
return 0.70 # Default
|
| 250 |
|
| 251 |
|
| 252 |
+
@lru_cache(maxsize=1)
|
|
|
|
|
|
|
|
|
|
| 253 |
def get_statistical_analyzer() -> StatisticalAnalyzer:
|
| 254 |
+
"""Get or create singleton StatisticalAnalyzer instance (thread-safe via lru_cache)."""
|
| 255 |
+
return StatisticalAnalyzer()
|
|
|
|
|
|
|
|
|
src/utils/config.py
CHANGED
|
@@ -57,12 +57,18 @@ class Settings(BaseSettings):
|
|
| 57 |
modal_token_secret: str | None = Field(default=None, description="Modal token secret")
|
| 58 |
chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")
|
| 59 |
enable_modal_analysis: bool = Field(
|
| 60 |
-
default=False,
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
@property
|
| 64 |
def modal_available(self) -> bool:
|
| 65 |
-
"""Check if Modal credentials are configured.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
return bool(self.modal_token_id and self.modal_token_secret)
|
| 67 |
|
| 68 |
def get_api_key(self) -> str:
|
|
|
|
| 57 |
modal_token_secret: str | None = Field(default=None, description="Modal token secret")
|
| 58 |
chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")
|
| 59 |
enable_modal_analysis: bool = Field(
|
| 60 |
+
default=False,
|
| 61 |
+
description="Opt-in flag to enable Modal analysis. Must also have modal_available=True.",
|
| 62 |
)
|
| 63 |
|
| 64 |
@property
|
| 65 |
def modal_available(self) -> bool:
|
| 66 |
+
"""Check if Modal credentials are configured (credentials check only).
|
| 67 |
+
|
| 68 |
+
Note: This is a credentials check, NOT an opt-in flag.
|
| 69 |
+
Use `enable_modal_analysis` to opt-in, then check `modal_available` for credentials.
|
| 70 |
+
Typical usage: `if settings.enable_modal_analysis and settings.modal_available`
|
| 71 |
+
"""
|
| 72 |
return bool(self.modal_token_id and self.modal_token_secret)
|
| 73 |
|
| 74 |
def get_api_key(self) -> str:
|
tests/integration/test_modal.py
CHANGED
|
@@ -4,6 +4,9 @@ import pytest
|
|
| 4 |
|
| 5 |
from src.utils.config import settings
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
@pytest.mark.integration
|
| 9 |
@pytest.mark.skipif(not settings.modal_available, reason="Modal not configured")
|
|
@@ -28,8 +31,9 @@ class TestModalIntegration:
|
|
| 28 |
assert "6" in result["stdout"]
|
| 29 |
|
| 30 |
@pytest.mark.asyncio
|
|
|
|
| 31 |
async def test_statistical_analyzer_works(self) -> None:
|
| 32 |
-
"""StatisticalAnalyzer should work end-to-end."""
|
| 33 |
from src.services.statistical_analyzer import get_statistical_analyzer
|
| 34 |
from src.utils.models import Citation, Evidence
|
| 35 |
|
|
|
|
| 4 |
|
| 5 |
from src.utils.config import settings
|
| 6 |
|
| 7 |
+
# Check if any LLM API key is available
|
| 8 |
+
_llm_available = bool(settings.openai_api_key or settings.anthropic_api_key)
|
| 9 |
+
|
| 10 |
|
| 11 |
@pytest.mark.integration
|
| 12 |
@pytest.mark.skipif(not settings.modal_available, reason="Modal not configured")
|
|
|
|
| 31 |
assert "6" in result["stdout"]
|
| 32 |
|
| 33 |
@pytest.mark.asyncio
|
| 34 |
+
@pytest.mark.skipif(not _llm_available, reason="LLM API key not configured")
|
| 35 |
async def test_statistical_analyzer_works(self) -> None:
|
| 36 |
+
"""StatisticalAnalyzer should work end-to-end (requires Modal + LLM)."""
|
| 37 |
from src.services.statistical_analyzer import get_statistical_analyzer
|
| 38 |
from src.utils.models import Citation, Evidence
|
| 39 |
|