Spaces:

DataQuests
/

DeepCritical

Running

VibecoderMcSwaggins committed 12 days ago

Commit

901acc3

1 Parent(s): f32a2ca

refactor: address CodeRabbit nitpick suggestions

- Use Literal type for verdict field (type safety)
- Use lru_cache for thread-safe singleton pattern
- Fix ellipsis so it is appended only when content is truncated
- Clarify docstrings for enable_modal_analysis vs modal_available
- Surface execution failures in verify_sandbox.py demo
- Gate StatisticalAnalyzer integration test on LLM keys

Files changed (4)

examples/modal_demo/verify_sandbox.py +13 -4
src/services/statistical_analyzer.py +13 -14
src/utils/config.py +8 -2
tests/integration/test_modal.py +5 -1

examples/modal_demo/verify_sandbox.py CHANGED Viewed

@@ -15,6 +15,15 @@ from src.tools.code_execution import get_code_executor
 from src.utils.config import settings
 async def main() -> None:
     """Verify Modal sandbox isolation."""
     if not settings.modal_available:
@@ -33,7 +42,7 @@ async def main() -> None:
     print("Test 1: Check hostname (should NOT be your machine)")
     code1 = "import socket; print(f'Hostname: {socket.gethostname()}')"
     result1 = await loop.run_in_executor(None, partial(executor.execute, code1))
-    print(f"  {result1['stdout'].strip()}\n")
     # Test 2: Scientific libraries
     print("Test 2: Verify scientific libraries")
@@ -46,7 +55,7 @@ print(f"numpy: {np.__version__}")
 print(f"scipy: {scipy.__version__}")
 """
     result2 = await loop.run_in_executor(None, partial(executor.execute, code2))
-    print(f"  {result2['stdout'].strip()}\n")
     # Test 3: Network blocked
     print("Test 3: Verify network isolation")
@@ -59,7 +68,7 @@ except Exception:
     print("Network: BLOCKED (as expected)")
 """
     result3 = await loop.run_in_executor(None, partial(executor.execute, code3))
-    print(f"  {result3['stdout'].strip()}\n")
     # Test 4: Real statistics
     print("Test 4: Execute statistical analysis")
@@ -76,7 +85,7 @@ print(f"P-value: {p_val:.4f}")
 print(f"Verdict: {'SUPPORTED' if p_val < 0.05 else 'INCONCLUSIVE'}")
 """
     result4 = await loop.run_in_executor(None, partial(executor.execute, code4))
-    print(f"  {result4['stdout'].strip()}\n")
     print("=" * 60)
     print("All tests complete - Modal sandbox verified!")

 from src.utils.config import settings
+def print_result(result: dict) -> None:
+    """Print execution result, surfacing errors when they occur."""
+    if result.get("success"):
+        print(f"  {result['stdout'].strip()}\n")
+    else:
+        error = result.get("error") or result.get("stderr", "").strip() or "Unknown error"
+        print(f"  ERROR: {error}\n")
 async def main() -> None:
     """Verify Modal sandbox isolation."""
     if not settings.modal_available:
     print("Test 1: Check hostname (should NOT be your machine)")
     code1 = "import socket; print(f'Hostname: {socket.gethostname()}')"
     result1 = await loop.run_in_executor(None, partial(executor.execute, code1))
+    print_result(result1)
     # Test 2: Scientific libraries
     print("Test 2: Verify scientific libraries")
 print(f"scipy: {scipy.__version__}")
 """
     result2 = await loop.run_in_executor(None, partial(executor.execute, code2))
+    print_result(result2)
     # Test 3: Network blocked
     print("Test 3: Verify network isolation")
     print("Network: BLOCKED (as expected)")
 """
     result3 = await loop.run_in_executor(None, partial(executor.execute, code3))
+    print_result(result3)
     # Test 4: Real statistics
     print("Test 4: Execute statistical analysis")
 print(f"Verdict: {'SUPPORTED' if p_val < 0.05 else 'INCONCLUSIVE'}")
 """
     result4 = await loop.run_in_executor(None, partial(executor.execute, code4))
+    print_result(result4)
     print("=" * 60)
     print("All tests complete - Modal sandbox verified!")

src/services/statistical_analyzer.py CHANGED Viewed

@@ -9,8 +9,11 @@ The AnalysisAgent (in src/agents/) wraps this service for magentic mode.
 import asyncio
 import re
-from functools import partial
-from typing import Any
 from pydantic import BaseModel, Field
 from pydantic_ai import Agent
@@ -27,7 +30,7 @@ from src.utils.models import Evidence
 class AnalysisResult(BaseModel):
     """Result of statistical analysis."""
-    verdict: str = Field(
         description="SUPPORTED, REFUTED, or INCONCLUSIVE",
     )
     confidence: float = Field(ge=0.0, le=1.0, description="Confidence in verdict (0-1)")
@@ -175,7 +178,9 @@ Generate executable Python code to analyze this evidence."""
         lines = []
         for i, ev in enumerate(evidence[:5], 1):
-            lines.append(f"{i}. {ev.content[:200]}...")
             lines.append(f"   Source: {ev.citation.title}")
             lines.append(f"   Relevance: {ev.relevance:.0%}\n")
@@ -191,7 +196,7 @@ Generate executable Python code to analyze this evidence."""
         stdout_upper = stdout.upper()
         # Extract verdict with robust word-boundary matching
-        verdict = "INCONCLUSIVE"
         if re.search(r"\bSUPPORTED\b", stdout_upper) and not re.search(
             r"\b(?:NOT|UN)SUPPORTED\b", stdout_upper
         ):
@@ -244,13 +249,7 @@ Generate executable Python code to analyze this evidence."""
         return 0.70  # Default
-# Singleton for reuse
-_analyzer: StatisticalAnalyzer | None = None
 def get_statistical_analyzer() -> StatisticalAnalyzer:
-    """Get or create singleton StatisticalAnalyzer instance."""
-    global _analyzer
-    if _analyzer is None:
-        _analyzer = StatisticalAnalyzer()
-    return _analyzer

 import asyncio
 import re
+from functools import lru_cache, partial
+from typing import Any, Literal
+# Type alias for verdict values
+VerdictType = Literal["SUPPORTED", "REFUTED", "INCONCLUSIVE"]
 from pydantic import BaseModel, Field
 from pydantic_ai import Agent
 class AnalysisResult(BaseModel):
     """Result of statistical analysis."""
+    verdict: VerdictType = Field(
         description="SUPPORTED, REFUTED, or INCONCLUSIVE",
     )
     confidence: float = Field(ge=0.0, le=1.0, description="Confidence in verdict (0-1)")
         lines = []
         for i, ev in enumerate(evidence[:5], 1):
+            content = ev.content
+            truncated = content[:200] + ("..." if len(content) > 200 else "")
+            lines.append(f"{i}. {truncated}")
             lines.append(f"   Source: {ev.citation.title}")
             lines.append(f"   Relevance: {ev.relevance:.0%}\n")
         stdout_upper = stdout.upper()
         # Extract verdict with robust word-boundary matching
+        verdict: VerdictType = "INCONCLUSIVE"
         if re.search(r"\bSUPPORTED\b", stdout_upper) and not re.search(
             r"\b(?:NOT|UN)SUPPORTED\b", stdout_upper
         ):
         return 0.70  # Default
+@lru_cache(maxsize=1)
 def get_statistical_analyzer() -> StatisticalAnalyzer:
+    """Get or create singleton StatisticalAnalyzer instance (thread-safe via lru_cache)."""
+    return StatisticalAnalyzer()

src/utils/config.py CHANGED Viewed

@@ -57,12 +57,18 @@ class Settings(BaseSettings):
     modal_token_secret: str | None = Field(default=None, description="Modal token secret")
     chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")
     enable_modal_analysis: bool = Field(
-        default=False, description="Enable Modal sandbox analysis (Opt-in)"
     )
     @property
     def modal_available(self) -> bool:
-        """Check if Modal credentials are configured."""
         return bool(self.modal_token_id and self.modal_token_secret)
     def get_api_key(self) -> str:

     modal_token_secret: str | None = Field(default=None, description="Modal token secret")
     chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")
     enable_modal_analysis: bool = Field(
+        default=False,
+        description="Opt-in flag to enable Modal analysis. Must also have modal_available=True.",
     )
     @property
     def modal_available(self) -> bool:
+        """Check if Modal credentials are configured (credentials check only).
+        Note: This is a credentials check, NOT an opt-in flag.
+        Use `enable_modal_analysis` to opt-in, then check `modal_available` for credentials.
+        Typical usage: `if settings.enable_modal_analysis and settings.modal_available`
+        """
         return bool(self.modal_token_id and self.modal_token_secret)
     def get_api_key(self) -> str:

tests/integration/test_modal.py CHANGED Viewed

@@ -4,6 +4,9 @@ import pytest
 from src.utils.config import settings
 @pytest.mark.integration
 @pytest.mark.skipif(not settings.modal_available, reason="Modal not configured")
@@ -28,8 +31,9 @@ class TestModalIntegration:
         assert "6" in result["stdout"]
     @pytest.mark.asyncio
     async def test_statistical_analyzer_works(self) -> None:
-        """StatisticalAnalyzer should work end-to-end."""
         from src.services.statistical_analyzer import get_statistical_analyzer
         from src.utils.models import Citation, Evidence

 from src.utils.config import settings
+# Check if any LLM API key is available
+_llm_available = bool(settings.openai_api_key or settings.anthropic_api_key)
 @pytest.mark.integration
 @pytest.mark.skipif(not settings.modal_available, reason="Modal not configured")
         assert "6" in result["stdout"]
     @pytest.mark.asyncio
+    @pytest.mark.skipif(not _llm_available, reason="LLM API key not configured")
     async def test_statistical_analyzer_works(self) -> None:
+        """StatisticalAnalyzer should work end-to-end (requires Modal + LLM)."""
         from src.services.statistical_analyzer import get_statistical_analyzer
         from src.utils.models import Citation, Evidence