Spaces:

DataQuests
/

DeepCritical

Running

VibecoderMcSwaggins committed 14 days ago

Commit

4732667

1 Parent(s): 35ebd09

fix: pydantic-ai v1 compatibility and Anthropic model update

- Use result.output instead of deprecated result.data (pydantic-ai v1)
- Update Anthropic model to claude-sonnet-4-5
- Remove redundant cast in judges.py
- Add orchestrator_demo example for end-to-end testing
- Fix NCBI_API_KEY placeholder handling in pubmed.py
- Update tests to use .output instead of .data

Files changed (7)

examples/README.md +18 -13
examples/orchestrator_demo/run_agent.py +82 -0
src/agent_factory/judges.py +2 -2
src/tools/pubmed.py +3 -0
src/utils/config.py +1 -3
tests/unit/agent_factory/test_judges.py +2 -2
tests/unit/utils/test_config.py +2 -2

examples/README.md CHANGED Viewed

@@ -2,22 +2,27 @@
 Demo scripts for DeepCritical functionality.
-## search_demo
-Demonstrates Phase 2 search functionality:
 ```bash
-# Run with default query (metformin + Alzheimer's)
-uv run python examples/search_demo/run_search.py
-# Run with custom query
-uv run python examples/search_demo/run_search.py "aspirin heart disease"
 ```
-**What it does:**
-- Searches PubMed (biomedical literature)
-- Searches DuckDuckGo (web)
-- Runs both in parallel (scatter-gather)
-- Returns evidence with citations
-**Optional:** Set `NCBI_API_KEY` in `.env` for higher PubMed rate limits.

 Demo scripts for DeepCritical functionality.
+## 1. Search Demo (Phase 2)
+Demonstrates parallel search across PubMed and Web. **No API keys required.**
 ```bash
+uv run python examples/search_demo/run_search.py "metformin cancer"
 ```
+## 2. Agent Demo (Phase 4)
+Demonstrates the full search-judge-synthesize loop.
+**Option A: Mock Mode (No Keys)**
+Test the logic/mechanics without an LLM.
+```bash
+uv run python examples/orchestrator_demo/run_agent.py "metformin cancer" --mock
+```
+**Option B: Real Mode (Requires Keys)**
+Uses the real LLM Judge to evaluate evidence.
+Requires `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` in `.env`.
+```bash
+uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
+```

examples/orchestrator_demo/run_agent.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#!/usr/bin/env python3
+"""
+Demo: Full DeepCritical Agent Loop (Search + Judge + Orchestrator).
+This script demonstrates Phase 4 functionality:
+- Iterative Search (PubMed + Web)
+- Evidence Evaluation (Judge Agent)
+- Orchestration Loop
+- Final Synthesis
+Usage:
+    # Run with Mock Judge (No API Key needed)
+    uv run python examples/orchestrator_demo/run_agent.py "metformin cancer" --mock
+    # Run with Real Judge (Requires OPENAI_API_KEY or ANTHROPIC_API_KEY)
+    uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
+"""
+import argparse
+import asyncio
+import os
+import sys
+from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
+from src.orchestrator import Orchestrator
+from src.tools.pubmed import PubMedTool
+from src.tools.search_handler import SearchHandler
+from src.tools.websearch import WebTool
+from src.utils.models import OrchestratorConfig
+async def main() -> None:
+    """Run the agent demo."""
+    parser = argparse.ArgumentParser(description="Run DeepCritical Agent CLI")
+    parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
+    parser.add_argument("--mock", action="store_true", help="Use Mock Judge (no API key needed)")
+    parser.add_argument("--iterations", type=int, default=3, help="Max iterations")
+    args = parser.parse_args()
+    # Check for keys if not mocking
+    if not args.mock and not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
+        print("Error: No API key found. Set OPENAI_API_KEY or ANTHROPIC_API_KEY, or use --mock.")
+        sys.exit(1)
+    print(f"\n{'='*60}")
+    print("DeepCritical Agent Demo")
+    print(f"Query: {args.query}")
+    print(f"Mode: {'MOCK' if args.mock else 'REAL (LLM)'}")
+    print(f"{ '='*60}\n")
+    # 1. Setup Search Tools
+    search_handler = SearchHandler(tools=[PubMedTool(), WebTool()], timeout=30.0)
+    # 2. Setup Judge
+    judge_handler: JudgeHandler | MockJudgeHandler
+    if args.mock:
+        judge_handler = MockJudgeHandler()
+    else:
+        judge_handler = JudgeHandler()
+    # 3. Setup Orchestrator
+    config = OrchestratorConfig(max_iterations=args.iterations)
+    orchestrator = Orchestrator(
+        search_handler=search_handler, judge_handler=judge_handler, config=config
+    )
+    # 4. Run Loop
+    try:
+        async for event in orchestrator.run(args.query):
+            # Print event with icon
+            print(event.to_markdown().replace("**", ""))
+            # If we got data, print a snippet
+            if event.type == "search_complete" and event.data:
+                print(f"   -> Found {event.data.get('new_count', 0)} new items")
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+if __name__ == "__main__":
+    asyncio.run(main())

src/agent_factory/judges.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Judge handler for evidence assessment using PydanticAI."""
-from typing import Any, cast
 import structlog
 from pydantic_ai import Agent
@@ -86,7 +86,7 @@ class JudgeHandler:
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
-            assessment = cast(JudgeAssessment, result.data)  # type: ignore[attr-defined]
             logger.info(
                 "Assessment complete",

 """Judge handler for evidence assessment using PydanticAI."""
+from typing import Any
 import structlog
 from pydantic_ai import Agent
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
+            assessment = result.output
             logger.info(
                 "Assessment complete",

src/tools/pubmed.py CHANGED Viewed

@@ -21,6 +21,9 @@ class PubMedTool:
     def __init__(self, api_key: str | None = None) -> None:
         self.api_key = api_key or settings.ncbi_api_key
         self._last_request_time = 0.0
     @property

     def __init__(self, api_key: str | None = None) -> None:
         self.api_key = api_key or settings.ncbi_api_key
+        # Ignore placeholder values from .env.example
+        if self.api_key and "your-ncbi-key-here" in self.api_key:
+            self.api_key = None
         self._last_request_time = 0.0
     @property

src/utils/config.py CHANGED Viewed

@@ -27,9 +27,7 @@ class Settings(BaseSettings):
         default="openai", description="Which LLM provider to use"
     )
     openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
-    anthropic_model: str = Field(
-        default="claude-3-5-sonnet-20241022", description="Anthropic model"
-    )
     # PubMed Configuration
     ncbi_api_key: str | None = Field(

         default="openai", description="Which LLM provider to use"
     )
     openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
+    anthropic_model: str = Field(default="claude-sonnet-4-5", description="Anthropic model")
     # PubMed Configuration
     ncbi_api_key: str | None = Field(

tests/unit/agent_factory/test_judges.py CHANGED Viewed

@@ -34,7 +34,7 @@ class TestJudgeHandler:
         # Mock the PydanticAI agent
         mock_result = MagicMock()
-        mock_result.data = mock_assessment
         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,
@@ -88,7 +88,7 @@ class TestJudgeHandler:
         )
         mock_result = MagicMock()
-        mock_result.data = mock_assessment
         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,

         # Mock the PydanticAI agent
         mock_result = MagicMock()
+        mock_result.output = mock_assessment
         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,
         )
         mock_result = MagicMock()
+        mock_result.output = mock_assessment
         with (
             patch("src.agent_factory.judges.get_model") as mock_get_model,

tests/unit/utils/test_config.py CHANGED Viewed

@@ -40,7 +40,7 @@ class TestSettings:
     def test_get_api_key_openai_missing_raises(self):
         """get_api_key should raise ConfigurationError when OpenAI key is not set."""
         with patch.dict(os.environ, {"LLM_PROVIDER": "openai"}, clear=True):
-            settings = Settings()
             with pytest.raises(ConfigurationError, match="OPENAI_API_KEY not set"):
                 settings.get_api_key()
@@ -55,6 +55,6 @@ class TestSettings:
     def test_get_api_key_anthropic_missing_raises(self):
         """get_api_key should raise ConfigurationError when Anthropic key is not set."""
         with patch.dict(os.environ, {"LLM_PROVIDER": "anthropic"}, clear=True):
-            settings = Settings()
             with pytest.raises(ConfigurationError, match="ANTHROPIC_API_KEY not set"):
                 settings.get_api_key()

     def test_get_api_key_openai_missing_raises(self):
         """get_api_key should raise ConfigurationError when OpenAI key is not set."""
         with patch.dict(os.environ, {"LLM_PROVIDER": "openai"}, clear=True):
+            settings = Settings(_env_file=None)
             with pytest.raises(ConfigurationError, match="OPENAI_API_KEY not set"):
                 settings.get_api_key()
     def test_get_api_key_anthropic_missing_raises(self):
         """get_api_key should raise ConfigurationError when Anthropic key is not set."""
         with patch.dict(os.environ, {"LLM_PROVIDER": "anthropic"}, clear=True):
+            settings = Settings(_env_file=None)
             with pytest.raises(ConfigurationError, match="ANTHROPIC_API_KEY not set"):
                 settings.get_api_key()