VibecoderMcSwaggins committed on
Commit
4732667
·
1 Parent(s): 35ebd09

fix: pydantic-ai v1 compatibility and Anthropic model update

Browse files

- Use result.output instead of deprecated result.data (pydantic-ai v1)
- Update Anthropic model to claude-sonnet-4-5
- Remove redundant cast in judges.py
- Add orchestrator_demo example for end-to-end testing
- Fix NCBI_API_KEY placeholder handling in pubmed.py
- Update tests to use .output instead of .data

examples/README.md CHANGED
@@ -2,22 +2,27 @@
2
 
3
  Demo scripts for DeepCritical functionality.
4
 
5
- ## search_demo
6
 
7
- Demonstrates Phase 2 search functionality:
8
 
9
  ```bash
10
- # Run with default query (metformin + Alzheimer's)
11
- uv run python examples/search_demo/run_search.py
12
-
13
- # Run with custom query
14
- uv run python examples/search_demo/run_search.py "aspirin heart disease"
15
  ```
16
 
17
- **What it does:**
18
- - Searches PubMed (biomedical literature)
19
- - Searches DuckDuckGo (web)
20
- - Runs both in parallel (scatter-gather)
21
- - Returns evidence with citations
 
 
 
 
22
 
23
- **Optional:** Set `NCBI_API_KEY` in `.env` for higher PubMed rate limits.
 
 
 
 
 
 
2
 
3
  Demo scripts for DeepCritical functionality.
4
 
5
+ ## 1. Search Demo (Phase 2)
6
 
7
+ Demonstrates parallel search across PubMed and Web. **No API keys required.**
8
 
9
  ```bash
10
+ uv run python examples/search_demo/run_search.py "metformin cancer"
 
 
 
 
11
  ```
12
 
13
+ ## 2. Agent Demo (Phase 4)
14
+
15
+ Demonstrates the full search-judge-synthesize loop.
16
+
17
+ **Option A: Mock Mode (No Keys)**
18
+ Test the logic/mechanics without an LLM.
19
+ ```bash
20
+ uv run python examples/orchestrator_demo/run_agent.py "metformin cancer" --mock
21
+ ```
22
 
23
+ **Option B: Real Mode (Requires Keys)**
24
+ Uses the real LLM Judge to evaluate evidence.
25
+ Requires `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` in `.env`.
26
+ ```bash
27
+ uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
28
+ ```
examples/orchestrator_demo/run_agent.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Demo: Full DeepCritical Agent Loop (Search + Judge + Orchestrator).

This script demonstrates Phase 4 functionality:
- Iterative Search (PubMed + Web)
- Evidence Evaluation (Judge Agent)
- Orchestration Loop
- Final Synthesis

Usage:
    # Run with Mock Judge (No API Key needed)
    uv run python examples/orchestrator_demo/run_agent.py "metformin cancer" --mock

    # Run with Real Judge (Requires OPENAI_API_KEY or ANTHROPIC_API_KEY)
    uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
"""

import argparse
import asyncio
import os
import sys

from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
from src.orchestrator import Orchestrator
from src.tools.pubmed import PubMedTool
from src.tools.search_handler import SearchHandler
from src.tools.websearch import WebTool
from src.utils.models import OrchestratorConfig

# Visual separator for console banners (single definition keeps output consistent).
SEPARATOR = "=" * 60


async def main() -> None:
    """Parse CLI args, wire up search + judge + orchestrator, and stream the run.

    Exits with status 1 when real mode is requested without an LLM API key,
    or when the orchestrator run raises.
    """
    parser = argparse.ArgumentParser(description="Run DeepCritical Agent CLI")
    parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
    parser.add_argument("--mock", action="store_true", help="Use Mock Judge (no API key needed)")
    parser.add_argument("--iterations", type=int, default=3, help="Max iterations")
    args = parser.parse_args()

    # Fail fast in real mode: the Judge needs an LLM provider key.
    if not args.mock and not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
        print("Error: No API key found. Set OPENAI_API_KEY or ANTHROPIC_API_KEY, or use --mock.")
        sys.exit(1)

    print(f"\n{SEPARATOR}")
    print("DeepCritical Agent Demo")
    print(f"Query: {args.query}")
    print(f"Mode: {'MOCK' if args.mock else 'REAL (LLM)'}")
    print(f"{SEPARATOR}\n")

    # 1. Setup Search Tools (PubMed + Web behind one handler).
    search_handler = SearchHandler(tools=[PubMedTool(), WebTool()], timeout=30.0)

    # 2. Setup Judge: mock mode avoids LLM calls entirely.
    judge_handler: JudgeHandler | MockJudgeHandler
    judge_handler = MockJudgeHandler() if args.mock else JudgeHandler()

    # 3. Setup Orchestrator with the requested iteration budget.
    config = OrchestratorConfig(max_iterations=args.iterations)
    orchestrator = Orchestrator(
        search_handler=search_handler, judge_handler=judge_handler, config=config
    )

    # 4. Run the loop, streaming progress events to the console.
    try:
        async for event in orchestrator.run(args.query):
            # Strip markdown bold markers for plain-console output.
            print(event.to_markdown().replace("**", ""))

            # If we got data, print a snippet.
            if event.type == "search_complete" and event.data:
                print(f" -> Found {event.data.get('new_count', 0)} new items")

    except Exception as e:
        # Surface the failure AND exit nonzero so shells/CI can detect it
        # (previously the error was printed but the script still exited 0).
        print(f"\n❌ Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
src/agent_factory/judges.py CHANGED
@@ -1,6 +1,6 @@
1
  """Judge handler for evidence assessment using PydanticAI."""
2
 
3
- from typing import Any, cast
4
 
5
  import structlog
6
  from pydantic_ai import Agent
@@ -86,7 +86,7 @@ class JudgeHandler:
86
  try:
87
  # Run the agent with structured output
88
  result = await self.agent.run(user_prompt)
89
- assessment = cast(JudgeAssessment, result.data) # type: ignore[attr-defined]
90
 
91
  logger.info(
92
  "Assessment complete",
 
1
  """Judge handler for evidence assessment using PydanticAI."""
2
 
3
+ from typing import Any
4
 
5
  import structlog
6
  from pydantic_ai import Agent
 
86
  try:
87
  # Run the agent with structured output
88
  result = await self.agent.run(user_prompt)
89
+ assessment = result.output
90
 
91
  logger.info(
92
  "Assessment complete",
src/tools/pubmed.py CHANGED
@@ -21,6 +21,9 @@ class PubMedTool:
21
 
22
  def __init__(self, api_key: str | None = None) -> None:
23
  self.api_key = api_key or settings.ncbi_api_key
 
 
 
24
  self._last_request_time = 0.0
25
 
26
  @property
 
21
 
22
  def __init__(self, api_key: str | None = None) -> None:
23
  self.api_key = api_key or settings.ncbi_api_key
24
+ # Ignore placeholder values from .env.example
25
+ if self.api_key and "your-ncbi-key-here" in self.api_key:
26
+ self.api_key = None
27
  self._last_request_time = 0.0
28
 
29
  @property
src/utils/config.py CHANGED
@@ -27,9 +27,7 @@ class Settings(BaseSettings):
27
  default="openai", description="Which LLM provider to use"
28
  )
29
  openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
30
- anthropic_model: str = Field(
31
- default="claude-3-5-sonnet-20241022", description="Anthropic model"
32
- )
33
 
34
  # PubMed Configuration
35
  ncbi_api_key: str | None = Field(
 
27
  default="openai", description="Which LLM provider to use"
28
  )
29
  openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
30
+ anthropic_model: str = Field(default="claude-sonnet-4-5", description="Anthropic model")
 
 
31
 
32
  # PubMed Configuration
33
  ncbi_api_key: str | None = Field(
tests/unit/agent_factory/test_judges.py CHANGED
@@ -34,7 +34,7 @@ class TestJudgeHandler:
34
 
35
  # Mock the PydanticAI agent
36
  mock_result = MagicMock()
37
- mock_result.data = mock_assessment
38
 
39
  with (
40
  patch("src.agent_factory.judges.get_model") as mock_get_model,
@@ -88,7 +88,7 @@ class TestJudgeHandler:
88
  )
89
 
90
  mock_result = MagicMock()
91
- mock_result.data = mock_assessment
92
 
93
  with (
94
  patch("src.agent_factory.judges.get_model") as mock_get_model,
 
34
 
35
  # Mock the PydanticAI agent
36
  mock_result = MagicMock()
37
+ mock_result.output = mock_assessment
38
 
39
  with (
40
  patch("src.agent_factory.judges.get_model") as mock_get_model,
 
88
  )
89
 
90
  mock_result = MagicMock()
91
+ mock_result.output = mock_assessment
92
 
93
  with (
94
  patch("src.agent_factory.judges.get_model") as mock_get_model,
tests/unit/utils/test_config.py CHANGED
@@ -40,7 +40,7 @@ class TestSettings:
40
  def test_get_api_key_openai_missing_raises(self):
41
  """get_api_key should raise ConfigurationError when OpenAI key is not set."""
42
  with patch.dict(os.environ, {"LLM_PROVIDER": "openai"}, clear=True):
43
- settings = Settings()
44
  with pytest.raises(ConfigurationError, match="OPENAI_API_KEY not set"):
45
  settings.get_api_key()
46
 
@@ -55,6 +55,6 @@ class TestSettings:
55
  def test_get_api_key_anthropic_missing_raises(self):
56
  """get_api_key should raise ConfigurationError when Anthropic key is not set."""
57
  with patch.dict(os.environ, {"LLM_PROVIDER": "anthropic"}, clear=True):
58
- settings = Settings()
59
  with pytest.raises(ConfigurationError, match="ANTHROPIC_API_KEY not set"):
60
  settings.get_api_key()
 
40
  def test_get_api_key_openai_missing_raises(self):
41
  """get_api_key should raise ConfigurationError when OpenAI key is not set."""
42
  with patch.dict(os.environ, {"LLM_PROVIDER": "openai"}, clear=True):
43
+ settings = Settings(_env_file=None)
44
  with pytest.raises(ConfigurationError, match="OPENAI_API_KEY not set"):
45
  settings.get_api_key()
46
 
 
55
  def test_get_api_key_anthropic_missing_raises(self):
56
  """get_api_key should raise ConfigurationError when Anthropic key is not set."""
57
  with patch.dict(os.environ, {"LLM_PROVIDER": "anthropic"}, clear=True):
58
+ settings = Settings(_env_file=None)
59
  with pytest.raises(ConfigurationError, match="ANTHROPIC_API_KEY not set"):
60
  settings.get_api_key()