Spaces:
Running
Running
Commit
·
4732667
1
Parent(s):
35ebd09
fix: pydantic-ai v1 compatibility and Anthropic model update
Browse files
- Use result.output instead of deprecated result.data (pydantic-ai v1)
- Update Anthropic model to claude-sonnet-4-5
- Remove redundant cast in judges.py
- Add orchestrator_demo example for end-to-end testing
- Fix NCBI_API_KEY placeholder handling in pubmed.py
- Update tests to use .output instead of .data
- examples/README.md +18 -13
- examples/orchestrator_demo/run_agent.py +82 -0
- src/agent_factory/judges.py +2 -2
- src/tools/pubmed.py +3 -0
- src/utils/config.py +1 -3
- tests/unit/agent_factory/test_judges.py +2 -2
- tests/unit/utils/test_config.py +2 -2
examples/README.md
CHANGED
|
@@ -2,22 +2,27 @@
|
|
| 2 |
|
| 3 |
Demo scripts for DeepCritical functionality.
|
| 4 |
|
| 5 |
-
##
|
| 6 |
|
| 7 |
-
Demonstrates
|
| 8 |
|
| 9 |
```bash
|
| 10 |
-
|
| 11 |
-
uv run python examples/search_demo/run_search.py
|
| 12 |
-
|
| 13 |
-
# Run with custom query
|
| 14 |
-
uv run python examples/search_demo/run_search.py "aspirin heart disease"
|
| 15 |
```
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
Demo scripts for DeepCritical functionality.
|
| 4 |
|
| 5 |
+
## 1. Search Demo (Phase 2)
|
| 6 |
|
| 7 |
+
Demonstrates parallel search across PubMed and Web. **No API keys required.**
|
| 8 |
|
| 9 |
```bash
|
| 10 |
+
uv run python examples/search_demo/run_search.py "metformin cancer"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
```
|
| 12 |
|
| 13 |
+
## 2. Agent Demo (Phase 4)
|
| 14 |
+
|
| 15 |
+
Demonstrates the full search-judge-synthesize loop.
|
| 16 |
+
|
| 17 |
+
**Option A: Mock Mode (No Keys)**
|
| 18 |
+
Test the logic/mechanics without an LLM.
|
| 19 |
+
```bash
|
| 20 |
+
uv run python examples/orchestrator_demo/run_agent.py "metformin cancer" --mock
|
| 21 |
+
```
|
| 22 |
|
| 23 |
+
**Option B: Real Mode (Requires Keys)**
|
| 24 |
+
Uses the real LLM Judge to evaluate evidence.
|
| 25 |
+
Requires `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` in `.env`.
|
| 26 |
+
```bash
|
| 27 |
+
uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
|
| 28 |
+
```
|
examples/orchestrator_demo/run_agent.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Demo: Full DeepCritical Agent Loop (Search + Judge + Orchestrator).
|
| 4 |
+
|
| 5 |
+
This script demonstrates Phase 4 functionality:
|
| 6 |
+
- Iterative Search (PubMed + Web)
|
| 7 |
+
- Evidence Evaluation (Judge Agent)
|
| 8 |
+
- Orchestration Loop
|
| 9 |
+
- Final Synthesis
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
# Run with Mock Judge (No API Key needed)
|
| 13 |
+
uv run python examples/orchestrator_demo/run_agent.py "metformin cancer" --mock
|
| 14 |
+
|
| 15 |
+
# Run with Real Judge (Requires OPENAI_API_KEY or ANTHROPIC_API_KEY)
|
| 16 |
+
uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import argparse
|
| 20 |
+
import asyncio
|
| 21 |
+
import os
|
| 22 |
+
import sys
|
| 23 |
+
|
| 24 |
+
from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
|
| 25 |
+
from src.orchestrator import Orchestrator
|
| 26 |
+
from src.tools.pubmed import PubMedTool
|
| 27 |
+
from src.tools.search_handler import SearchHandler
|
| 28 |
+
from src.tools.websearch import WebTool
|
| 29 |
+
from src.utils.models import OrchestratorConfig
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
async def main() -> None:
|
| 33 |
+
"""Run the agent demo."""
|
| 34 |
+
parser = argparse.ArgumentParser(description="Run DeepCritical Agent CLI")
|
| 35 |
+
parser.add_argument("query", help="Research query (e.g., 'metformin cancer')")
|
| 36 |
+
parser.add_argument("--mock", action="store_true", help="Use Mock Judge (no API key needed)")
|
| 37 |
+
parser.add_argument("--iterations", type=int, default=3, help="Max iterations")
|
| 38 |
+
args = parser.parse_args()
|
| 39 |
+
|
| 40 |
+
# Check for keys if not mocking
|
| 41 |
+
if not args.mock and not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
|
| 42 |
+
print("Error: No API key found. Set OPENAI_API_KEY or ANTHROPIC_API_KEY, or use --mock.")
|
| 43 |
+
sys.exit(1)
|
| 44 |
+
|
| 45 |
+
print(f"\n{'='*60}")
|
| 46 |
+
print("DeepCritical Agent Demo")
|
| 47 |
+
print(f"Query: {args.query}")
|
| 48 |
+
print(f"Mode: {'MOCK' if args.mock else 'REAL (LLM)'}")
|
| 49 |
+
print(f"{ '='*60}\n")
|
| 50 |
+
|
| 51 |
+
# 1. Setup Search Tools
|
| 52 |
+
search_handler = SearchHandler(tools=[PubMedTool(), WebTool()], timeout=30.0)
|
| 53 |
+
|
| 54 |
+
# 2. Setup Judge
|
| 55 |
+
judge_handler: JudgeHandler | MockJudgeHandler
|
| 56 |
+
if args.mock:
|
| 57 |
+
judge_handler = MockJudgeHandler()
|
| 58 |
+
else:
|
| 59 |
+
judge_handler = JudgeHandler()
|
| 60 |
+
|
| 61 |
+
# 3. Setup Orchestrator
|
| 62 |
+
config = OrchestratorConfig(max_iterations=args.iterations)
|
| 63 |
+
orchestrator = Orchestrator(
|
| 64 |
+
search_handler=search_handler, judge_handler=judge_handler, config=config
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# 4. Run Loop
|
| 68 |
+
try:
|
| 69 |
+
async for event in orchestrator.run(args.query):
|
| 70 |
+
# Print event with icon
|
| 71 |
+
print(event.to_markdown().replace("**", ""))
|
| 72 |
+
|
| 73 |
+
# If we got data, print a snippet
|
| 74 |
+
if event.type == "search_complete" and event.data:
|
| 75 |
+
print(f" -> Found {event.data.get('new_count', 0)} new items")
|
| 76 |
+
|
| 77 |
+
except Exception as e:
|
| 78 |
+
print(f"\n❌ Error: {e}")
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
if __name__ == "__main__":
|
| 82 |
+
asyncio.run(main())
|
src/agent_factory/judges.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""Judge handler for evidence assessment using PydanticAI."""
|
| 2 |
|
| 3 |
-
from typing import Any, cast
|
| 4 |
|
| 5 |
import structlog
|
| 6 |
from pydantic_ai import Agent
|
|
@@ -86,7 +86,7 @@ class JudgeHandler:
|
|
| 86 |
try:
|
| 87 |
# Run the agent with structured output
|
| 88 |
result = await self.agent.run(user_prompt)
|
| 89 |
-
assessment =
|
| 90 |
|
| 91 |
logger.info(
|
| 92 |
"Assessment complete",
|
|
|
|
| 1 |
"""Judge handler for evidence assessment using PydanticAI."""
|
| 2 |
|
| 3 |
+
from typing import Any
|
| 4 |
|
| 5 |
import structlog
|
| 6 |
from pydantic_ai import Agent
|
|
|
|
| 86 |
try:
|
| 87 |
# Run the agent with structured output
|
| 88 |
result = await self.agent.run(user_prompt)
|
| 89 |
+
assessment = result.output
|
| 90 |
|
| 91 |
logger.info(
|
| 92 |
"Assessment complete",
|
src/tools/pubmed.py
CHANGED
|
@@ -21,6 +21,9 @@ class PubMedTool:
|
|
| 21 |
|
| 22 |
def __init__(self, api_key: str | None = None) -> None:
|
| 23 |
self.api_key = api_key or settings.ncbi_api_key
|
|
|
|
|
|
|
|
|
|
| 24 |
self._last_request_time = 0.0
|
| 25 |
|
| 26 |
@property
|
|
|
|
| 21 |
|
| 22 |
def __init__(self, api_key: str | None = None) -> None:
|
| 23 |
self.api_key = api_key or settings.ncbi_api_key
|
| 24 |
+
# Ignore placeholder values from .env.example
|
| 25 |
+
if self.api_key and "your-ncbi-key-here" in self.api_key:
|
| 26 |
+
self.api_key = None
|
| 27 |
self._last_request_time = 0.0
|
| 28 |
|
| 29 |
@property
|
src/utils/config.py
CHANGED
|
@@ -27,9 +27,7 @@ class Settings(BaseSettings):
|
|
| 27 |
default="openai", description="Which LLM provider to use"
|
| 28 |
)
|
| 29 |
openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
|
| 30 |
-
anthropic_model: str = Field(
|
| 31 |
-
default="claude-3-5-sonnet-20241022", description="Anthropic model"
|
| 32 |
-
)
|
| 33 |
|
| 34 |
# PubMed Configuration
|
| 35 |
ncbi_api_key: str | None = Field(
|
|
|
|
| 27 |
default="openai", description="Which LLM provider to use"
|
| 28 |
)
|
| 29 |
openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
|
| 30 |
+
anthropic_model: str = Field(default="claude-sonnet-4-5", description="Anthropic model")
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# PubMed Configuration
|
| 33 |
ncbi_api_key: str | None = Field(
|
tests/unit/agent_factory/test_judges.py
CHANGED
|
@@ -34,7 +34,7 @@ class TestJudgeHandler:
|
|
| 34 |
|
| 35 |
# Mock the PydanticAI agent
|
| 36 |
mock_result = MagicMock()
|
| 37 |
-
mock_result.data = mock_assessment
|
| 38 |
|
| 39 |
with (
|
| 40 |
patch("src.agent_factory.judges.get_model") as mock_get_model,
|
|
@@ -88,7 +88,7 @@ class TestJudgeHandler:
|
|
| 88 |
)
|
| 89 |
|
| 90 |
mock_result = MagicMock()
|
| 91 |
-
mock_result.data = mock_assessment
|
| 92 |
|
| 93 |
with (
|
| 94 |
patch("src.agent_factory.judges.get_model") as mock_get_model,
|
|
|
|
| 34 |
|
| 35 |
# Mock the PydanticAI agent
|
| 36 |
mock_result = MagicMock()
|
| 37 |
+
mock_result.output = mock_assessment
|
| 38 |
|
| 39 |
with (
|
| 40 |
patch("src.agent_factory.judges.get_model") as mock_get_model,
|
|
|
|
| 88 |
)
|
| 89 |
|
| 90 |
mock_result = MagicMock()
|
| 91 |
+
mock_result.output = mock_assessment
|
| 92 |
|
| 93 |
with (
|
| 94 |
patch("src.agent_factory.judges.get_model") as mock_get_model,
|
tests/unit/utils/test_config.py
CHANGED
|
@@ -40,7 +40,7 @@ class TestSettings:
|
|
| 40 |
def test_get_api_key_openai_missing_raises(self):
|
| 41 |
"""get_api_key should raise ConfigurationError when OpenAI key is not set."""
|
| 42 |
with patch.dict(os.environ, {"LLM_PROVIDER": "openai"}, clear=True):
|
| 43 |
-
settings = Settings()
|
| 44 |
with pytest.raises(ConfigurationError, match="OPENAI_API_KEY not set"):
|
| 45 |
settings.get_api_key()
|
| 46 |
|
|
@@ -55,6 +55,6 @@ class TestSettings:
|
|
| 55 |
def test_get_api_key_anthropic_missing_raises(self):
|
| 56 |
"""get_api_key should raise ConfigurationError when Anthropic key is not set."""
|
| 57 |
with patch.dict(os.environ, {"LLM_PROVIDER": "anthropic"}, clear=True):
|
| 58 |
-
settings = Settings()
|
| 59 |
with pytest.raises(ConfigurationError, match="ANTHROPIC_API_KEY not set"):
|
| 60 |
settings.get_api_key()
|
|
|
|
| 40 |
def test_get_api_key_openai_missing_raises(self):
|
| 41 |
"""get_api_key should raise ConfigurationError when OpenAI key is not set."""
|
| 42 |
with patch.dict(os.environ, {"LLM_PROVIDER": "openai"}, clear=True):
|
| 43 |
+
settings = Settings(_env_file=None)
|
| 44 |
with pytest.raises(ConfigurationError, match="OPENAI_API_KEY not set"):
|
| 45 |
settings.get_api_key()
|
| 46 |
|
|
|
|
| 55 |
def test_get_api_key_anthropic_missing_raises(self):
|
| 56 |
"""get_api_key should raise ConfigurationError when Anthropic key is not set."""
|
| 57 |
with patch.dict(os.environ, {"LLM_PROVIDER": "anthropic"}, clear=True):
|
| 58 |
+
settings = Settings(_env_file=None)
|
| 59 |
with pytest.raises(ConfigurationError, match="ANTHROPIC_API_KEY not set"):
|
| 60 |
settings.get_api_key()
|