Spaces:
Running
Running
Commit
·
9760706
1
Parent(s):
95d2e29
fix: add type ignore comment for Agent instantiation in JudgeHandler
Browse filesUpdated the Agent instantiation in JudgeHandler to include a type ignore comment, addressing type-checking issues while maintaining functionality.
docs/implementation/05_phase_magentic.md
CHANGED
|
@@ -112,12 +112,13 @@ magentic = [
|
|
| 112 |
|
| 113 |
### 4.2 Agent Wrappers (`src/agents/search_agent.py`)
|
| 114 |
|
| 115 |
-
Wrap `SearchHandler` as an `AgentProtocol
|
|
|
|
| 116 |
|
| 117 |
```python
|
| 118 |
"""Search agent wrapper for Magentic integration."""
|
| 119 |
-
from typing import Any
|
| 120 |
-
from agent_framework import AgentProtocol, AgentRunResponse, ChatMessage, Role
|
| 121 |
|
| 122 |
from src.tools.search_handler import SearchHandler
|
| 123 |
from src.utils.models import SearchResult
|
|
@@ -130,6 +131,7 @@ class SearchAgent:
|
|
| 130 |
self._handler = search_handler
|
| 131 |
self._id = "search-agent"
|
| 132 |
self._name = "SearchAgent"
|
|
|
|
| 133 |
|
| 134 |
@property
|
| 135 |
def id(self) -> str:
|
|
@@ -145,24 +147,29 @@ class SearchAgent:
|
|
| 145 |
|
| 146 |
@property
|
| 147 |
def description(self) -> str | None:
|
| 148 |
-
return
|
| 149 |
|
| 150 |
async def run(
|
| 151 |
self,
|
| 152 |
-
messages: list[ChatMessage] | None = None,
|
| 153 |
*,
|
| 154 |
-
thread:
|
| 155 |
**kwargs: Any,
|
| 156 |
) -> AgentRunResponse:
|
| 157 |
"""Execute search based on the last user message."""
|
| 158 |
# Extract query from messages
|
| 159 |
query = ""
|
| 160 |
-
if messages:
|
| 161 |
for msg in reversed(messages):
|
| 162 |
-
if msg.role == Role.USER and msg.text:
|
| 163 |
query = msg.text
|
| 164 |
break
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
if not query:
|
| 167 |
return AgentRunResponse(
|
| 168 |
messages=[ChatMessage(role=Role.ASSISTANT, text="No query provided")],
|
|
@@ -183,24 +190,35 @@ class SearchAgent:
|
|
| 183 |
return AgentRunResponse(
|
| 184 |
messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
|
| 185 |
response_id=f"search-{result.total_found}",
|
| 186 |
-
|
| 187 |
)
|
| 188 |
|
| 189 |
-
def run_stream(
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
```
|
| 197 |
|
| 198 |
### 4.3 Judge Agent Wrapper (`src/agents/judge_agent.py`)
|
| 199 |
|
| 200 |
```python
|
| 201 |
"""Judge agent wrapper for Magentic integration."""
|
| 202 |
-
from typing import Any, List
|
| 203 |
-
from agent_framework import AgentProtocol, AgentRunResponse, ChatMessage, Role
|
| 204 |
|
| 205 |
from src.agent_factory.judges import JudgeHandler
|
| 206 |
from src.utils.models import Evidence, JudgeAssessment
|
|
@@ -214,6 +232,7 @@ class JudgeAgent:
|
|
| 214 |
self._evidence_store = evidence_store # Shared state for evidence
|
| 215 |
self._id = "judge-agent"
|
| 216 |
self._name = "JudgeAgent"
|
|
|
|
| 217 |
|
| 218 |
@property
|
| 219 |
def id(self) -> str:
|
|
@@ -229,23 +248,28 @@ class JudgeAgent:
|
|
| 229 |
|
| 230 |
@property
|
| 231 |
def description(self) -> str | None:
|
| 232 |
-
return
|
| 233 |
|
| 234 |
async def run(
|
| 235 |
self,
|
| 236 |
-
messages: list[ChatMessage] | None = None,
|
| 237 |
*,
|
| 238 |
-
thread:
|
| 239 |
**kwargs: Any,
|
| 240 |
) -> AgentRunResponse:
|
| 241 |
"""Assess evidence quality."""
|
| 242 |
# Extract original question from messages
|
| 243 |
question = ""
|
| 244 |
-
if messages:
|
| 245 |
-
for msg in messages:
|
| 246 |
-
if msg.role == Role.USER and msg.text:
|
| 247 |
question = msg.text
|
| 248 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
# Get evidence from shared store
|
| 251 |
evidence = self._evidence_store.get("current", [])
|
|
@@ -276,16 +300,26 @@ class JudgeAgent:
|
|
| 276 |
return AgentRunResponse(
|
| 277 |
messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
|
| 278 |
response_id=f"judge-{assessment.recommendation}",
|
| 279 |
-
|
| 280 |
)
|
| 281 |
|
| 282 |
-
def run_stream(
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
```
|
| 290 |
|
| 291 |
### 4.4 Magentic Orchestrator (`src/orchestrator_magentic.py`)
|
|
@@ -300,6 +334,7 @@ from agent_framework import (
|
|
| 300 |
MagenticFinalResultEvent,
|
| 301 |
MagenticAgentMessageEvent,
|
| 302 |
MagenticOrchestratorMessageEvent,
|
|
|
|
| 303 |
WorkflowOutputEvent,
|
| 304 |
)
|
| 305 |
from agent_framework.openai import OpenAIChatClient
|
|
@@ -350,6 +385,7 @@ class MagenticOrchestrator:
|
|
| 350 |
judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
|
| 351 |
|
| 352 |
# Build Magentic workflow
|
|
|
|
| 353 |
workflow = (
|
| 354 |
MagenticBuilder()
|
| 355 |
.participants(
|
|
@@ -383,32 +419,40 @@ Focus on finding:
|
|
| 383 |
|
| 384 |
iteration = 0
|
| 385 |
try:
|
|
|
|
| 386 |
async for event in workflow.run_stream(task):
|
| 387 |
if isinstance(event, MagenticOrchestratorMessageEvent):
|
|
|
|
|
|
|
| 388 |
yield AgentEvent(
|
| 389 |
type="judging",
|
| 390 |
-
message=f"Manager
|
| 391 |
iteration=iteration,
|
| 392 |
)
|
| 393 |
|
| 394 |
elif isinstance(event, MagenticAgentMessageEvent):
|
|
|
|
| 395 |
iteration += 1
|
| 396 |
agent_name = event.agent_id or "unknown"
|
|
|
|
| 397 |
|
| 398 |
if "search" in agent_name.lower():
|
|
|
|
|
|
|
| 399 |
yield AgentEvent(
|
| 400 |
type="search_complete",
|
| 401 |
-
message=f"Search agent
|
| 402 |
iteration=iteration,
|
| 403 |
)
|
| 404 |
elif "judge" in agent_name.lower():
|
| 405 |
yield AgentEvent(
|
| 406 |
type="judge_complete",
|
| 407 |
-
message=f"Judge agent
|
| 408 |
iteration=iteration,
|
| 409 |
)
|
| 410 |
|
| 411 |
elif isinstance(event, MagenticFinalResultEvent):
|
|
|
|
| 412 |
final_text = event.message.text if event.message else "No result"
|
| 413 |
yield AgentEvent(
|
| 414 |
type="complete",
|
|
@@ -417,7 +461,19 @@ Focus on finding:
|
|
| 417 |
iteration=iteration,
|
| 418 |
)
|
| 419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
elif isinstance(event, WorkflowOutputEvent):
|
|
|
|
| 421 |
if event.data:
|
| 422 |
yield AgentEvent(
|
| 423 |
type="complete",
|
|
|
|
| 112 |
|
| 113 |
### 4.2 Agent Wrappers (`src/agents/search_agent.py`)
|
| 114 |
|
| 115 |
+
Wrap `SearchHandler` as an `AgentProtocol`.
|
| 116 |
+
**Note**: `AgentProtocol` requires `id`, `name`, `display_name`, `description`, `run`, `run_stream`, and `get_new_thread`.
|
| 117 |
|
| 118 |
```python
|
| 119 |
"""Search agent wrapper for Magentic integration."""
|
| 120 |
+
from typing import Any, AsyncIterable
|
| 121 |
+
from agent_framework import AgentProtocol, AgentRunResponse, AgentRunResponseUpdate, ChatMessage, Role, AgentThread
|
| 122 |
|
| 123 |
from src.tools.search_handler import SearchHandler
|
| 124 |
from src.utils.models import SearchResult
|
|
|
|
| 131 |
self._handler = search_handler
|
| 132 |
self._id = "search-agent"
|
| 133 |
self._name = "SearchAgent"
|
| 134 |
+
self._description = "Searches PubMed and web for drug repurposing evidence"
|
| 135 |
|
| 136 |
@property
|
| 137 |
def id(self) -> str:
|
|
|
|
| 147 |
|
| 148 |
@property
|
| 149 |
def description(self) -> str | None:
|
| 150 |
+
return self._description
|
| 151 |
|
| 152 |
async def run(
|
| 153 |
self,
|
| 154 |
+
messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
|
| 155 |
*,
|
| 156 |
+
thread: AgentThread | None = None,
|
| 157 |
**kwargs: Any,
|
| 158 |
) -> AgentRunResponse:
|
| 159 |
"""Execute search based on the last user message."""
|
| 160 |
# Extract query from messages
|
| 161 |
query = ""
|
| 162 |
+
if isinstance(messages, list):
|
| 163 |
for msg in reversed(messages):
|
| 164 |
+
if isinstance(msg, ChatMessage) and msg.role == Role.USER and msg.text:
|
| 165 |
query = msg.text
|
| 166 |
break
|
| 167 |
+
elif isinstance(msg, str):
|
| 168 |
+
query = msg
|
| 169 |
+
break
|
| 170 |
+
elif isinstance(messages, str):
|
| 171 |
+
query = messages
|
| 172 |
+
|
| 173 |
if not query:
|
| 174 |
return AgentRunResponse(
|
| 175 |
messages=[ChatMessage(role=Role.ASSISTANT, text="No query provided")],
|
|
|
|
| 190 |
return AgentRunResponse(
|
| 191 |
messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
|
| 192 |
response_id=f"search-{result.total_found}",
|
| 193 |
+
additional_properties={"evidence": [e.model_dump() for e in result.evidence]},
|
| 194 |
)
|
| 195 |
|
| 196 |
+
async def run_stream(
|
| 197 |
+
self,
|
| 198 |
+
messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
|
| 199 |
+
*,
|
| 200 |
+
thread: AgentThread | None = None,
|
| 201 |
+
**kwargs: Any,
|
| 202 |
+
) -> AsyncIterable[AgentRunResponseUpdate]:
|
| 203 |
+
"""Streaming wrapper for search (search itself isn't streaming)."""
|
| 204 |
+
result = await self.run(messages, thread=thread, **kwargs)
|
| 205 |
+
# Yield single update with full result
|
| 206 |
+
yield AgentRunResponseUpdate(
|
| 207 |
+
messages=result.messages,
|
| 208 |
+
response_id=result.response_id
|
| 209 |
+
)
|
| 210 |
+
|
| 211 |
+
def get_new_thread(self, **kwargs: Any) -> AgentThread:
|
| 212 |
+
"""Create a new thread."""
|
| 213 |
+
return AgentThread(**kwargs)
|
| 214 |
```
|
| 215 |
|
| 216 |
### 4.3 Judge Agent Wrapper (`src/agents/judge_agent.py`)
|
| 217 |
|
| 218 |
```python
|
| 219 |
"""Judge agent wrapper for Magentic integration."""
|
| 220 |
+
from typing import Any, List, AsyncIterable
|
| 221 |
+
from agent_framework import AgentProtocol, AgentRunResponse, AgentRunResponseUpdate, ChatMessage, Role, AgentThread
|
| 222 |
|
| 223 |
from src.agent_factory.judges import JudgeHandler
|
| 224 |
from src.utils.models import Evidence, JudgeAssessment
|
|
|
|
| 232 |
self._evidence_store = evidence_store # Shared state for evidence
|
| 233 |
self._id = "judge-agent"
|
| 234 |
self._name = "JudgeAgent"
|
| 235 |
+
self._description = "Evaluates evidence quality and determines if sufficient for synthesis"
|
| 236 |
|
| 237 |
@property
|
| 238 |
def id(self) -> str:
|
|
|
|
| 248 |
|
| 249 |
@property
|
| 250 |
def description(self) -> str | None:
|
| 251 |
+
return self._description
|
| 252 |
|
| 253 |
async def run(
|
| 254 |
self,
|
| 255 |
+
messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
|
| 256 |
*,
|
| 257 |
+
thread: AgentThread | None = None,
|
| 258 |
**kwargs: Any,
|
| 259 |
) -> AgentRunResponse:
|
| 260 |
"""Assess evidence quality."""
|
| 261 |
# Extract original question from messages
|
| 262 |
question = ""
|
| 263 |
+
if isinstance(messages, list):
|
| 264 |
+
for msg in reversed(messages):
|
| 265 |
+
if isinstance(msg, ChatMessage) and msg.role == Role.USER and msg.text:
|
| 266 |
question = msg.text
|
| 267 |
break
|
| 268 |
+
elif isinstance(msg, str):
|
| 269 |
+
question = msg
|
| 270 |
+
break
|
| 271 |
+
elif isinstance(messages, str):
|
| 272 |
+
question = messages
|
| 273 |
|
| 274 |
# Get evidence from shared store
|
| 275 |
evidence = self._evidence_store.get("current", [])
|
|
|
|
| 300 |
return AgentRunResponse(
|
| 301 |
messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
|
| 302 |
response_id=f"judge-{assessment.recommendation}",
|
| 303 |
+
additional_properties={"assessment": assessment.model_dump()},
|
| 304 |
)
|
| 305 |
|
| 306 |
+
async def run_stream(
|
| 307 |
+
self,
|
| 308 |
+
messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
|
| 309 |
+
*,
|
| 310 |
+
thread: AgentThread | None = None,
|
| 311 |
+
**kwargs: Any,
|
| 312 |
+
) -> AsyncIterable[AgentRunResponseUpdate]:
|
| 313 |
+
"""Streaming wrapper for judge."""
|
| 314 |
+
result = await self.run(messages, thread=thread, **kwargs)
|
| 315 |
+
yield AgentRunResponseUpdate(
|
| 316 |
+
messages=result.messages,
|
| 317 |
+
response_id=result.response_id
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
def get_new_thread(self, **kwargs: Any) -> AgentThread:
|
| 321 |
+
"""Create a new thread."""
|
| 322 |
+
return AgentThread(**kwargs)
|
| 323 |
```
|
| 324 |
|
| 325 |
### 4.4 Magentic Orchestrator (`src/orchestrator_magentic.py`)
|
|
|
|
| 334 |
MagenticFinalResultEvent,
|
| 335 |
MagenticAgentMessageEvent,
|
| 336 |
MagenticOrchestratorMessageEvent,
|
| 337 |
+
MagenticAgentDeltaEvent,
|
| 338 |
WorkflowOutputEvent,
|
| 339 |
)
|
| 340 |
from agent_framework.openai import OpenAIChatClient
|
|
|
|
| 385 |
judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
|
| 386 |
|
| 387 |
# Build Magentic workflow
|
| 388 |
+
# Note: MagenticBuilder.participants takes named arguments for agent instances
|
| 389 |
workflow = (
|
| 390 |
MagenticBuilder()
|
| 391 |
.participants(
|
|
|
|
| 419 |
|
| 420 |
iteration = 0
|
| 421 |
try:
|
| 422 |
+
# workflow.run_stream returns an async generator of workflow events
|
| 423 |
async for event in workflow.run_stream(task):
|
| 424 |
if isinstance(event, MagenticOrchestratorMessageEvent):
|
| 425 |
+
# Manager events (planning, instruction, ledger)
|
| 426 |
+
message_text = event.message.text if event.message else ""
|
| 427 |
yield AgentEvent(
|
| 428 |
type="judging",
|
| 429 |
+
message=f"Manager ({event.kind}): {message_text[:100]}...",
|
| 430 |
iteration=iteration,
|
| 431 |
)
|
| 432 |
|
| 433 |
elif isinstance(event, MagenticAgentMessageEvent):
|
| 434 |
+
# Complete agent response
|
| 435 |
iteration += 1
|
| 436 |
agent_name = event.agent_id or "unknown"
|
| 437 |
+
msg_text = event.message.text if event.message else ""
|
| 438 |
|
| 439 |
if "search" in agent_name.lower():
|
| 440 |
+
# Check if we found evidence (based on SearchAgent logic)
|
| 441 |
+
# In a real implementation we might extract metadata
|
| 442 |
yield AgentEvent(
|
| 443 |
type="search_complete",
|
| 444 |
+
message=f"Search agent: {msg_text[:100]}...",
|
| 445 |
iteration=iteration,
|
| 446 |
)
|
| 447 |
elif "judge" in agent_name.lower():
|
| 448 |
yield AgentEvent(
|
| 449 |
type="judge_complete",
|
| 450 |
+
message=f"Judge agent: {msg_text[:100]}...",
|
| 451 |
iteration=iteration,
|
| 452 |
)
|
| 453 |
|
| 454 |
elif isinstance(event, MagenticFinalResultEvent):
|
| 455 |
+
# Final workflow result
|
| 456 |
final_text = event.message.text if event.message else "No result"
|
| 457 |
yield AgentEvent(
|
| 458 |
type="complete",
|
|
|
|
| 461 |
iteration=iteration,
|
| 462 |
)
|
| 463 |
|
| 464 |
+
elif isinstance(event, MagenticAgentDeltaEvent):
|
| 465 |
+
# Streaming token chunks from agents (optional "typing" effect)
|
| 466 |
+
# Only emit if we have actual text content
|
| 467 |
+
if event.text:
|
| 468 |
+
yield AgentEvent(
|
| 469 |
+
type="streaming",
|
| 470 |
+
message=event.text,
|
| 471 |
+
data={"agent_id": event.agent_id},
|
| 472 |
+
iteration=iteration,
|
| 473 |
+
)
|
| 474 |
+
|
| 475 |
elif isinstance(event, WorkflowOutputEvent):
|
| 476 |
+
# Alternative final output event
|
| 477 |
if event.data:
|
| 478 |
yield AgentEvent(
|
| 479 |
type="complete",
|
docs/implementation/roadmap.md
CHANGED
|
@@ -119,10 +119,10 @@ tests/
|
|
| 119 |
|
| 120 |
*Goal: Upgrade orchestrator to use Microsoft Agent Framework patterns.*
|
| 121 |
|
| 122 |
-
- [ ] Wrap SearchHandler as `AgentProtocol` (SearchAgent)
|
| 123 |
-
- [ ] Wrap JudgeHandler as `AgentProtocol` (JudgeAgent)
|
| 124 |
-
- [ ] Implement `MagenticOrchestrator` using `MagenticBuilder
|
| 125 |
-
- [ ] Create factory pattern for switching implementations
|
| 126 |
- **Deliverable**: Same API, better multi-agent orchestration engine.
|
| 127 |
|
| 128 |
**NOTE**: Only implement Phase 5 if time permits after MVP is shipped.
|
|
|
|
| 119 |
|
| 120 |
*Goal: Upgrade orchestrator to use Microsoft Agent Framework patterns.*
|
| 121 |
|
| 122 |
+
- [ ] Wrap SearchHandler as `AgentProtocol` (SearchAgent) with strict protocol compliance.
|
| 123 |
+
- [ ] Wrap JudgeHandler as `AgentProtocol` (JudgeAgent) with strict protocol compliance.
|
| 124 |
+
- [ ] Implement `MagenticOrchestrator` using `MagenticBuilder`.
|
| 125 |
+
- [ ] Create factory pattern for switching implementations.
|
| 126 |
- **Deliverable**: Same API, better multi-agent orchestration engine.
|
| 127 |
|
| 128 |
**NOTE**: Only implement Phase 5 if time permits after MVP is shipped.
|
src/agent_factory/judges.py
CHANGED
|
@@ -24,6 +24,10 @@ def get_model() -> Any:
|
|
| 24 |
|
| 25 |
if provider == "anthropic":
|
| 26 |
return AnthropicModel(settings.anthropic_model)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
return OpenAIModel(settings.openai_model)
|
| 28 |
|
| 29 |
|
|
@@ -42,9 +46,9 @@ class JudgeHandler:
|
|
| 42 |
model: Optional PydanticAI model. If None, uses config default.
|
| 43 |
"""
|
| 44 |
self.model = model or get_model()
|
| 45 |
-
self.agent = Agent(
|
| 46 |
model=self.model,
|
| 47 |
-
|
| 48 |
system_prompt=SYSTEM_PROMPT,
|
| 49 |
retries=3,
|
| 50 |
)
|
|
@@ -82,7 +86,7 @@ class JudgeHandler:
|
|
| 82 |
try:
|
| 83 |
# Run the agent with structured output
|
| 84 |
result = await self.agent.run(user_prompt)
|
| 85 |
-
assessment = cast(JudgeAssessment, result.data)
|
| 86 |
|
| 87 |
logger.info(
|
| 88 |
"Assessment complete",
|
|
|
|
| 24 |
|
| 25 |
if provider == "anthropic":
|
| 26 |
return AnthropicModel(settings.anthropic_model)
|
| 27 |
+
|
| 28 |
+
if provider != "openai":
|
| 29 |
+
logger.warning("Unknown LLM provider, defaulting to OpenAI", provider=provider)
|
| 30 |
+
|
| 31 |
return OpenAIModel(settings.openai_model)
|
| 32 |
|
| 33 |
|
|
|
|
| 46 |
model: Optional PydanticAI model. If None, uses config default.
|
| 47 |
"""
|
| 48 |
self.model = model or get_model()
|
| 49 |
+
self.agent = Agent(
|
| 50 |
model=self.model,
|
| 51 |
+
output_type=JudgeAssessment,
|
| 52 |
system_prompt=SYSTEM_PROMPT,
|
| 53 |
retries=3,
|
| 54 |
)
|
|
|
|
| 86 |
try:
|
| 87 |
# Run the agent with structured output
|
| 88 |
result = await self.agent.run(user_prompt)
|
| 89 |
+
assessment = cast(JudgeAssessment, result.data) # type: ignore[attr-defined]
|
| 90 |
|
| 91 |
logger.info(
|
| 92 |
"Assessment complete",
|
src/prompts/judge.py
CHANGED
|
@@ -45,22 +45,21 @@ def format_user_prompt(question: str, evidence: list[Evidence]) -> str:
|
|
| 45 |
Formatted prompt string
|
| 46 |
"""
|
| 47 |
max_content_len = 1500
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
f"### Evidence {i + 1}\n"
|
| 51 |
f"**Source**: {e.citation.source.upper()} - {e.citation.title}\n"
|
| 52 |
f"**URL**: {e.citation.url}\n"
|
| 53 |
f"**Date**: {e.citation.date}\n"
|
| 54 |
-
f"**Content**:\n{
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
f"**URL**: {e.citation.url}\n"
|
| 59 |
-
f"**Date**: {e.citation.date}\n"
|
| 60 |
-
f"**Content**:\n{e.content}"
|
| 61 |
-
for i, e in enumerate(evidence)
|
| 62 |
-
]
|
| 63 |
-
)
|
| 64 |
|
| 65 |
return f"""## Research Question
|
| 66 |
{question}
|
|
|
|
| 45 |
Formatted prompt string
|
| 46 |
"""
|
| 47 |
max_content_len = 1500
|
| 48 |
+
|
| 49 |
+
def format_single_evidence(i: int, e: Evidence) -> str:
|
| 50 |
+
content = e.content
|
| 51 |
+
if len(content) > max_content_len:
|
| 52 |
+
content = content[:max_content_len] + "..."
|
| 53 |
+
|
| 54 |
+
return (
|
| 55 |
f"### Evidence {i + 1}\n"
|
| 56 |
f"**Source**: {e.citation.source.upper()} - {e.citation.title}\n"
|
| 57 |
f"**URL**: {e.citation.url}\n"
|
| 58 |
f"**Date**: {e.citation.date}\n"
|
| 59 |
+
f"**Content**:\n{content}"
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
evidence_text = "\n\n".join([format_single_evidence(i, e) for i, e in enumerate(evidence)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
return f"""## Research Question
|
| 65 |
{question}
|
tests/unit/agent_factory/test_judges.py
CHANGED
|
@@ -172,6 +172,8 @@ class TestMockJudgeHandler:
|
|
| 172 |
assert handler.last_evidence is not None
|
| 173 |
assert len(handler.last_evidence) == expected_evidence_len
|
| 174 |
assert result.details.mechanism_score == expected_mech_score
|
|
|
|
|
|
|
| 175 |
|
| 176 |
@pytest.mark.asyncio
|
| 177 |
async def test_mock_handler_custom_response(self):
|
|
|
|
| 172 |
assert handler.last_evidence is not None
|
| 173 |
assert len(handler.last_evidence) == expected_evidence_len
|
| 174 |
assert result.details.mechanism_score == expected_mech_score
|
| 175 |
+
assert result.sufficient is False
|
| 176 |
+
assert result.recommendation == "continue"
|
| 177 |
|
| 178 |
@pytest.mark.asyncio
|
| 179 |
async def test_mock_handler_custom_response(self):
|