Spaces:

DataQuests
/

DeepCritical

Running

VibecoderMcSwaggins committed 12 days ago

Commit

9760706

1 Parent(s): 95d2e29

fix: add type ignore comment for Agent instantiation in JudgeHandler

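Updated the Agent instantiation in JudgeHandler to use `output_type` instead of `result_type`, dropping its type ignore comment, and added a type ignore comment on the `result.data` access, addressing type-checking issues while maintaining functionality.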

Files changed (5) hide show

docs/implementation/05_phase_magentic.md +92 -36
docs/implementation/roadmap.md +4 -4
src/agent_factory/judges.py +7 -3
src/prompts/judge.py +11 -12
tests/unit/agent_factory/test_judges.py +2 -0

docs/implementation/05_phase_magentic.md CHANGED Viewed

@@ -112,12 +112,13 @@ magentic = [
 ### 4.2 Agent Wrappers (`src/agents/search_agent.py`)
-Wrap `SearchHandler` as an `AgentProtocol`:
 ```python
 """Search agent wrapper for Magentic integration."""
-from typing import Any
-from agent_framework import AgentProtocol, AgentRunResponse, ChatMessage, Role
 from src.tools.search_handler import SearchHandler
 from src.utils.models import SearchResult
@@ -130,6 +131,7 @@ class SearchAgent:
         self._handler = search_handler
         self._id = "search-agent"
         self._name = "SearchAgent"
     @property
     def id(self) -> str:
@@ -145,24 +147,29 @@ class SearchAgent:
     @property
     def description(self) -> str | None:
-        return "Searches PubMed and web for drug repurposing evidence"
     async def run(
         self,
-        messages: list[ChatMessage] | None = None,
         *,
-        thread: Any = None,
         **kwargs: Any,
     ) -> AgentRunResponse:
         """Execute search based on the last user message."""
         # Extract query from messages
         query = ""
-        if messages:
             for msg in reversed(messages):
-                if msg.role == Role.USER and msg.text:
                     query = msg.text
                     break
         if not query:
             return AgentRunResponse(
                 messages=[ChatMessage(role=Role.ASSISTANT, text="No query provided")],
@@ -183,24 +190,35 @@ class SearchAgent:
         return AgentRunResponse(
             messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
             response_id=f"search-{result.total_found}",
-            metadata={"evidence": [e.model_dump() for e in result.evidence]},
         )
-    def run_stream(self, messages=None, *, thread=None, **kwargs):
-        """Streaming not implemented for search."""
-        async def _stream():
-            result = await self.run(messages, thread=thread, **kwargs)
-            from agent_framework import AgentRunResponseUpdate
-            yield AgentRunResponseUpdate(messages=result.messages)
-        return _stream()
 ```
 ### 4.3 Judge Agent Wrapper (`src/agents/judge_agent.py`)
 ```python
 """Judge agent wrapper for Magentic integration."""
-from typing import Any, List
-from agent_framework import AgentProtocol, AgentRunResponse, ChatMessage, Role
 from src.agent_factory.judges import JudgeHandler
 from src.utils.models import Evidence, JudgeAssessment
@@ -214,6 +232,7 @@ class JudgeAgent:
         self._evidence_store = evidence_store  # Shared state for evidence
         self._id = "judge-agent"
         self._name = "JudgeAgent"
     @property
     def id(self) -> str:
@@ -229,23 +248,28 @@ class JudgeAgent:
     @property
     def description(self) -> str | None:
-        return "Evaluates evidence quality and determines if sufficient for synthesis"
     async def run(
         self,
-        messages: list[ChatMessage] | None = None,
         *,
-        thread: Any = None,
         **kwargs: Any,
     ) -> AgentRunResponse:
         """Assess evidence quality."""
         # Extract original question from messages
         question = ""
-        if messages:
-            for msg in messages:
-                if msg.role == Role.USER and msg.text:
                     question = msg.text
                     break
         # Get evidence from shared store
         evidence = self._evidence_store.get("current", [])
@@ -276,16 +300,26 @@ class JudgeAgent:
         return AgentRunResponse(
             messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
             response_id=f"judge-{assessment.recommendation}",
-            metadata={"assessment": assessment.model_dump()},
         )
-    def run_stream(self, messages=None, *, thread=None, **kwargs):
-        """Streaming not implemented for judge."""
-        async def _stream():
-            result = await self.run(messages, thread=thread, **kwargs)
-            from agent_framework import AgentRunResponseUpdate
-            yield AgentRunResponseUpdate(messages=result.messages)
-        return _stream()
 ```
 ### 4.4 Magentic Orchestrator (`src/orchestrator_magentic.py`)
@@ -300,6 +334,7 @@ from agent_framework import (
     MagenticFinalResultEvent,
     MagenticAgentMessageEvent,
     MagenticOrchestratorMessageEvent,
     WorkflowOutputEvent,
 )
 from agent_framework.openai import OpenAIChatClient
@@ -350,6 +385,7 @@ class MagenticOrchestrator:
         judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
         # Build Magentic workflow
         workflow = (
             MagenticBuilder()
             .participants(
@@ -383,32 +419,40 @@ Focus on finding:
         iteration = 0
         try:
             async for event in workflow.run_stream(task):
                 if isinstance(event, MagenticOrchestratorMessageEvent):
                     yield AgentEvent(
                         type="judging",
-                        message=f"Manager: {event.kind}",
                         iteration=iteration,
                     )
                 elif isinstance(event, MagenticAgentMessageEvent):
                     iteration += 1
                     agent_name = event.agent_id or "unknown"
                     if "search" in agent_name.lower():
                         yield AgentEvent(
                             type="search_complete",
-                            message=f"Search agent responded",
                             iteration=iteration,
                         )
                     elif "judge" in agent_name.lower():
                         yield AgentEvent(
                             type="judge_complete",
-                            message=f"Judge agent evaluated evidence",
                             iteration=iteration,
                         )
                 elif isinstance(event, MagenticFinalResultEvent):
                     final_text = event.message.text if event.message else "No result"
                     yield AgentEvent(
                         type="complete",
@@ -417,7 +461,19 @@ Focus on finding:
                         iteration=iteration,
                     )
                 elif isinstance(event, WorkflowOutputEvent):
                     if event.data:
                         yield AgentEvent(
                             type="complete",

 ### 4.2 Agent Wrappers (`src/agents/search_agent.py`)
+Wrap `SearchHandler` as an `AgentProtocol`.
+**Note**: `AgentProtocol` requires `id`, `name`, `display_name`, `description`, `run`, `run_stream`, and `get_new_thread`.
 ```python
 """Search agent wrapper for Magentic integration."""
+from typing import Any, AsyncIterable
+from agent_framework import AgentProtocol, AgentRunResponse, AgentRunResponseUpdate, ChatMessage, Role, AgentThread
 from src.tools.search_handler import SearchHandler
 from src.utils.models import SearchResult
         self._handler = search_handler
         self._id = "search-agent"
         self._name = "SearchAgent"
+        self._description = "Searches PubMed and web for drug repurposing evidence"
     @property
     def id(self) -> str:
     @property
     def description(self) -> str | None:
+        return self._description
     async def run(
         self,
+        messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
         *,
+        thread: AgentThread | None = None,
         **kwargs: Any,
     ) -> AgentRunResponse:
         """Execute search based on the last user message."""
         # Extract query from messages
         query = ""
+        if isinstance(messages, list):
             for msg in reversed(messages):
+                if isinstance(msg, ChatMessage) and msg.role == Role.USER and msg.text:
                     query = msg.text
                     break
+                elif isinstance(msg, str):
+                    query = msg
+                    break
+        elif isinstance(messages, str):
+            query = messages
         if not query:
             return AgentRunResponse(
                 messages=[ChatMessage(role=Role.ASSISTANT, text="No query provided")],
         return AgentRunResponse(
             messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
             response_id=f"search-{result.total_found}",
+            additional_properties={"evidence": [e.model_dump() for e in result.evidence]},
         )
+    async def run_stream(
+        self,
+        messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
+        *,
+        thread: AgentThread | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterable[AgentRunResponseUpdate]:
+        """Streaming wrapper for search (search itself isn't streaming)."""
+        result = await self.run(messages, thread=thread, **kwargs)
+        # Yield single update with full result
+        yield AgentRunResponseUpdate(
+            messages=result.messages,
+            response_id=result.response_id
+        )
+    def get_new_thread(self, **kwargs: Any) -> AgentThread:
+        """Create a new thread."""
+        return AgentThread(**kwargs)
 ```
 ### 4.3 Judge Agent Wrapper (`src/agents/judge_agent.py`)
 ```python
 """Judge agent wrapper for Magentic integration."""
+from typing import Any, List, AsyncIterable
+from agent_framework import AgentProtocol, AgentRunResponse, AgentRunResponseUpdate, ChatMessage, Role, AgentThread
 from src.agent_factory.judges import JudgeHandler
 from src.utils.models import Evidence, JudgeAssessment
         self._evidence_store = evidence_store  # Shared state for evidence
         self._id = "judge-agent"
         self._name = "JudgeAgent"
+        self._description = "Evaluates evidence quality and determines if sufficient for synthesis"
     @property
     def id(self) -> str:
     @property
     def description(self) -> str | None:
+        return self._description
     async def run(
         self,
+        messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
         *,
+        thread: AgentThread | None = None,
         **kwargs: Any,
     ) -> AgentRunResponse:
         """Assess evidence quality."""
         # Extract original question from messages
         question = ""
+        if isinstance(messages, list):
+            for msg in reversed(messages):
+                if isinstance(msg, ChatMessage) and msg.role == Role.USER and msg.text:
                     question = msg.text
                     break
+                elif isinstance(msg, str):
+                    question = msg
+                    break
+        elif isinstance(messages, str):
+            question = messages
         # Get evidence from shared store
         evidence = self._evidence_store.get("current", [])
         return AgentRunResponse(
             messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
             response_id=f"judge-{assessment.recommendation}",
+            additional_properties={"assessment": assessment.model_dump()},
         )
+    async def run_stream(
+        self,
+        messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
+        *,
+        thread: AgentThread | None = None,
+        **kwargs: Any,
+    ) -> AsyncIterable[AgentRunResponseUpdate]:
+        """Streaming wrapper for judge."""
+        result = await self.run(messages, thread=thread, **kwargs)
+        yield AgentRunResponseUpdate(
+            messages=result.messages,
+            response_id=result.response_id
+        )
+    def get_new_thread(self, **kwargs: Any) -> AgentThread:
+        """Create a new thread."""
+        return AgentThread(**kwargs)
 ```
 ### 4.4 Magentic Orchestrator (`src/orchestrator_magentic.py`)
     MagenticFinalResultEvent,
     MagenticAgentMessageEvent,
     MagenticOrchestratorMessageEvent,
+    MagenticAgentDeltaEvent,
     WorkflowOutputEvent,
 )
 from agent_framework.openai import OpenAIChatClient
         judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
         # Build Magentic workflow
+        # Note: MagenticBuilder.participants takes named arguments for agent instances
         workflow = (
             MagenticBuilder()
             .participants(
         iteration = 0
         try:
+            # workflow.run_stream returns an async generator of workflow events
             async for event in workflow.run_stream(task):
                 if isinstance(event, MagenticOrchestratorMessageEvent):
+                    # Manager events (planning, instruction, ledger)
+                    message_text = event.message.text if event.message else ""
                     yield AgentEvent(
                         type="judging",
+                        message=f"Manager ({event.kind}): {message_text[:100]}...",
                         iteration=iteration,
                     )
                 elif isinstance(event, MagenticAgentMessageEvent):
+                    # Complete agent response
                     iteration += 1
                     agent_name = event.agent_id or "unknown"
+                    msg_text = event.message.text if event.message else ""
                     if "search" in agent_name.lower():
+                        # Check if we found evidence (based on SearchAgent logic)
+                        # In a real implementation we might extract metadata
                         yield AgentEvent(
                             type="search_complete",
+                            message=f"Search agent: {msg_text[:100]}...",
                             iteration=iteration,
                         )
                     elif "judge" in agent_name.lower():
                         yield AgentEvent(
                             type="judge_complete",
+                            message=f"Judge agent: {msg_text[:100]}...",
                             iteration=iteration,
                         )
                 elif isinstance(event, MagenticFinalResultEvent):
+                    # Final workflow result
                     final_text = event.message.text if event.message else "No result"
                     yield AgentEvent(
                         type="complete",
                         iteration=iteration,
                     )
+                elif isinstance(event, MagenticAgentDeltaEvent):
+                    # Streaming token chunks from agents (optional "typing" effect)
+                    # Only emit if we have actual text content
+                    if event.text:
+                        yield AgentEvent(
+                            type="streaming",
+                            message=event.text,
+                            data={"agent_id": event.agent_id},
+                            iteration=iteration,
+                        )
                 elif isinstance(event, WorkflowOutputEvent):
+                    # Alternative final output event
                     if event.data:
                         yield AgentEvent(
                             type="complete",

docs/implementation/roadmap.md CHANGED Viewed

@@ -119,10 +119,10 @@ tests/
 *Goal: Upgrade orchestrator to use Microsoft Agent Framework patterns.*
-- [ ] Wrap SearchHandler as `AgentProtocol` (SearchAgent)
-- [ ] Wrap JudgeHandler as `AgentProtocol` (JudgeAgent)
-- [ ] Implement `MagenticOrchestrator` using `MagenticBuilder`
-- [ ] Create factory pattern for switching implementations
 - **Deliverable**: Same API, better multi-agent orchestration engine.
 **NOTE**: Only implement Phase 5 if time permits after MVP is shipped.

 *Goal: Upgrade orchestrator to use Microsoft Agent Framework patterns.*
+- [ ] Wrap SearchHandler as `AgentProtocol` (SearchAgent) with strict protocol compliance.
+- [ ] Wrap JudgeHandler as `AgentProtocol` (JudgeAgent) with strict protocol compliance.
+- [ ] Implement `MagenticOrchestrator` using `MagenticBuilder`.
+- [ ] Create factory pattern for switching implementations.
 - **Deliverable**: Same API, better multi-agent orchestration engine.
 **NOTE**: Only implement Phase 5 if time permits after MVP is shipped.

src/agent_factory/judges.py CHANGED Viewed

@@ -24,6 +24,10 @@ def get_model() -> Any:
     if provider == "anthropic":
         return AnthropicModel(settings.anthropic_model)
     return OpenAIModel(settings.openai_model)
@@ -42,9 +46,9 @@ class JudgeHandler:
             model: Optional PydanticAI model. If None, uses config default.
         """
         self.model = model or get_model()
-        self.agent = Agent(  # type: ignore[call-overload]
             model=self.model,
-            result_type=JudgeAssessment,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )
@@ -82,7 +86,7 @@ class JudgeHandler:
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
-            assessment = cast(JudgeAssessment, result.data)
             logger.info(
                 "Assessment complete",

     if provider == "anthropic":
         return AnthropicModel(settings.anthropic_model)
+    if provider != "openai":
+        logger.warning("Unknown LLM provider, defaulting to OpenAI", provider=provider)
     return OpenAIModel(settings.openai_model)
             model: Optional PydanticAI model. If None, uses config default.
         """
         self.model = model or get_model()
+        self.agent = Agent(
             model=self.model,
+            output_type=JudgeAssessment,
             system_prompt=SYSTEM_PROMPT,
             retries=3,
         )
         try:
             # Run the agent with structured output
             result = await self.agent.run(user_prompt)
+            assessment = cast(JudgeAssessment, result.data)  # type: ignore[attr-defined]
             logger.info(
                 "Assessment complete",

src/prompts/judge.py CHANGED Viewed

@@ -45,22 +45,21 @@ def format_user_prompt(question: str, evidence: list[Evidence]) -> str:
         Formatted prompt string
     """
     max_content_len = 1500
-    evidence_text = "\n\n".join(
-        [
             f"### Evidence {i + 1}\n"
             f"**Source**: {e.citation.source.upper()} - {e.citation.title}\n"
             f"**URL**: {e.citation.url}\n"
             f"**Date**: {e.citation.date}\n"
-            f"**Content**:\n{e.content[:max_content_len]}..."
-            if len(e.content) > max_content_len
-            else f"### Evidence {i + 1}\n"
-            f"**Source**: {e.citation.source.upper()} - {e.citation.title}\n"
-            f"**URL**: {e.citation.url}\n"
-            f"**Date**: {e.citation.date}\n"
-            f"**Content**:\n{e.content}"
-            for i, e in enumerate(evidence)
-        ]
-    )
     return f"""## Research Question
 {question}

         Formatted prompt string
     """
     max_content_len = 1500
+    def format_single_evidence(i: int, e: Evidence) -> str:
+        content = e.content
+        if len(content) > max_content_len:
+            content = content[:max_content_len] + "..."
+        return (
             f"### Evidence {i + 1}\n"
             f"**Source**: {e.citation.source.upper()} - {e.citation.title}\n"
             f"**URL**: {e.citation.url}\n"
             f"**Date**: {e.citation.date}\n"
+            f"**Content**:\n{content}"
+        )
+    evidence_text = "\n\n".join([format_single_evidence(i, e) for i, e in enumerate(evidence)])
     return f"""## Research Question
 {question}

tests/unit/agent_factory/test_judges.py CHANGED Viewed

@@ -172,6 +172,8 @@ class TestMockJudgeHandler:
         assert handler.last_evidence is not None
         assert len(handler.last_evidence) == expected_evidence_len
         assert result.details.mechanism_score == expected_mech_score
     @pytest.mark.asyncio
     async def test_mock_handler_custom_response(self):

         assert handler.last_evidence is not None
         assert len(handler.last_evidence) == expected_evidence_len
         assert result.details.mechanism_score == expected_mech_score
+        assert result.sufficient is False
+        assert result.recommendation == "continue"
     @pytest.mark.asyncio
     async def test_mock_handler_custom_response(self):