VibecoderMcSwaggins committed · Commit 645a051 · 1 Parent(s): fb7b8d7

feat: implement Magentic ChatAgent pattern with semantic state management

- Add src/agents/state.py: Thread-safe MagenticState with contextvars
  - Evidence store for structured citation access
  - EmbeddingService integration for semantic deduplication

- Add src/agents/tools.py: AIFunction tools that update shared state
  - search_pubmed, search_clinical_trials, search_preprints
  - get_bibliography for ReportAgent citations
  - Tools return strings to the LLM AND update shared state

- Add src/agents/magentic_agents.py: ChatAgent factories
  - SearchAgent with search tools
  - JudgeAgent, HypothesisAgent, ReportAgent
  - Each agent has an internal OpenAIChatClient

- Update src/orchestrator_magentic.py: Use ChatAgent pattern
  - Initialize MagenticState at workflow start
  - Properly stream events from MagenticBuilder

- Fix type errors for pre-commit mypy compatibility

Implements Phase 5 spec for correct Microsoft Agent Framework integration.
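
For reference, a minimal usage sketch of the new entry point (assumes OPENAI_API_KEY is configured and agent-framework is installed; the query string is illustrative):

    import asyncio

    from src.orchestrator_magentic import MagenticOrchestrator

    async def main() -> None:
        orchestrator = MagenticOrchestrator(max_rounds=10)
        # run() yields AgentEvent objects for real-time UI updates
        async for event in orchestrator.run("metformin for Alzheimer's disease"):
            print(event.type, event.message)

    asyncio.run(main())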

src/agents/magentic_agents.py ADDED
@@ -0,0 +1,184 @@
+ """Magentic-compatible agents using ChatAgent pattern."""
+
+ from agent_framework import ChatAgent
+ from agent_framework.openai import OpenAIChatClient
+
+ from src.agents.tools import (
+     get_bibliography,
+     search_clinical_trials,
+     search_preprints,
+     search_pubmed,
+ )
+ from src.utils.config import settings
+
+
+ def create_search_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
+     """Create a search agent with internal LLM and search tools.
+
+     Args:
+         chat_client: Optional custom chat client. If None, uses default.
+
+     Returns:
+         ChatAgent configured for biomedical search
+     """
+     client = chat_client or OpenAIChatClient(
+         model_id="gpt-4o-mini",  # Fast, cheap for tool orchestration
+         api_key=settings.openai_api_key,
+     )
+
+     return ChatAgent(
+         name="SearchAgent",
+         description=(
+             "Searches biomedical databases (PubMed, ClinicalTrials.gov, bioRxiv) "
+             "for drug repurposing evidence"
+         ),
+         instructions="""You are a biomedical search specialist. When asked to find evidence:
+
+ 1. Analyze the request to determine what to search for
+ 2. Extract key search terms (drug names, disease names, mechanisms)
+ 3. Use the appropriate search tools:
+ - search_pubmed for peer-reviewed papers
+ - search_clinical_trials for clinical studies
+ - search_preprints for cutting-edge findings
+ 4. Summarize what you found and highlight key evidence
+
+ Be thorough - search multiple databases when appropriate.
+ Focus on finding: mechanisms of action, clinical evidence, and specific drug candidates.""",
+         chat_client=client,
+         tools=[search_pubmed, search_clinical_trials, search_preprints],
+         temperature=0.3,  # More deterministic for tool use
+     )
+
+
+ def create_judge_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
+     """Create a judge agent that evaluates evidence quality.
+
+     Args:
+         chat_client: Optional custom chat client. If None, uses default.
+
+     Returns:
+         ChatAgent configured for evidence assessment
+     """
+     client = chat_client or OpenAIChatClient(
+         model_id="gpt-4o",  # Better model for nuanced judgment
+         api_key=settings.openai_api_key,
+     )
+
+     return ChatAgent(
+         name="JudgeAgent",
+         description="Evaluates evidence quality and determines if sufficient for synthesis",
+         instructions="""You are an evidence quality assessor. When asked to evaluate:
+
+ 1. Review all evidence presented in the conversation
+ 2. Score on two dimensions (0-10 each):
+ - Mechanism Score: How well is the biological mechanism explained?
+ - Clinical Score: How strong is the clinical/preclinical evidence?
+ 3. Determine if evidence is SUFFICIENT for a final report:
+ - Sufficient: Clear mechanism + supporting clinical data
+ - Insufficient: Gaps in mechanism OR weak clinical evidence
+ 4. If insufficient, suggest specific search queries to fill gaps
+
+ Be rigorous but fair. Look for:
+ - Molecular targets and pathways
+ - Animal model studies
+ - Human clinical trials
+ - Safety data
+ - Drug-drug interactions""",
+         chat_client=client,
+         temperature=0.2,  # Consistent judgments
+     )
+
+
+ def create_hypothesis_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
+     """Create a hypothesis generation agent.
+
+     Args:
+         chat_client: Optional custom chat client. If None, uses default.
+
+     Returns:
+         ChatAgent configured for hypothesis generation
+     """
+     client = chat_client or OpenAIChatClient(
+         model_id="gpt-4o",
+         api_key=settings.openai_api_key,
+     )
+
+     return ChatAgent(
+         name="HypothesisAgent",
+         description="Generates mechanistic hypotheses for drug repurposing",
+         instructions="""You are a biomedical hypothesis generator. Based on evidence:
+
+ 1. Identify the key molecular targets involved
+ 2. Map the biological pathways affected
+ 3. Generate testable hypotheses in this format:
+
+ DRUG -> TARGET -> PATHWAY -> THERAPEUTIC EFFECT
+
+ Example:
+ Metformin -> AMPK activation -> mTOR inhibition -> Reduced tau phosphorylation
+
+ 4. Explain the rationale for each hypothesis
+ 5. Suggest what additional evidence would support or refute it
+
+ Focus on mechanistic plausibility and existing evidence.""",
+         chat_client=client,
+         temperature=0.5,  # Some creativity for hypothesis generation
+     )
+
+
+ def create_report_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent:
+     """Create a report synthesis agent.
+
+     Args:
+         chat_client: Optional custom chat client. If None, uses default.
+
+     Returns:
+         ChatAgent configured for report generation
+     """
+     client = chat_client or OpenAIChatClient(
+         model_id="gpt-4o",
+         api_key=settings.openai_api_key,
+     )
+
+     return ChatAgent(
+         name="ReportAgent",
+         description="Synthesizes research findings into structured reports",
+         instructions="""You are a scientific report writer. When asked to synthesize:
+
+ Generate a structured report with these sections:
+
+ ## Executive Summary
+ Brief overview of findings and recommendation
+
+ ## Methodology
+ Databases searched, queries used, evidence reviewed
+
+ ## Key Findings
+ ### Mechanism of Action
+ - Molecular targets
+ - Biological pathways
+ - Proposed mechanism
+
+ ### Clinical Evidence
+ - Preclinical studies
+ - Clinical trials
+ - Safety profile
+
+ ## Drug Candidates
+ List specific drugs with repurposing potential
+
+ ## Limitations
+ Gaps in evidence, conflicting data, caveats
+
+ ## Conclusion
+ Final recommendation with confidence level
+
+ ## References
+ Use the 'get_bibliography' tool to fetch the complete list of citations.
+ Format them as a numbered list.
+
+ Be comprehensive but concise. Cite evidence for all claims.""",
+         chat_client=client,
+         tools=[get_bibliography],
+         temperature=0.3,
+     )
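
All four factories accept an optional chat_client, so one client can be shared across agents instead of the per-agent defaults above; a minimal sketch (the shared model choice is illustrative, not prescribed by this commit):

    from agent_framework.openai import OpenAIChatClient

    from src.agents.magentic_agents import (
        create_hypothesis_agent,
        create_judge_agent,
        create_report_agent,
        create_search_agent,
    )
    from src.utils.config import settings

    # One shared client injected into every factory
    shared = OpenAIChatClient(model_id="gpt-4o-mini", api_key=settings.openai_api_key)
    agents = [
        create_search_agent(shared),
        create_judge_agent(shared),
        create_hypothesis_agent(shared),
        create_report_agent(shared),
    ]
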
src/agents/state.py ADDED
@@ -0,0 +1,90 @@
+ """Thread-safe state management for Magentic agents.
+
+ Uses contextvars to ensure isolation between concurrent requests (e.g., multiple users
+ searching simultaneously via Gradio).
+ """
+
+ from contextvars import ContextVar
+ from typing import TYPE_CHECKING, Any
+
+ from pydantic import BaseModel, Field
+
+ from src.utils.models import Citation, Evidence
+
+ if TYPE_CHECKING:
+     from src.services.embeddings import EmbeddingService
+
+
+ class MagenticState(BaseModel):
+     """Mutable state for a Magentic workflow session."""
+
+     evidence: list[Evidence] = Field(default_factory=list)
+     # Typed as Any to avoid circular imports/runtime resolution issues.
+     # The actual object injected will be an EmbeddingService instance.
+     embedding_service: Any = None
+
+     model_config = {"arbitrary_types_allowed": True}
+
+     def add_evidence(self, new_evidence: list[Evidence]) -> int:
+         """Add new evidence, deduplicating by URL.
+
+         Returns:
+             Number of *new* items added.
+         """
+         existing_urls = {e.citation.url for e in self.evidence}
+         count = 0
+         for item in new_evidence:
+             if item.citation.url not in existing_urls:
+                 self.evidence.append(item)
+                 existing_urls.add(item.citation.url)
+                 count += 1
+         return count
+
+     async def search_related(self, query: str, n_results: int = 5) -> list[Evidence]:
+         """Search for semantically related evidence using the embedding service."""
+         if not self.embedding_service:
+             return []
+
+         results = await self.embedding_service.search_similar(query, n_results=n_results)
+
+         # Convert dict results back to Evidence objects
+         evidence_list = []
+         for item in results:
+             meta = item.get("metadata", {})
+             authors_str = meta.get("authors", "")
+             authors = [a.strip() for a in authors_str.split(",") if a.strip()]
+
+             ev = Evidence(
+                 content=item["content"],
+                 citation=Citation(
+                     title=meta.get("title", "Related Evidence"),
+                     url=item["id"],
+                     source="pubmed",  # Defaulting to pubmed if unknown
+                     date=meta.get("date", "n.d."),
+                     authors=authors,
+                 ),
+                 relevance=max(0.0, 1.0 - item.get("distance", 0.5)),
+             )
+             evidence_list.append(ev)
+
+         return evidence_list
+
+
+ # The ContextVar holds the MagenticState for the current execution context
+ _magentic_state_var: ContextVar[MagenticState | None] = ContextVar("magentic_state", default=None)
+
+
+ def init_magentic_state(embedding_service: "EmbeddingService | None" = None) -> MagenticState:
+     """Initialize a new state for the current context."""
+     state = MagenticState(embedding_service=embedding_service)
+     _magentic_state_var.set(state)
+     return state
+
+
+ def get_magentic_state() -> MagenticState:
+     """Get the current state, auto-initializing a fresh one if none exists."""
+     state = _magentic_state_var.get()
+     if state is None:
+         # Auto-initialize if missing (e.g. during tests or simple scripts)
+         return init_magentic_state()
+     return state
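
A minimal sketch of the per-request isolation this module is built around: each asyncio task runs in its own context copy, so one request's init_magentic_state() does not leak into another.

    import asyncio

    from src.agents.state import MagenticState, get_magentic_state, init_magentic_state

    async def handle_request() -> MagenticState:
        state = init_magentic_state()         # fresh state bound to this task's context
        assert get_magentic_state() is state  # tools called here would see this object
        return state

    async def main() -> None:
        a, b = await asyncio.gather(handle_request(), handle_request())
        assert a is not b                     # concurrent requests stay isolated

    asyncio.run(main())
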
src/agents/tools.py ADDED
@@ -0,0 +1,175 @@
+ """Tool functions for Magentic agents.
+
+ These functions are decorated with @ai_function to be callable by the ChatAgent's internal LLM.
+ They also interact with the thread-safe MagenticState to persist evidence.
+ """
+
+ from agent_framework import ai_function
+
+ from src.agents.state import get_magentic_state
+ from src.tools.biorxiv import BioRxivTool
+ from src.tools.clinicaltrials import ClinicalTrialsTool
+ from src.tools.pubmed import PubMedTool
+
+ # Singleton tool instances (stateless wrappers)
+ _pubmed = PubMedTool()
+ _clinicaltrials = ClinicalTrialsTool()
+ _biorxiv = BioRxivTool()
+
+
+ @ai_function  # type: ignore[arg-type, misc]
+ async def search_pubmed(query: str, max_results: int = 10) -> str:
+     """Search PubMed for biomedical research papers.
+
+     Use this tool to find peer-reviewed scientific literature about
+     drugs, diseases, mechanisms of action, and clinical studies.
+
+     Args:
+         query: Search keywords (e.g., "metformin alzheimer mechanism")
+         max_results: Maximum results to return (default 10)
+
+     Returns:
+         Formatted list of papers with titles, abstracts, and citations
+     """
+     state = get_magentic_state()
+
+     # 1. Execute raw search
+     results = await _pubmed.search(query, max_results)
+     if not results:
+         return f"No PubMed results found for: {query}"
+
+     # 2. Semantic Deduplication & Expansion (The "Digital Twin" Brain)
+     display_results = results
+     if state.embedding_service:
+         # Deduplicate against what we just found vs what's in the DB
+         unique_results = await state.embedding_service.deduplicate(results)
+
+         # Search for related context in the vector DB (previous searches)
+         related = await state.search_related(query, n_results=3)
+
+         # Combine unique new results + relevant historical results
+         display_results = unique_results + related
+
+     # 3. Update State (Persist for ReportAgent)
+     # We add *all* found results to state, not just the displayed ones
+     new_count = state.add_evidence(results)
+
+     # 4. Format Output for LLM
+     output = [f"Found {len(results)} results ({new_count} new stored):\n"]
+
+     # Limit display to avoid context window overflow, but state has everything
+     limit = min(len(display_results), max_results)
+
+     for i, r in enumerate(display_results[:limit], 1):
+         title = r.citation.title
+         date = r.citation.date
+         source = r.citation.source
+         content_clean = r.content[:300].replace("\n", " ")
+         url = r.citation.url
+
+         output.append(f"{i}. **{title}** ({date})")
+         output.append(f" Source: {source} | {url}")
+         output.append(f" {content_clean}...")
+         output.append("")
+
+     return "\n".join(output)
+
+
+ @ai_function  # type: ignore[arg-type, misc]
+ async def search_clinical_trials(query: str, max_results: int = 10) -> str:
+     """Search ClinicalTrials.gov for clinical studies.
+
+     Use this tool to find ongoing and completed clinical trials
+     for drug repurposing candidates.
+
+     Args:
+         query: Search terms (e.g., "metformin cancer phase 3")
+         max_results: Maximum results to return (default 10)
+
+     Returns:
+         Formatted list of clinical trials with status and details
+     """
+     state = get_magentic_state()
+
+     results = await _clinicaltrials.search(query, max_results)
+     if not results:
+         return f"No clinical trials found for: {query}"
+
+     # Update state
+     new_count = state.add_evidence(results)
+
+     output = [f"Found {len(results)} clinical trials ({new_count} new stored):\n"]
+     for i, r in enumerate(results[:max_results], 1):
+         title = r.citation.title
+         date = r.citation.date
+         source = r.citation.source
+         content_clean = r.content[:300].replace("\n", " ")
+         url = r.citation.url
+
+         output.append(f"{i}. **{title}**")
+         output.append(f" Status: {source} | Date: {date}")
+         output.append(f" {content_clean}...")
+         output.append(f" URL: {url}\n")
+
+     return "\n".join(output)
+
+
+ @ai_function  # type: ignore[arg-type, misc]
+ async def search_preprints(query: str, max_results: int = 10) -> str:
+     """Search bioRxiv/medRxiv for preprint papers.
+
+     Use this tool to find the latest research that hasn't been
+     peer-reviewed yet. Good for cutting-edge findings.
+
+     Args:
+         query: Search terms (e.g., "long covid treatment")
+         max_results: Maximum results to return (default 10)
+
+     Returns:
+         Formatted list of preprints with abstracts and links
+     """
+     state = get_magentic_state()
+
+     results = await _biorxiv.search(query, max_results)
+     if not results:
+         return f"No preprints found for: {query}"
+
+     # Update state
+     new_count = state.add_evidence(results)
+
+     output = [f"Found {len(results)} preprints ({new_count} new stored):\n"]
+     for i, r in enumerate(results[:max_results], 1):
+         title = r.citation.title
+         date = r.citation.date
+         source = r.citation.source
+         content_clean = r.content[:300].replace("\n", " ")
+         url = r.citation.url
+
+         output.append(f"{i}. **{title}**")
+         output.append(f" Server: {source} | Date: {date}")
+         output.append(f" {content_clean}...")
+         output.append(f" URL: {url}\n")
+
+     return "\n".join(output)
+
+
+ @ai_function  # type: ignore[arg-type, misc]
+ async def get_bibliography() -> str:
+     """Get the full list of collected evidence for the bibliography.
+
+     Use this tool when generating the final report to get the complete
+     list of references.
+
+     Returns:
+         Formatted bibliography string.
+     """
+     state = get_magentic_state()
+     if not state.evidence:
+         return "No evidence collected."
+
+     output = ["## References"]
+     for i, ev in enumerate(state.evidence, 1):
+         output.append(f"{i}. {ev.citation.formatted}")
+         output.append(f" URL: {ev.citation.url}")
+
+     return "\n".join(output)
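
The tools above persist results through MagenticState.add_evidence, which deduplicates by citation URL. A minimal sketch of that behaviour (the field values are made up; the constructor fields mirror those used in MagenticState.search_related):

    from src.agents.state import MagenticState
    from src.utils.models import Citation, Evidence

    ev = Evidence(
        content="Metformin activates AMPK ...",
        citation=Citation(
            title="Illustrative paper",                    # hypothetical
            url="https://pubmed.ncbi.nlm.nih.gov/12345/",  # hypothetical
            source="pubmed",
            date="2024",
            authors=["Doe J"],
        ),
        relevance=0.9,
    )

    state = MagenticState()
    assert state.add_evidence([ev, ev]) == 1  # same URL stored once
    assert state.add_evidence([ev]) == 0      # already present, nothing added
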
src/orchestrator_factory.py CHANGED
@@ -5,18 +5,10 @@ from typing import Any, Literal
from src.orchestrator import JudgeHandlerProtocol, Orchestrator, SearchHandlerProtocol
from src.utils.models import OrchestratorConfig

- # Define protocols again or import if they were in a shared place.
-
- # Since they are in src/orchestrator.py, we can import them?
-
- # But SearchHandler and JudgeHandler in arguments are concrete classes in the type hint,
-
- # which satisfy the protocol.
-

def create_orchestrator(
-     search_handler: SearchHandlerProtocol,
-     judge_handler: JudgeHandlerProtocol,
+     search_handler: SearchHandlerProtocol | None = None,
+     judge_handler: JudgeHandlerProtocol | None = None,
    config: OrchestratorConfig | None = None,
    mode: Literal["simple", "magentic"] = "simple",
) -> Any:
@@ -24,27 +16,33 @@
    Create an orchestrator instance.

    Args:
-         search_handler: The search handler
-         judge_handler: The judge handler
+         search_handler: The search handler (required for simple mode)
+         judge_handler: The judge handler (required for simple mode)
        config: Optional configuration
-         mode: "simple" for Phase 4 loop, "magentic" for Phase 5 multi-agent
+         mode: "simple" for Phase 4 loop, "magentic" for ChatAgent-based multi-agent

    Returns:
-         Orchestrator instance (same interface regardless of mode)
+         Orchestrator instance
+
+     Note:
+         Magentic mode does NOT use search_handler/judge_handler.
+         It creates ChatAgent instances with internal LLMs that call tools directly.
    """
    if mode == "magentic":
        try:
            from src.orchestrator_magentic import MagenticOrchestrator

            return MagenticOrchestrator(
-                 search_handler=search_handler,
-                 judge_handler=judge_handler,
                max_rounds=config.max_iterations if config else 10,
            )
        except ImportError:
            # Fallback to simple if agent-framework not installed
            pass

+     # Simple mode requires handlers
+     if search_handler is None or judge_handler is None:
+         raise ValueError("Simple mode requires search_handler and judge_handler")
+
    return Orchestrator(
        search_handler=search_handler,
        judge_handler=judge_handler,
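
A hedged sketch of the two calling conventions after this change (magentic mode also needs agent-framework installed and OPENAI_API_KEY set; the ValueError message comes from the new guard):

    from src.orchestrator_factory import create_orchestrator

    # Magentic mode: no handlers needed; ChatAgents call tools via their internal LLMs.
    orchestrator = create_orchestrator(mode="magentic")

    # Simple mode still requires both handlers; omitting them now raises ValueError.
    try:
        create_orchestrator(mode="simple")
    except ValueError as err:
        print(err)  # "Simple mode requires search_handler and judge_handler"
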
src/orchestrator_magentic.py CHANGED
@@ -1,18 +1,9 @@
- """Magentic-based orchestrator for DeepCritical.
-
- NOTE: Magentic mode currently requires OpenAI API keys. The MagenticBuilder's
- standard manager uses OpenAIChatClient. Anthropic support may be added when
- the agent_framework provides an AnthropicChatClient.
- """
+ """Magentic-based orchestrator using ChatAgent pattern."""

from collections.abc import AsyncGenerator
from typing import TYPE_CHECKING, Any

import structlog
-
- if TYPE_CHECKING:
-     from src.services.embeddings import EmbeddingService
-
from agent_framework import (
    MagenticAgentDeltaEvent,
    MagenticAgentMessageEvent,
@@ -23,45 +14,49 @@ from agent_framework import (
)
from agent_framework.openai import OpenAIChatClient

- from src.agents.hypothesis_agent import HypothesisAgent
- from src.agents.judge_agent import JudgeAgent
- from src.agents.report_agent import ReportAgent
- from src.agents.search_agent import SearchAgent
- from src.orchestrator import JudgeHandlerProtocol, SearchHandlerProtocol
+ from src.agents.magentic_agents import (
+     create_hypothesis_agent,
+     create_judge_agent,
+     create_report_agent,
+     create_search_agent,
+ )
+ from src.agents.state import init_magentic_state
from src.utils.config import settings
from src.utils.exceptions import ConfigurationError
- from src.utils.models import AgentEvent, Evidence
-
- logger = structlog.get_logger()
+ from src.utils.models import AgentEvent

+ if TYPE_CHECKING:
+     from src.services.embeddings import EmbeddingService

+ logger = structlog.get_logger()
- def _truncate(text: str, max_len: int = 100) -> str:
-     """Truncate text with ellipsis only if needed."""
-     return f"{text[:max_len]}..." if len(text) > max_len else text


class MagenticOrchestrator:
    """
-     Magentic-based orchestrator - same API as Orchestrator.
-
-     Uses Microsoft Agent Framework's MagenticBuilder for multi-agent coordination.
+     Magentic-based orchestrator using ChatAgent pattern.

-     Note:
-         Magentic mode requires OPENAI_API_KEY. The MagenticBuilder's standard
-         manager currently only supports OpenAI. If you have only an Anthropic
-         key, use the "simple" orchestrator mode instead.
+     Each agent has an internal LLM that understands natural language
+     instructions from the manager and can call tools appropriately.
    """

    def __init__(
        self,
-         search_handler: SearchHandlerProtocol,
-         judge_handler: JudgeHandlerProtocol,
        max_rounds: int = 10,
+         chat_client: OpenAIChatClient | None = None,
    ) -> None:
-         self._search_handler = search_handler
-         self._judge_handler = judge_handler
+         """Initialize orchestrator.
+
+         Args:
+             max_rounds: Maximum coordination rounds
+             chat_client: Optional shared chat client for agents
+         """
+         if not settings.openai_api_key:
+             raise ConfigurationError(
+                 "Magentic mode requires OPENAI_API_KEY. Set the key or use mode='simple'."
+             )
+
        self._max_rounds = max_rounds
-         self._evidence_store: dict[str, list[Evidence]] = {"current": []}
+         self._chat_client = chat_client

    def _init_embedding_service(self) -> "EmbeddingService | None":
        """Initialize embedding service if available."""
@@ -77,19 +72,19 @@ class MagenticOrchestrator:
            logger.warning("Failed to initialize embedding service", error=str(e))
            return None

-     def _build_workflow(
-         self,
-         search_agent: SearchAgent,
-         hypothesis_agent: HypothesisAgent,
-         judge_agent: JudgeAgent,
-         report_agent: ReportAgent,
-     ) -> Any:
-         """Build the Magentic workflow with participants."""
-         if not settings.openai_api_key:
-             raise ConfigurationError(
-                 "Magentic mode requires OPENAI_API_KEY. "
-                 "Set the key or use mode='simple' with Anthropic."
-             )
+     def _build_workflow(self) -> Any:
+         """Build the Magentic workflow with ChatAgent participants."""
+         # Create agents with internal LLMs
+         search_agent = create_search_agent(self._chat_client)
+         judge_agent = create_judge_agent(self._chat_client)
+         hypothesis_agent = create_hypothesis_agent(self._chat_client)
+         report_agent = create_report_agent(self._chat_client)
+
+         # Manager chat client (orchestrates the agents)
+         manager_client = OpenAIChatClient(
+             model_id="gpt-4o",  # Good model for planning/coordination
+             api_key=settings.openai_api_key,
+         )

        return (
            MagenticBuilder()
@@ -100,9 +95,7 @@
                reporter=report_agent,
            )
            .with_standard_manager(
-                 chat_client=OpenAIChatClient(
-                     model_id=settings.openai_model, api_key=settings.openai_api_key
-                 ),
+                 chat_client=manager_client,
                max_round_count=self._max_rounds,
                max_stall_count=3,
                max_reset_count=2,
@@ -110,46 +103,15 @@
            .build()
        )

-     def _format_task(self, query: str, has_embeddings: bool) -> str:
-         """Format the task instruction for the manager."""
-         semantic_note = ""
-         if has_embeddings:
-             semantic_note = """
- The system has semantic search enabled. When evidence is found:
- 1. Related concepts will be automatically surfaced
- 2. Duplicates are removed by meaning, not just URL
- 3. Use the surfaced related concepts to refine searches
- """
-         return f"""Research drug repurposing opportunities for: {query}
- {semantic_note}
- Workflow:
- 1. SearcherAgent: Find initial evidence from PubMed and web. SEND ONLY A SIMPLE KEYWORD QUERY.
- 2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect).
- 3. SearcherAgent: Use hypothesis-suggested queries for targeted search.
- 4. JudgeAgent: Evaluate if evidence supports hypotheses.
- 5. If sufficient -> ReportAgent: Generate structured research report.
- 6. If not sufficient -> Repeat from step 1 with refined queries.
-
- Focus on:
- - Identifying specific molecular targets
- - Understanding mechanism of action
- - Finding supporting/contradicting evidence for hypotheses
-
- The final output should be a complete research report with:
- - Executive summary
- - Methodology
- - Hypotheses tested
- - Mechanistic and clinical findings
- - Drug candidates
- - Limitations
- - Conclusion with references
- """
-
    async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
        """
-         Run the Magentic workflow - same API as simple Orchestrator.
+         Run the Magentic workflow.
+
+         Args:
+             query: User's research question

-         Yields AgentEvent objects for real-time UI updates.
+         Yields:
+             AgentEvent objects for real-time UI updates
        """
        logger.info("Starting Magentic orchestrator", query=query)

@@ -159,20 +121,27 @@
            iteration=0,
        )

-         # Initialize services and agents
+         # Initialize context state
        embedding_service = self._init_embedding_service()
-         search_agent = SearchAgent(
-             self._search_handler, self._evidence_store, embedding_service=embedding_service
-         )
-         judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
-         hypothesis_agent = HypothesisAgent(
-             self._evidence_store, embedding_service=embedding_service
-         )
-         report_agent = ReportAgent(self._evidence_store, embedding_service=embedding_service)
+         init_magentic_state(embedding_service)
+
+         workflow = self._build_workflow()
+
+         task = f"""Research drug repurposing opportunities for: {query}

-         # Build workflow and task
-         workflow = self._build_workflow(search_agent, hypothesis_agent, judge_agent, report_agent)
-         task = self._format_task(query, embedding_service is not None)
+ Workflow:
+ 1. SearchAgent: Find evidence from PubMed, ClinicalTrials.gov, and bioRxiv
+ 2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect)
+ 3. JudgeAgent: Evaluate if evidence is sufficient
+ 4. If insufficient -> SearchAgent refines search based on gaps
+ 5. If sufficient -> ReportAgent synthesizes final report
+
+ Focus on:
+ - Identifying specific molecular targets
+ - Understanding mechanism of action
+ - Finding clinical evidence supporting hypotheses
+
+ The final output should be a structured research report."""

        iteration = 0
        try:
@@ -182,6 +151,7 @@
                if isinstance(event, MagenticAgentMessageEvent):
                    iteration += 1
                yield agent_event
+
        except Exception as e:
            logger.error("Magentic workflow failed", error=str(e))
            yield AgentEvent(
@@ -191,35 +161,41 @@
            )

    def _process_event(self, event: Any, iteration: int) -> AgentEvent | None:
-         """Process a workflow event and return an AgentEvent if applicable."""
+         """Process workflow event into AgentEvent."""
        if isinstance(event, MagenticOrchestratorMessageEvent):
-             message_text = (
-                 event.message.text if event.message and hasattr(event.message, "text") else ""
-             )
-             kind = getattr(event, "kind", "manager")
-             if message_text:
+             text = event.message.text if event.message else ""
+             if text:
                return AgentEvent(
                    type="judging",
-                     message=f"Manager ({kind}): {_truncate(message_text)}",
+                     message=f"Manager ({event.kind}): {text[:200]}...",
                    iteration=iteration,
                )

        elif isinstance(event, MagenticAgentMessageEvent):
            agent_name = event.agent_id or "unknown"
-             msg_text = (
-                 event.message.text if event.message and hasattr(event.message, "text") else ""
+             text = event.message.text if event.message else ""
+
+             event_type = "judging"
+             if "search" in agent_name.lower():
+                 event_type = "search_complete"
+             elif "judge" in agent_name.lower():
+                 event_type = "judge_complete"
+             elif "hypothes" in agent_name.lower():
+                 event_type = "hypothesizing"
+             elif "report" in agent_name.lower():
+                 event_type = "synthesizing"
+
+             return AgentEvent(
+                 type=event_type,  # type: ignore[arg-type]
+                 message=f"{agent_name}: {text[:200]}...",
+                 iteration=iteration + 1,
            )
-             return self._agent_message_event(agent_name, msg_text, iteration + 1)

        elif isinstance(event, MagenticFinalResultEvent):
-             final_text = (
-                 event.message.text
-                 if event.message and hasattr(event.message, "text")
-                 else "No result"
-             )
+             text = event.message.text if event.message else "No result"
            return AgentEvent(
                type="complete",
-                 message=final_text,
+                 message=text,
                data={"iterations": iteration},
                iteration=iteration,
            )
@@ -242,35 +218,3 @@
                )

        return None
-
-     def _agent_message_event(self, agent_name: str, msg_text: str, iteration: int) -> AgentEvent:
-         """Create an AgentEvent for an agent message."""
-         if "search" in agent_name.lower():
-             return AgentEvent(
-                 type="search_complete",
-                 message=f"Search agent: {_truncate(msg_text)}",
-                 iteration=iteration,
-             )
-         elif "hypothes" in agent_name.lower():
-             return AgentEvent(
-                 type="hypothesizing",
-                 message=f"Hypothesis agent: {_truncate(msg_text)}",
-                 iteration=iteration,
-             )
-         elif "judge" in agent_name.lower():
-             return AgentEvent(
-                 type="judge_complete",
-                 message=f"Judge agent: {_truncate(msg_text)}",
-                 iteration=iteration,
-             )
-         elif "report" in agent_name.lower():
-             return AgentEvent(
-                 type="synthesizing",
-                 message=f"Report agent: {_truncate(msg_text)}" if msg_text else "Report generated.",
-                 iteration=iteration,
-             )
-         return AgentEvent(
-             type="judging",
-             message=f"{agent_name}: {_truncate(msg_text)}",
-             iteration=iteration,
-         )