DeepCritical / src /utils /models.py
VibecoderMcSwaggins's picture
feat: implement phase 13 modal pipeline integration
7cc8b69
raw
history blame
10.7 kB
"""Data models for the Search feature."""
from datetime import UTC, datetime
from typing import Any, ClassVar, Literal
from pydantic import BaseModel, Field
# Centralized source type: the closed set of source identifiers accepted by
# Citation.source and SearchResult.sources_searched. Add new sources here.
SourceName = Literal["pubmed", "clinicaltrials", "biorxiv"]
class Citation(BaseModel):
    """A citation to a source document.

    ``title`` must be 1-500 characters long; ``authors`` defaults to an empty
    list. Use ``formatted`` for a one-line, human-readable rendering.
    """

    source: SourceName = Field(description="Where this came from")
    title: str = Field(min_length=1, max_length=500)
    url: str = Field(description="URL to the source")
    date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
    authors: list[str] = Field(default_factory=list)

    # Authors beyond this count are collapsed into " et al." by ``formatted``.
    MAX_AUTHORS_IN_CITATION: ClassVar[int] = 3

    @property
    def formatted(self) -> str:
        """Format as a citation string.

        Returns:
            ``"<authors> (<date>). <title>. <SOURCE>"``. At most
            ``MAX_AUTHORS_IN_CITATION`` authors are listed, followed by
            ``" et al."`` when more exist. When ``authors`` is empty the
            author slot reads ``"Unknown"`` so the string never starts with
            a dangling space.
        """
        if not self.authors:
            # Same sentinel the ``date`` field uses for missing information.
            author_str = "Unknown"
        else:
            author_str = ", ".join(self.authors[: self.MAX_AUTHORS_IN_CITATION])
            if len(self.authors) > self.MAX_AUTHORS_IN_CITATION:
                author_str += " et al."
        return f"{author_str} ({self.date}). {self.title}. {self.source.upper()}"
class Evidence(BaseModel):
    """One piece of retrieved text together with its citation.

    Instances are immutable: ``model_config`` enables ``frozen``.
    """

    model_config = {"frozen": True}

    # Non-empty text of the evidence itself.
    content: str = Field(description="The actual text content", min_length=1)
    citation: Citation
    # Defaults to 0.0 and is constrained to the closed interval [0.0, 1.0].
    relevance: float = Field(
        default=0.0,
        description="Relevance score 0-1",
        ge=0.0,
        le=1.0,
    )
class SearchResult(BaseModel):
    """Outcome of one search operation.

    All fields except ``errors`` are required; ``errors`` defaults to an
    empty list.
    """

    query: str
    evidence: list[Evidence]
    sources_searched: list[SourceName]
    total_found: int
    errors: list[str] = Field(default_factory=list)
class AssessmentDetails(BaseModel):
    """Detailed assessment of evidence quality.

    Both scores are required integers in the range 0-10, and each comes with
    a required reasoning string of at least 10 characters. The two list
    fields default to empty lists.
    """

    mechanism_score: int = Field(
        description="How well does the evidence explain the mechanism? 0-10",
        ge=0,
        le=10,
    )
    mechanism_reasoning: str = Field(
        description="Explanation of mechanism score",
        min_length=10,
    )

    clinical_evidence_score: int = Field(
        description="Strength of clinical/preclinical evidence. 0-10",
        ge=0,
        le=10,
    )
    clinical_reasoning: str = Field(
        description="Explanation of clinical evidence score",
        min_length=10,
    )

    drug_candidates: list[str] = Field(
        description="List of specific drug candidates mentioned",
        default_factory=list,
    )
    key_findings: list[str] = Field(
        description="Key findings from the evidence",
        default_factory=list,
    )
class JudgeAssessment(BaseModel):
    """Complete assessment from the Judge.

    Combines the detailed scores with a sufficiency flag, a confidence in
    [0.0, 1.0], a ``continue``/``synthesize`` recommendation, optional
    follow-up queries, and an overall reasoning of at least 20 characters.
    """

    details: AssessmentDetails
    sufficient: bool = Field(
        description="Is evidence sufficient to provide a recommendation?",
    )
    confidence: float = Field(
        description="Confidence in the assessment (0-1)",
        ge=0.0,
        le=1.0,
    )
    recommendation: Literal["continue", "synthesize"] = Field(
        description="continue = need more evidence, synthesize = ready to answer",
    )
    # Empty by default; per its description, filled when recommending "continue".
    next_search_queries: list[str] = Field(
        description="If continue, what queries to search next",
        default_factory=list,
    )
    reasoning: str = Field(
        description="Overall reasoning for the recommendation",
        min_length=20,
    )
class AgentEvent(BaseModel):
    """Event emitted by the orchestrator for UI streaming.

    ``timestamp`` defaults to the current timezone-aware UTC time, ``data``
    to ``None`` and ``iteration`` to 0.
    """

    type: Literal[
        "started",
        "searching",
        "search_complete",
        "judging",
        "judge_complete",
        "looping",
        "synthesizing",
        "complete",
        "error",
        "streaming",
        "hypothesizing",
        "analyzing",
        "analysis_complete",
    ]
    message: str
    data: Any = None
    timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
    iteration: int = 0

    def to_markdown(self) -> str:
        """Render the event as a single markdown line for chat display.

        Returns:
            ``"<icon> **<TYPE>**: <message>"``, where the icon is looked up
            from the event type and falls back to a bullet for unmapped types.
        """
        icon_by_type = {
            "started": "🚀",
            "searching": "🔍",
            "search_complete": "📚",
            "judging": "🧠",
            "judge_complete": "✅",
            "looping": "🔄",
            "synthesizing": "📝",
            "complete": "🎉",
            "error": "❌",
            "streaming": "📡",
            "hypothesizing": "🔬",
            "analyzing": "📊",
            "analysis_complete": "📈",
        }
        marker = icon_by_type.get(self.type, "•")
        label = self.type.upper()
        return f"{marker} **{label}**: {self.message}"
class MechanismHypothesis(BaseModel):
    """A scientific hypothesis about drug mechanism.

    Describes a drug -> target -> pathway -> effect chain with a confidence
    in [0, 1]. The three list fields default to empty lists.
    """

    drug: str = Field(description="The drug being studied")
    target: str = Field(description="Molecular target (e.g., AMPK, mTOR)")
    pathway: str = Field(description="Biological pathway affected")
    effect: str = Field(description="Downstream effect on disease")
    confidence: float = Field(description="Confidence in hypothesis", ge=0, le=1)

    supporting_evidence: list[str] = Field(
        description="PMIDs or URLs supporting this hypothesis",
        default_factory=list,
    )
    contradicting_evidence: list[str] = Field(
        description="PMIDs or URLs contradicting this hypothesis",
        default_factory=list,
    )
    search_suggestions: list[str] = Field(
        description="Suggested searches to test this hypothesis",
        default_factory=list,
    )

    def to_search_queries(self) -> list[str]:
        """Generate search queries to test this hypothesis.

        Returns:
            Three queries pairing adjacent links of the chain (drug/target,
            target/pathway, pathway/effect), followed by every entry of
            ``search_suggestions`` in order.
        """
        chain_links = (
            (self.drug, self.target),
            (self.target, self.pathway),
            (self.pathway, self.effect),
        )
        queries = [f"{left} {right}" for left, right in chain_links]
        queries.extend(self.search_suggestions)
        return queries
class HypothesisAssessment(BaseModel):
    """Assessment of evidence against hypotheses.

    ``hypotheses``, ``knowledge_gaps`` and ``recommended_searches`` are
    required; ``primary_hypothesis`` is optional and defaults to ``None``.
    """

    hypotheses: list[MechanismHypothesis]
    primary_hypothesis: MechanismHypothesis | None = Field(
        description="Most promising hypothesis based on current evidence",
        default=None,
    )
    knowledge_gaps: list[str] = Field(
        description="What we don't know yet",
    )
    recommended_searches: list[str] = Field(
        description="Searches to fill knowledge gaps",
    )
class ReportSection(BaseModel):
    """A titled section of the research report."""

    title: str
    content: str

    # Reserved for future inline citation tracking within sections;
    # defaults to an empty list.
    citations: list[str] = Field(default_factory=list)
class ResearchReport(BaseModel):
    """Structured scientific report.

    Holds the report content plus run metadata, and renders itself as
    markdown via ``to_markdown``.
    """

    title: str = Field(description="Report title")
    executive_summary: str = Field(
        description="One-paragraph summary for quick reading", min_length=100, max_length=1000
    )
    research_question: str = Field(description="Clear statement of what was investigated")
    methodology: ReportSection = Field(description="How the research was conducted")
    # ``to_markdown`` reads the keys "mechanism", "supported" and "contradicted"
    # from each entry; missing keys fall back to defaults.
    hypotheses_tested: list[dict[str, Any]] = Field(
        description="Hypotheses with supporting/contradicting evidence counts"
    )
    mechanistic_findings: ReportSection = Field(description="Findings about drug mechanisms")
    clinical_findings: ReportSection = Field(
        description="Findings from clinical/preclinical studies"
    )
    drug_candidates: list[str] = Field(description="Identified drug candidates")
    limitations: list[str] = Field(description="Study limitations")
    conclusion: str = Field(description="Overall conclusion")
    # ``to_markdown`` reads "authors", "title", "source", "date" and "url" from
    # each entry; every key is optional at render time.
    references: list[dict[str, str]] = Field(
        description="Formatted references with title, authors, source, URL"
    )

    # Metadata
    sources_searched: list[str] = Field(default_factory=list)
    total_papers_reviewed: int = 0
    search_iterations: int = 0
    confidence_score: float = Field(ge=0, le=1)

    def to_markdown(self) -> str:
        """Render report as markdown.

        Sections appear in this order: title, executive summary, research
        question, methodology, hypotheses tested, mechanistic findings,
        clinical findings, drug candidates, limitations, conclusion,
        references, and a metadata footer. Empty list sections render an
        italic placeholder instead of a list.

        Returns:
            The full report as one markdown string.
        """
        sections = [
            f"# {self.title}\n",
            f"## Executive Summary\n{self.executive_summary}\n",
            f"## Research Question\n{self.research_question}\n",
            f"## Methodology\n{self.methodology.content}\n",
        ]

        # Hypotheses
        sections.append("## Hypotheses Tested\n")
        if not self.hypotheses_tested:
            sections.append("*No hypotheses tested yet.*\n")
        for h in self.hypotheses_tested:
            supported = h.get("supported", 0)
            contradicted = h.get("contradicted", 0)
            # The dict values are untyped (Any); treat an explicit None like a
            # missing key so the comparison below cannot raise TypeError.
            if supported is None:
                supported = 0
            if contradicted is None:
                contradicted = 0

            if supported == 0 and contradicted == 0:
                status = "❓ Untested"
            elif supported > contradicted:
                status = "✅ Supported"
            else:
                status = "⚠️ Mixed"
            sections.append(
                f"- **{h.get('mechanism', 'Unknown')}** ({status}): "
                f"{supported} supporting, {contradicted} contradicting\n"
            )

        # Findings
        sections.append(f"## Mechanistic Findings\n{self.mechanistic_findings.content}\n")
        sections.append(f"## Clinical Findings\n{self.clinical_findings.content}\n")

        # Drug candidates
        sections.append("## Drug Candidates\n")
        if self.drug_candidates:
            sections.extend(f"- **{drug}**\n" for drug in self.drug_candidates)
        else:
            sections.append("*No drug candidates identified.*\n")

        # Limitations
        sections.append("## Limitations\n")
        if self.limitations:
            sections.extend(f"- {lim}\n" for lim in self.limitations)
        else:
            sections.append("*No limitations documented.*\n")

        # Conclusion
        sections.append(f"## Conclusion\n{self.conclusion}\n")

        # References
        sections.append("## References\n")
        if self.references:
            for i, ref in enumerate(self.references, 1):
                source = ref.get("source", "")
                date = ref.get("date", "")
                # Only emit the parts that exist, so a reference without a
                # date or source does not render "()" or a stray double space.
                if source and date:
                    venue = f"{source} ({date}). "
                elif source:
                    venue = f"{source}. "
                elif date:
                    venue = f"({date}). "
                else:
                    venue = ""
                sections.append(
                    f"{i}. {ref.get('authors', 'Unknown')}. "
                    f"*{ref.get('title', 'Untitled')}*. "
                    f"{venue}"
                    f"[Link]({ref.get('url', '#')})\n"
                )
        else:
            sections.append("*No references available.*\n")

        # Metadata footer
        sections.append("\n---\n")
        sections.append(
            f"*Report generated from {self.total_papers_reviewed} papers "
            f"across {self.search_iterations} search iterations. "
            f"Confidence: {self.confidence_score:.0%}*"
        )

        return "\n".join(sections)
class OrchestratorConfig(BaseModel):
    """Configuration for the orchestrator.

    Every setting has a default and is validated against inclusive bounds.
    """

    # Default 5, allowed range 1-10.
    max_iterations: int = Field(
        default=5,
        ge=1,
        le=10,
    )
    # Default 10, allowed range 1-50.
    max_results_per_tool: int = Field(
        default=10,
        ge=1,
        le=50,
    )
    # Default 30.0, allowed range 5.0-120.0.
    search_timeout: float = Field(
        default=30.0,
        ge=5.0,
        le=120.0,
    )