| """Data models for the Search feature.""" | |
| from datetime import UTC, datetime | |
| from typing import Any, ClassVar, Literal | |
| from pydantic import BaseModel, Field | |
| # Centralized source type - add new sources here (e.g., "biorxiv" in Phase 11) | |
| SourceName = Literal["pubmed", "clinicaltrials", "biorxiv"] | |


class Citation(BaseModel):
    """A citation to a source document."""

    source: SourceName = Field(description="Where this came from")
    title: str = Field(min_length=1, max_length=500)
    url: str = Field(description="URL to the source")
    date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
    authors: list[str] = Field(default_factory=list)

    MAX_AUTHORS_IN_CITATION: ClassVar[int] = 3

    def formatted(self) -> str:
        """Format as a citation string."""
        author_str = ", ".join(self.authors[: self.MAX_AUTHORS_IN_CITATION])
        if len(self.authors) > self.MAX_AUTHORS_IN_CITATION:
            author_str += " et al."
        return f"{author_str} ({self.date}). {self.title}. {self.source.upper()}"


class Evidence(BaseModel):
    """A piece of evidence retrieved from search."""

    content: str = Field(min_length=1, description="The actual text content")
    citation: Citation
    relevance: float = Field(default=0.0, ge=0.0, le=1.0, description="Relevance score 0-1")

    # Frozen: instances are immutable (and hashable), so evidence cannot be mutated after creation.
    model_config = {"frozen": True}


class SearchResult(BaseModel):
    """Result of a search operation."""

    query: str
    evidence: list[Evidence]
    sources_searched: list[SourceName]
    total_found: int
    errors: list[str] = Field(default_factory=list)


class AssessmentDetails(BaseModel):
    """Detailed assessment of evidence quality."""

    mechanism_score: int = Field(
        ...,
        ge=0,
        le=10,
        description="How well does the evidence explain the mechanism? 0-10",
    )
    mechanism_reasoning: str = Field(
        ..., min_length=10, description="Explanation of mechanism score"
    )
    clinical_evidence_score: int = Field(
        ...,
        ge=0,
        le=10,
        description="Strength of clinical/preclinical evidence. 0-10",
    )
    clinical_reasoning: str = Field(
        ..., min_length=10, description="Explanation of clinical evidence score"
    )
    drug_candidates: list[str] = Field(
        default_factory=list, description="List of specific drug candidates mentioned"
    )
    key_findings: list[str] = Field(
        default_factory=list, description="Key findings from the evidence"
    )


class JudgeAssessment(BaseModel):
    """Complete assessment from the Judge."""

    details: AssessmentDetails
    sufficient: bool = Field(..., description="Is evidence sufficient to provide a recommendation?")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence in the assessment (0-1)")
    recommendation: Literal["continue", "synthesize"] = Field(
        ...,
        description="continue = need more evidence, synthesize = ready to answer",
    )
    next_search_queries: list[str] = Field(
        default_factory=list, description="If continue, what queries to search next"
    )
    reasoning: str = Field(
        ..., min_length=20, description="Overall reasoning for the recommendation"
    )


class AgentEvent(BaseModel):
    """Event emitted by the orchestrator for UI streaming."""

    type: Literal[
        "started",
        "searching",
        "search_complete",
        "judging",
        "judge_complete",
        "looping",
        "synthesizing",
        "complete",
        "error",
        "streaming",
        "hypothesizing",
        "analyzing",  # NEW for Phase 13
        "analysis_complete",  # NEW for Phase 13
    ]
    message: str
    data: Any = None
    timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
    iteration: int = 0

    def to_markdown(self) -> str:
        """Format event as markdown for chat display."""
        icons = {
            "started": "🚀",
            "searching": "🔍",
            "search_complete": "📚",
            "judging": "🧠",
            "judge_complete": "✅",
            "looping": "🔄",
            "synthesizing": "📝",
            "complete": "🎉",
            "error": "❌",
            "streaming": "📡",
            "hypothesizing": "🔬",  # NEW
            "analyzing": "📊",  # NEW
            "analysis_complete": "📈",  # NEW
        }
        icon = icons.get(self.type, "•")
        return f"{icon} **{self.type.upper()}**: {self.message}"


class MechanismHypothesis(BaseModel):
    """A scientific hypothesis about drug mechanism."""

    drug: str = Field(description="The drug being studied")
    target: str = Field(description="Molecular target (e.g., AMPK, mTOR)")
    pathway: str = Field(description="Biological pathway affected")
    effect: str = Field(description="Downstream effect on disease")
    confidence: float = Field(ge=0, le=1, description="Confidence in hypothesis")
    supporting_evidence: list[str] = Field(
        default_factory=list, description="PMIDs or URLs supporting this hypothesis"
    )
    contradicting_evidence: list[str] = Field(
        default_factory=list, description="PMIDs or URLs contradicting this hypothesis"
    )
    search_suggestions: list[str] = Field(
        default_factory=list, description="Suggested searches to test this hypothesis"
    )

    def to_search_queries(self) -> list[str]:
        """Generate search queries to test this hypothesis."""
        return [
            f"{self.drug} {self.target}",
            f"{self.target} {self.pathway}",
            f"{self.pathway} {self.effect}",
            *self.search_suggestions,
        ]
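

# Illustrative example (not part of the module): a hypothesis expands into three
# structured queries plus any suggested searches. All values below are hypothetical.
#
#   MechanismHypothesis(
#       drug="metformin",
#       target="AMPK",
#       pathway="mTOR signaling",
#       effect="reduced tumor growth",
#       confidence=0.6,
#       search_suggestions=["metformin glioma clinical trial"],
#   ).to_search_queries()
#   -> ['metformin AMPK', 'AMPK mTOR signaling',
#       'mTOR signaling reduced tumor growth', 'metformin glioma clinical trial']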


class HypothesisAssessment(BaseModel):
    """Assessment of evidence against hypotheses."""

    hypotheses: list[MechanismHypothesis]
    primary_hypothesis: MechanismHypothesis | None = Field(
        default=None, description="Most promising hypothesis based on current evidence"
    )
    knowledge_gaps: list[str] = Field(description="What we don't know yet")
    recommended_searches: list[str] = Field(description="Searches to fill knowledge gaps")


class ReportSection(BaseModel):
    """A section of the research report."""

    title: str
    content: str
    # Reserved for future inline citation tracking within sections
    citations: list[str] = Field(default_factory=list)


class ResearchReport(BaseModel):
    """Structured scientific report."""

    title: str = Field(description="Report title")
    executive_summary: str = Field(
        description="One-paragraph summary for quick reading", min_length=100, max_length=1000
    )
    research_question: str = Field(description="Clear statement of what was investigated")
    methodology: ReportSection = Field(description="How the research was conducted")
    hypotheses_tested: list[dict[str, Any]] = Field(
        description="Hypotheses with supporting/contradicting evidence counts"
    )
    mechanistic_findings: ReportSection = Field(description="Findings about drug mechanisms")
    clinical_findings: ReportSection = Field(
        description="Findings from clinical/preclinical studies"
    )
    drug_candidates: list[str] = Field(description="Identified drug candidates")
    limitations: list[str] = Field(description="Study limitations")
    conclusion: str = Field(description="Overall conclusion")
    references: list[dict[str, str]] = Field(
        description="Formatted references with title, authors, source, URL"
    )

    # Metadata
    sources_searched: list[str] = Field(default_factory=list)
    total_papers_reviewed: int = 0
    search_iterations: int = 0
    confidence_score: float = Field(ge=0, le=1)

    def to_markdown(self) -> str:
        """Render report as markdown."""
        sections = [
            f"# {self.title}\n",
            f"## Executive Summary\n{self.executive_summary}\n",
            f"## Research Question\n{self.research_question}\n",
            f"## Methodology\n{self.methodology.content}\n",
        ]

        # Hypotheses
        sections.append("## Hypotheses Tested\n")
        if not self.hypotheses_tested:
            sections.append("*No hypotheses tested yet.*\n")
        for h in self.hypotheses_tested:
            supported = h.get("supported", 0)
            contradicted = h.get("contradicted", 0)
            if supported == 0 and contradicted == 0:
                status = "❓ Untested"
            elif supported > contradicted:
                status = "✅ Supported"
            else:
                status = "⚠️ Mixed"
            sections.append(
                f"- **{h.get('mechanism', 'Unknown')}** ({status}): "
                f"{supported} supporting, {contradicted} contradicting\n"
            )

        # Findings
        sections.append(f"## Mechanistic Findings\n{self.mechanistic_findings.content}\n")
        sections.append(f"## Clinical Findings\n{self.clinical_findings.content}\n")

        # Drug candidates
        sections.append("## Drug Candidates\n")
        if self.drug_candidates:
            for drug in self.drug_candidates:
                sections.append(f"- **{drug}**\n")
        else:
            sections.append("*No drug candidates identified.*\n")

        # Limitations
        sections.append("## Limitations\n")
        if self.limitations:
            for lim in self.limitations:
                sections.append(f"- {lim}\n")
        else:
            sections.append("*No limitations documented.*\n")

        # Conclusion
        sections.append(f"## Conclusion\n{self.conclusion}\n")

        # References
        sections.append("## References\n")
        if self.references:
            for i, ref in enumerate(self.references, 1):
                sections.append(
                    f"{i}. {ref.get('authors', 'Unknown')}. "
                    f"*{ref.get('title', 'Untitled')}*. "
                    f"{ref.get('source', '')} ({ref.get('date', '')}). "
                    f"[Link]({ref.get('url', '#')})\n"
                )
        else:
            sections.append("*No references available.*\n")

        # Metadata footer
        sections.append("\n---\n")
        sections.append(
            f"*Report generated from {self.total_papers_reviewed} papers "
            f"across {self.search_iterations} search iterations. "
            f"Confidence: {self.confidence_score:.0%}*"
        )

        return "\n".join(sections)


class OrchestratorConfig(BaseModel):
    """Configuration for the orchestrator."""

    max_iterations: int = Field(default=5, ge=1, le=10)
    max_results_per_tool: int = Field(default=10, ge=1, le=50)
    search_timeout: float = Field(default=30.0, ge=5.0, le=120.0)
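

if __name__ == "__main__":
    # Minimal smoke-test sketch (illustrative values only, not part of the feature):
    # build a hypothetical Citation/Evidence pair and the default OrchestratorConfig,
    # then print the formatted citation and config to confirm the models validate.
    citation = Citation(
        source="pubmed",
        title="Example title",
        url="https://example.org/paper",  # hypothetical URL
        date="2024-01-01",
        authors=["Doe J", "Roe A"],
    )
    evidence = Evidence(content="Example abstract text.", citation=citation, relevance=0.8)
    config = OrchestratorConfig()
    print(citation.formatted())
    print(evidence.relevance)
    print(config.model_dump())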