Spaces:
Running
Running
Commit
·
32e3b61
1
Parent(s):
572b569
fix(phase2): address CodeRabbit review feedback + add examples
Browse files
CodeRabbit fixes:
- Fix deprecated asyncio.get_event_loop() → get_running_loop()
- Remove dead flatten() function from search_handler.py
- Remove redundant getattr() in pubmed.py
- Add truncation rationale comments
New:
- Add examples/search_demo/ with run_search.py demo script
- Add examples/README.md
- examples/README.md +23 -0
- examples/search_demo/run_search.py +65 -0
- src/tools/pubmed.py +8 -4
- src/tools/search_handler.py +0 -5
- src/tools/websearch.py +4 -1
examples/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Examples
|
| 2 |
+
|
| 3 |
+
Demo scripts for DeepCritical functionality.
|
| 4 |
+
|
| 5 |
+
## search_demo
|
| 6 |
+
|
| 7 |
+
Demonstrates Phase 2 search functionality:
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
# Run with default query (metformin + Alzheimer's)
|
| 11 |
+
uv run python examples/search_demo/run_search.py
|
| 12 |
+
|
| 13 |
+
# Run with custom query
|
| 14 |
+
uv run python examples/search_demo/run_search.py "aspirin heart disease"
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
**What it does:**
|
| 18 |
+
- Searches PubMed (biomedical literature)
|
| 19 |
+
- Searches DuckDuckGo (web)
|
| 20 |
+
- Runs both in parallel (scatter-gather)
|
| 21 |
+
- Returns evidence with citations
|
| 22 |
+
|
| 23 |
+
**Optional:** Set `NCBI_API_KEY` in `.env` for higher PubMed rate limits.
|
examples/search_demo/run_search.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Demo: Search for drug repurposing evidence.
|
| 4 |
+
|
| 5 |
+
This script demonstrates Phase 2 functionality:
|
| 6 |
+
- PubMed search (biomedical literature)
|
| 7 |
+
- Web search (DuckDuckGo)
|
| 8 |
+
- SearchHandler (parallel scatter-gather orchestration)
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
# From project root:
|
| 12 |
+
uv run python examples/search_demo/run_search.py
|
| 13 |
+
|
| 14 |
+
# With custom query:
|
| 15 |
+
uv run python examples/search_demo/run_search.py "metformin cancer"
|
| 16 |
+
|
| 17 |
+
Requirements:
|
| 18 |
+
- Optional: NCBI_API_KEY in .env for higher PubMed rate limits
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import asyncio
|
| 22 |
+
import sys
|
| 23 |
+
|
| 24 |
+
from src.tools.pubmed import PubMedTool
|
| 25 |
+
from src.tools.search_handler import SearchHandler
|
| 26 |
+
from src.tools.websearch import WebTool
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
async def main(query: str) -> None:
    """Run the search demo for *query* and print the gathered evidence.

    Creates a PubMedTool and a WebTool, hands both to a SearchHandler,
    awaits one ``execute`` call, then prints a summary block followed by
    one entry per evidence item.

    Args:
        query: Search text forwarded unchanged to ``SearchHandler.execute``.
    """
    banner = "=" * 60

    print(f"\n{banner}")
    print("DeepCritical Search Demo")
    print(f"Query: {query}")
    print(f"{banner}\n")

    # Initialize tools
    pubmed = PubMedTool()
    web = WebTool()
    # NOTE(review): timeout is presumably in seconds - confirm against SearchHandler.
    handler = SearchHandler(tools=[pubmed, web], timeout=30.0)

    # Execute search
    print("Searching PubMed and Web in parallel...")
    result = await handler.execute(query, max_results_per_tool=5)

    # Display results
    print(f"\n{banner}")
    print(f"Results: {result.total_found} pieces of evidence")
    print(f"Sources: {', '.join(result.sources_searched)}")
    if result.errors:
        print(f"Errors: {result.errors}")
    print(f"{banner}\n")

    for i, evidence in enumerate(result.evidence, 1):
        citation = evidence.citation
        # Previews are clipped for readability; the ellipsis is shown only
        # when text was actually cut, so short titles are not mislabelled.
        print(f"[{i}] {citation.source.upper()}: {_preview(citation.title, 80)}")
        print(f"    URL: {citation.url}")
        print(f"    Content: {_preview(evidence.content, 150)}")
        print()


def _preview(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
if __name__ == "__main__":
    # The first command-line argument, when given, replaces the built-in
    # example query.
    default_query = "metformin Alzheimer's disease drug repurposing"
    cli_args = sys.argv[1:]
    query = cli_args[0] if cli_args else default_query

    asyncio.run(main(query))
|
src/tools/pubmed.py
CHANGED
|
@@ -20,7 +20,7 @@ class PubMedTool:
|
|
| 20 |
HTTP_TOO_MANY_REQUESTS = 429
|
| 21 |
|
| 22 |
def __init__(self, api_key: str | None = None) -> None:
|
| 23 |
-
self.api_key = api_key or
|
| 24 |
self._last_request_time = 0.0
|
| 25 |
|
| 26 |
@property
|
|
@@ -29,11 +29,12 @@ class PubMedTool:
|
|
| 29 |
|
| 30 |
async def _rate_limit(self) -> None:
|
| 31 |
"""Enforce NCBI rate limiting."""
|
| 32 |
-
|
|
|
|
| 33 |
elapsed = now - self._last_request_time
|
| 34 |
if elapsed < self.RATE_LIMIT_DELAY:
|
| 35 |
await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
|
| 36 |
-
self._last_request_time =
|
| 37 |
|
| 38 |
def _build_params(self, **kwargs: Any) -> dict[str, Any]:
|
| 39 |
"""Build request params with optional API key."""
|
|
@@ -174,8 +175,11 @@ class PubMedTool:
|
|
| 174 |
if last:
|
| 175 |
authors.append(f"{last} {first}".strip())
|
| 176 |
|
|
|
|
|
|
|
|
|
|
| 177 |
return Evidence(
|
| 178 |
-
content=abstract[:2000],
|
| 179 |
citation=Citation(
|
| 180 |
source="pubmed",
|
| 181 |
title=title[:500],
|
|
|
|
| 20 |
HTTP_TOO_MANY_REQUESTS = 429
|
| 21 |
|
| 22 |
def __init__(self, api_key: str | None = None) -> None:
|
| 23 |
+
self.api_key = api_key or settings.ncbi_api_key
|
| 24 |
self._last_request_time = 0.0
|
| 25 |
|
| 26 |
@property
|
|
|
|
| 29 |
|
| 30 |
async def _rate_limit(self) -> None:
|
| 31 |
"""Enforce NCBI rate limiting."""
|
| 32 |
+
loop = asyncio.get_running_loop()
|
| 33 |
+
now = loop.time()
|
| 34 |
elapsed = now - self._last_request_time
|
| 35 |
if elapsed < self.RATE_LIMIT_DELAY:
|
| 36 |
await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
|
| 37 |
+
self._last_request_time = loop.time()
|
| 38 |
|
| 39 |
def _build_params(self, **kwargs: Any) -> dict[str, Any]:
|
| 40 |
"""Build request params with optional API key."""
|
|
|
|
| 175 |
if last:
|
| 176 |
authors.append(f"{last} {first}".strip())
|
| 177 |
|
| 178 |
+
# Truncation rationale: LLM context limits + cost optimization
|
| 179 |
+
# - Abstract: 2000 chars (~500 tokens) captures key findings
|
| 180 |
+
# - Title: 500 chars covers even verbose journal titles
|
| 181 |
return Evidence(
|
| 182 |
+
content=abstract[:2000],
|
| 183 |
citation=Citation(
|
| 184 |
source="pubmed",
|
| 185 |
title=title[:500],
|
src/tools/search_handler.py
CHANGED
|
@@ -12,11 +12,6 @@ from src.utils.models import Evidence, SearchResult
|
|
| 12 |
logger = structlog.get_logger()
|
| 13 |
|
| 14 |
|
| 15 |
-
def flatten(nested: list[list[Evidence]]) -> list[Evidence]:
|
| 16 |
-
"""Flatten a list of lists into a single list."""
|
| 17 |
-
return [item for sublist in nested for item in sublist]
|
| 18 |
-
|
| 19 |
-
|
| 20 |
class SearchHandler:
|
| 21 |
"""Orchestrates parallel searches across multiple tools."""
|
| 22 |
|
|
|
|
| 12 |
logger = structlog.get_logger()
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
class SearchHandler:
|
| 16 |
"""Orchestrates parallel searches across multiple tools."""
|
| 17 |
|
src/tools/websearch.py
CHANGED
|
@@ -25,7 +25,7 @@ class WebTool:
|
|
| 25 |
|
| 26 |
Note: duckduckgo-search is synchronous, so we run it in executor.
|
| 27 |
"""
|
| 28 |
-
loop = asyncio.get_event_loop()
|
| 29 |
try:
|
| 30 |
results = await loop.run_in_executor(
|
| 31 |
None,
|
|
@@ -42,6 +42,9 @@ class WebTool:
|
|
| 42 |
with DDGS() as ddgs:
|
| 43 |
results: list[dict[str, Any]] = list(ddgs.text(query, max_results=max_results))
|
| 44 |
|
|
|
|
|
|
|
|
|
|
| 45 |
for result in results:
|
| 46 |
evidence_list.append(
|
| 47 |
Evidence(
|
|
|
|
| 25 |
|
| 26 |
Note: duckduckgo-search is synchronous, so we run it in executor.
|
| 27 |
"""
|
| 28 |
+
loop = asyncio.get_running_loop()
|
| 29 |
try:
|
| 30 |
results = await loop.run_in_executor(
|
| 31 |
None,
|
|
|
|
| 42 |
with DDGS() as ddgs:
|
| 43 |
results: list[dict[str, Any]] = list(ddgs.text(query, max_results=max_results))
|
| 44 |
|
| 45 |
+
# Truncation rationale: LLM context limits + cost optimization
|
| 46 |
+
# - Content: 1000 chars (~250 tokens) - web snippets are shorter than abstracts
|
| 47 |
+
# - Title: 500 chars covers most web page titles
|
| 48 |
for result in results:
|
| 49 |
evidence_list.append(
|
| 50 |
Evidence(
|