VibecoderMcSwaggins committed on
Commit
32e3b61
·
1 Parent(s): 572b569

fix(phase2): address CodeRabbit review feedback + add examples

Browse files

CodeRabbit fixes:
- Fix deprecated asyncio.get_event_loop() → get_running_loop()
- Remove dead flatten() function from search_handler.py
- Remove redundant getattr() in pubmed.py
- Add truncation rationale comments

New:
- Add examples/search_demo/ with run_search.py demo script
- Add examples/README.md

examples/README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Examples
2
+
3
+ Demo scripts for DeepCritical functionality.
4
+
5
+ ## search_demo
6
+
7
+ Demonstrates Phase 2 search functionality:
8
+
9
+ ```bash
10
+ # Run with default query (metformin + Alzheimer's)
11
+ uv run python examples/search_demo/run_search.py
12
+
13
+ # Run with custom query
14
+ uv run python examples/search_demo/run_search.py "aspirin heart disease"
15
+ ```
16
+
17
+ **What it does:**
18
+ - Searches PubMed (biomedical literature)
19
+ - Searches DuckDuckGo (web)
20
+ - Runs both in parallel (scatter-gather)
21
+ - Returns evidence with citations
22
+
23
+ **Optional:** Set `NCBI_API_KEY` in `.env` for higher PubMed rate limits.
examples/search_demo/run_search.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demo: Search for drug repurposing evidence.
4
+
5
+ This script demonstrates Phase 2 functionality:
6
+ - PubMed search (biomedical literature)
7
+ - Web search (DuckDuckGo)
8
+ - SearchHandler (parallel scatter-gather orchestration)
9
+
10
+ Usage:
11
+ # From project root:
12
+ uv run python examples/search_demo/run_search.py
13
+
14
+ # With custom query:
15
+ uv run python examples/search_demo/run_search.py "metformin cancer"
16
+
17
+ Requirements:
18
+ - Optional: NCBI_API_KEY in .env for higher PubMed rate limits
19
+ """
20
+
21
+ import asyncio
22
+ import sys
23
+
24
+ from src.tools.pubmed import PubMedTool
25
+ from src.tools.search_handler import SearchHandler
26
+ from src.tools.websearch import WebTool
27
+
28
+
29
+ async def main(query: str) -> None:
30
+ """Run search demo with the given query."""
31
+ print(f"\n{'='*60}")
32
+ print("DeepCritical Search Demo")
33
+ print(f"Query: {query}")
34
+ print(f"{'='*60}\n")
35
+
36
+ # Initialize tools
37
+ pubmed = PubMedTool()
38
+ web = WebTool()
39
+ handler = SearchHandler(tools=[pubmed, web], timeout=30.0)
40
+
41
+ # Execute search
42
+ print("Searching PubMed and Web in parallel...")
43
+ result = await handler.execute(query, max_results_per_tool=5)
44
+
45
+ # Display results
46
+ print(f"\n{'='*60}")
47
+ print(f"Results: {result.total_found} pieces of evidence")
48
+ print(f"Sources: {', '.join(result.sources_searched)}")
49
+ if result.errors:
50
+ print(f"Errors: {result.errors}")
51
+ print(f"{'='*60}\n")
52
+
53
+ for i, evidence in enumerate(result.evidence, 1):
54
+ print(f"[{i}] {evidence.citation.source.upper()}: {evidence.citation.title[:80]}...")
55
+ print(f" URL: {evidence.citation.url}")
56
+ print(f" Content: {evidence.content[:150]}...")
57
+ print()
58
+
59
+
60
+ if __name__ == "__main__":
61
+ # Default query or use command line arg
62
+ default_query = "metformin Alzheimer's disease drug repurposing"
63
+ query = sys.argv[1] if len(sys.argv) > 1 else default_query
64
+
65
+ asyncio.run(main(query))
src/tools/pubmed.py CHANGED
@@ -20,7 +20,7 @@ class PubMedTool:
20
  HTTP_TOO_MANY_REQUESTS = 429
21
 
22
  def __init__(self, api_key: str | None = None) -> None:
23
- self.api_key = api_key or getattr(settings, "ncbi_api_key", None)
24
  self._last_request_time = 0.0
25
 
26
  @property
@@ -29,11 +29,12 @@ class PubMedTool:
29
 
30
  async def _rate_limit(self) -> None:
31
  """Enforce NCBI rate limiting."""
32
- now = asyncio.get_event_loop().time()
 
33
  elapsed = now - self._last_request_time
34
  if elapsed < self.RATE_LIMIT_DELAY:
35
  await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
36
- self._last_request_time = asyncio.get_event_loop().time()
37
 
38
  def _build_params(self, **kwargs: Any) -> dict[str, Any]:
39
  """Build request params with optional API key."""
@@ -174,8 +175,11 @@ class PubMedTool:
174
  if last:
175
  authors.append(f"{last} {first}".strip())
176
 
 
 
 
177
  return Evidence(
178
- content=abstract[:2000], # Truncate long abstracts
179
  citation=Citation(
180
  source="pubmed",
181
  title=title[:500],
 
20
  HTTP_TOO_MANY_REQUESTS = 429
21
 
22
  def __init__(self, api_key: str | None = None) -> None:
23
+ self.api_key = api_key or settings.ncbi_api_key
24
  self._last_request_time = 0.0
25
 
26
  @property
 
29
 
30
  async def _rate_limit(self) -> None:
31
  """Enforce NCBI rate limiting."""
32
+ loop = asyncio.get_running_loop()
33
+ now = loop.time()
34
  elapsed = now - self._last_request_time
35
  if elapsed < self.RATE_LIMIT_DELAY:
36
  await asyncio.sleep(self.RATE_LIMIT_DELAY - elapsed)
37
+ self._last_request_time = loop.time()
38
 
39
  def _build_params(self, **kwargs: Any) -> dict[str, Any]:
40
  """Build request params with optional API key."""
 
175
  if last:
176
  authors.append(f"{last} {first}".strip())
177
 
178
+ # Truncation rationale: LLM context limits + cost optimization
179
+ # - Abstract: 2000 chars (~500 tokens) captures key findings
180
+ # - Title: 500 chars covers even verbose journal titles
181
  return Evidence(
182
+ content=abstract[:2000],
183
  citation=Citation(
184
  source="pubmed",
185
  title=title[:500],
src/tools/search_handler.py CHANGED
@@ -12,11 +12,6 @@ from src.utils.models import Evidence, SearchResult
12
  logger = structlog.get_logger()
13
 
14
 
15
- def flatten(nested: list[list[Evidence]]) -> list[Evidence]:
16
- """Flatten a list of lists into a single list."""
17
- return [item for sublist in nested for item in sublist]
18
-
19
-
20
  class SearchHandler:
21
  """Orchestrates parallel searches across multiple tools."""
22
 
 
12
  logger = structlog.get_logger()
13
 
14
 
 
 
 
 
 
15
  class SearchHandler:
16
  """Orchestrates parallel searches across multiple tools."""
17
 
src/tools/websearch.py CHANGED
@@ -25,7 +25,7 @@ class WebTool:
25
 
26
  Note: duckduckgo-search is synchronous, so we run it in executor.
27
  """
28
- loop = asyncio.get_event_loop()
29
  try:
30
  results = await loop.run_in_executor(
31
  None,
@@ -42,6 +42,9 @@ class WebTool:
42
  with DDGS() as ddgs:
43
  results: list[dict[str, Any]] = list(ddgs.text(query, max_results=max_results))
44
 
 
 
 
45
  for result in results:
46
  evidence_list.append(
47
  Evidence(
 
25
 
26
  Note: duckduckgo-search is synchronous, so we run it in executor.
27
  """
28
+ loop = asyncio.get_running_loop()
29
  try:
30
  results = await loop.run_in_executor(
31
  None,
 
42
  with DDGS() as ddgs:
43
  results: list[dict[str, Any]] = list(ddgs.text(query, max_results=max_results))
44
 
45
+ # Truncation rationale: LLM context limits + cost optimization
46
+ # - Content: 1000 chars (~250 tokens) - web snippets are shorter than abstracts
47
+ # - Title: 500 chars covers most web page titles
48
  for result in results:
49
  evidence_list.append(
50
  Evidence(