This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .env.example +97 -80
  2. .github/README.md +65 -0
  3. .github/workflows/ci.yml +80 -0
  4. .github/workflows/docs.yml +61 -0
  5. .gitignore +5 -3
  6. .pre-commit-config.yaml +54 -0
  7. =0.22.0 +0 -0
  8. =0.22.0, +0 -0
  9. Makefile +51 -0
  10. README.md +86 -26
  11. .cursorrules → dev/.cursorrules +1 -0
  12. dev/AGENTS.txt +236 -0
  13. dev/Makefile +51 -0
  14. dev/docs_plugins.py +74 -0
  15. docs/api/agents.md +266 -0
  16. docs/api/models.md +244 -0
  17. docs/api/orchestrators.md +191 -0
  18. docs/api/services.md +197 -0
  19. docs/api/tools.md +231 -0
  20. docs/architecture/agents.md +188 -0
  21. docs/architecture/graph-orchestration.md +152 -0
  22. docs/architecture/graph_orchestration.md +159 -0
  23. docs/architecture/middleware.md +138 -0
  24. docs/architecture/orchestrators.md +198 -0
  25. docs/architecture/services.md +138 -0
  26. docs/architecture/tools.md +171 -0
  27. docs/architecture/workflow-diagrams.md +670 -0
  28. docs/architecture/workflows.md +662 -0
  29. docs/configuration/CONFIGURATION.md +743 -0
  30. docs/configuration/index.md +746 -0
  31. CONTRIBUTING.md → docs/contributing.md +66 -132
  32. docs/contributing/code-quality.md +77 -0
  33. docs/contributing/code-style.md +57 -0
  34. docs/contributing/error-handling.md +65 -0
  35. docs/contributing/implementation-patterns.md +80 -0
  36. docs/contributing/index.md +159 -0
  37. docs/contributing/prompt-engineering.md +65 -0
  38. docs/contributing/testing.md +61 -0
  39. docs/getting-started/examples.md +205 -0
  40. docs/getting-started/installation.md +144 -0
  41. docs/getting-started/mcp-integration.md +211 -0
  42. docs/getting-started/quick-start.md +115 -0
  43. docs/index.md +63 -0
  44. LICENSE.md → docs/license.md +10 -0
  45. docs/overview/architecture.md +192 -0
  46. docs/overview/features.md +144 -0
  47. docs/overview/quick-start.md +82 -0
  48. docs/team.md +40 -0
  49. examples/README.md +184 -0
  50. examples/embeddings_demo/run_embeddings.py +104 -0
.env.example CHANGED
@@ -1,83 +1,63 @@
1
- # HuggingFace
2
- HF_TOKEN=your_huggingface_token_here
3
 
4
- # OpenAI (optional)
5
- OPENAI_API_KEY=your_openai_key_here
6
 
7
- # Anthropic (optional)
8
- ANTHROPIC_API_KEY=your_anthropic_key_here
 
9
 
10
  # Model names (optional - sensible defaults set in config.py)
11
- # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
12
  # OPENAI_MODEL=gpt-5.1
 
13
 
 
14
 
15
- # ============================================
16
- # Audio Processing Configuration (TTS)
17
- # ============================================
18
- # Kokoro TTS Model Configuration
19
- TTS_MODEL=hexgrad/Kokoro-82M
20
- TTS_VOICE=af_heart
21
- TTS_SPEED=1.0
22
- TTS_GPU=T4
23
- TTS_TIMEOUT=60
24
-
25
- # Available TTS Voices:
26
- # American English Female: af_heart, af_bella, af_nicole, af_aoede, af_kore, af_sarah, af_nova, af_sky, af_alloy, af_jessica, af_river
27
- # American English Male: am_michael, am_fenrir, am_puck, am_echo, am_eric, am_liam, am_onyx, am_santa, am_adam
28
-
29
- # Available GPU Types (Modal):
30
- # T4 - Cheapest, good for testing (default)
31
- # A10 - Good balance of cost/performance
32
- # A100 - Fastest, most expensive
33
- # L4 - NVIDIA L4 GPU
34
- # L40S - NVIDIA L40S GPU
35
- # Note: GPU type is set at function definition time. Changes require app restart.
36
-
37
- # ============================================
38
- # Audio Processing Configuration (STT)
39
- # ============================================
40
- # Speech-to-Text API Configuration
41
- STT_API_URL=nvidia/canary-1b-v2
42
- STT_SOURCE_LANG=English
43
- STT_TARGET_LANG=English
44
-
45
- # Available STT Languages:
46
- # English, Bulgarian, Croatian, Czech, Danish, Dutch, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, Slovenian, Spanish, Swedish, Russian, Ukrainian
47
-
48
- # ============================================
49
- # Audio Feature Flags
50
- # ============================================
51
- ENABLE_AUDIO_INPUT=true
52
- ENABLE_AUDIO_OUTPUT=true
53
-
54
- # ============================================
55
- # Image OCR Configuration
56
- # ============================================
57
- OCR_API_URL=prithivMLmods/Multimodal-OCR3
58
- ENABLE_IMAGE_INPUT=true
59
-
60
- # ============== EMBEDDINGS ==============
61
-
62
- # OpenAI Embedding Model (used if LLM_PROVIDER is openai and performing RAG/Embeddings)
63
- OPENAI_EMBEDDING_MODEL=text-embedding-3-small
64
-
65
- # Local Embedding Model (used for local/offline embeddings)
66
- LOCAL_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
67
-
68
- # ============== HUGGINGFACE (FREE TIER) ==============
69
-
70
- # HuggingFace Token - enables Llama 3.1 (best quality free model)
71
  # Get yours at: https://huggingface.co/settings/tokens
72
- #
73
- # WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta)
74
- # WITH HF_TOKEN: Uses Llama 3.1 8B Instruct (requires accepting license)
75
  #
76
  # For HuggingFace Spaces deployment:
77
  # Set this as a "Secret" in Space Settings -> Variables and secrets
78
  # Users/judges don't need their own token - the Space secret is used
79
  #
80
  HF_TOKEN=hf_your-token-here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  # ============== AGENT CONFIGURATION ==============
83
 
@@ -85,23 +65,60 @@ MAX_ITERATIONS=10
85
  SEARCH_TIMEOUT=30
86
  LOG_LEVEL=INFO
87
 
88
- # ============================================
89
- # Modal Configuration (Required for TTS)
90
- # ============================================
91
- # Modal credentials are required for TTS (Text-to-Speech) functionality
92
- # Get your credentials from: https://modal.com/
93
- MODAL_TOKEN_ID=your_modal_token_id_here
94
- MODAL_TOKEN_SECRET=your_modal_token_secret_here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # ============== EXTERNAL SERVICES ==============
97
 
98
- # PubMed (optional - higher rate limits)
99
  NCBI_API_KEY=your-ncbi-key-here
100
 
101
- # Vector Database (optional - for LlamaIndex RAG)
 
 
 
 
 
 
102
  CHROMA_DB_PATH=./chroma_db
103
- # Neo4j Knowledge Graph
104
- NEO4J_URI=bolt://localhost:7687
105
- NEO4J_USER=neo4j
106
- NEO4J_PASSWORD=your_neo4j_password_here
107
- NEO4J_DATABASE=your_database_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============== LLM CONFIGURATION ==============
 
2
 
3
+ # Provider: "openai", "anthropic", or "huggingface"
4
+ LLM_PROVIDER=openai
5
 
6
+ # API Keys (at least one required for full LLM analysis)
7
+ OPENAI_API_KEY=sk-your-key-here
8
+ ANTHROPIC_API_KEY=sk-ant-your-key-here
9
 
10
  # Model names (optional - sensible defaults set in config.py)
 
11
  # OPENAI_MODEL=gpt-5.1
12
+ # ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
13
 
14
+ # ============== HUGGINGFACE CONFIGURATION ==============
15
 
16
+ # HuggingFace Token - enables gated models and higher rate limits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Get yours at: https://huggingface.co/settings/tokens
18
+ #
19
+ # WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
20
+ # WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
21
  #
22
  # For HuggingFace Spaces deployment:
23
  # Set this as a "Secret" in Space Settings -> Variables and secrets
24
  # Users/judges don't need their own token - the Space secret is used
25
  #
26
  HF_TOKEN=hf_your-token-here
27
+ # Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)
28
+
29
+ # Default HuggingFace model for inference (gated, requires auth)
30
+ # Can be overridden in UI dropdown
31
+ # Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
32
+ HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
33
+
34
+ # Fallback models for HuggingFace Inference API (comma-separated)
35
+ # Models are tried in order until one succeeds
36
+ # Format: model1,model2,model3
37
+ # Latest reasoning models first, then reliable fallbacks
38
+ # Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
39
+ # Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
40
+ HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
41
+
42
+ # Override model/provider selection (optional, usually set via UI)
43
+ # HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
44
+ # HF_PROVIDER=hyperbolic
45
+
46
+ # ============== EMBEDDING CONFIGURATION ==============
47
+
48
+ # Embedding Provider: "openai", "local", or "huggingface"
49
+ # Default: "local" (no API key required)
50
+ EMBEDDING_PROVIDER=local
51
+
52
+ # OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
53
+ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
54
+
55
+ # Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
56
+ # BAAI/bge-small-en-v1.5 is newer, faster, and better than all-MiniLM-L6-v2
57
+ LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
58
+
59
+ # HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
60
+ HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
61
 
62
  # ============== AGENT CONFIGURATION ==============
63
 
 
65
  SEARCH_TIMEOUT=30
66
  LOG_LEVEL=INFO
67
 
68
+ # Graph-based execution (experimental)
69
+ # USE_GRAPH_EXECUTION=false
70
+
71
+ # Budget & Rate Limiting
72
+ # DEFAULT_TOKEN_LIMIT=100000
73
+ # DEFAULT_TIME_LIMIT_MINUTES=10
74
+ # DEFAULT_ITERATIONS_LIMIT=10
75
+
76
+ # ============== WEB SEARCH CONFIGURATION ==============
77
+
78
+ # Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
79
+ # Default: "duckduckgo" (no API key required)
80
+ WEB_SEARCH_PROVIDER=duckduckgo
81
+
82
+ # Serper API Key (for Google search via Serper)
83
+ # SERPER_API_KEY=your-serper-key-here
84
+
85
+ # SearchXNG Host URL (for self-hosted search)
86
+ # SEARCHXNG_HOST=http://localhost:8080
87
+
88
+ # Brave Search API Key
89
+ # BRAVE_API_KEY=your-brave-key-here
90
+
91
+ # Tavily API Key
92
+ # TAVILY_API_KEY=your-tavily-key-here
93
 
94
  # ============== EXTERNAL SERVICES ==============
95
 
96
+ # PubMed (optional - higher rate limits: 10 req/sec vs 3 req/sec)
97
  NCBI_API_KEY=your-ncbi-key-here
98
 
99
+ # Modal (optional - for secure code execution sandbox)
100
+ # MODAL_TOKEN_ID=your-modal-token-id
101
+ # MODAL_TOKEN_SECRET=your-modal-token-secret
102
+
103
+ # ============== VECTOR DATABASE (ChromaDB) ==============
104
+
105
+ # ChromaDB storage path
106
  CHROMA_DB_PATH=./chroma_db
107
+
108
+ # Persist ChromaDB to disk (default: true)
109
+ # CHROMA_DB_PERSIST=true
110
+
111
+ # Remote ChromaDB server (optional)
112
+ # CHROMA_DB_HOST=localhost
113
+ # CHROMA_DB_PORT=8000
114
+
115
+ # ============== RAG SERVICE CONFIGURATION ==============
116
+
117
+ # ChromaDB collection name for RAG
118
+ # RAG_COLLECTION_NAME=deepcritical_evidence
119
+
120
+ # Number of top results to retrieve from RAG
121
+ # RAG_SIMILARITY_TOP_K=5
122
+
123
+ # Automatically ingest evidence into RAG
124
+ # RAG_AUTO_INGEST=true
.github/README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ > [!IMPORTANT]
3
+ > **You are reading the Github README!**
4
+ >
5
+ > - 📚 **Documentation**: See our [technical documentation](https://deepcritical.github.io/GradioDemo/) for detailed information
6
+ > - 📖 **Demo README**: Check out the [Demo README](../README.md) for setup, configuration, and contribution guidelines
7
+ > - 🏆 **Hackathon Submission**: Keep reading below for more information about our MCP Hackathon submission
8
+
9
+
10
+ <div align="center">
11
+
12
+ [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=🐙%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
13
+ [![Documentation](https://img.shields.io/badge/Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](https://deepcritical.github.io/GradioDemo/)
14
+ [![Demo](https://img.shields.io/badge/🚀%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
15
+ [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
16
+ [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
17
+
18
+ </div>
19
+
20
+ ## Quick Start
21
+
22
+ ### 1. Environment Setup
23
+
24
+ ```bash
25
+ # Install uv if you haven't already
26
+ pip install uv
27
+
28
+ # Sync dependencies
29
+ uv sync --all-extras
30
+ ```
31
+
32
+ ### 2. Run the UI
33
+
34
+ ```bash
35
+ # Start the Gradio app
36
+ gradio run "src/app.py"
37
+ ```
38
+
39
+ Open your browser to `http://localhost:7860`.
40
+
41
+ ### 3. Authentication (Optional)
42
+
43
+ **HuggingFace OAuth Login**:
44
+ - Click the "Sign in with HuggingFace" button at the top of the app
45
+ - Your HuggingFace API token will be automatically used for AI inference
46
+ - No need to manually enter API keys when logged in
47
+ - OAuth token is used only for the current session and never stored
48
+
49
+ ### 4. Connect via MCP
50
+
51
+ This application exposes a Model Context Protocol (MCP) server, allowing you to use its search tools directly from Claude Desktop or other MCP clients.
52
+
53
+ **MCP Server URL**: `http://localhost:7860/gradio_api/mcp/`
54
+
55
+ **Claude Desktop Configuration**:
56
+ Add this to your `claude_desktop_config.json`:
57
+ ```json
58
+ {
59
+ "mcpServers": {
60
+ "deepcritical": {
61
+ "url": "http://localhost:7860/gradio_api/mcp/"
62
+ }
63
+ }
64
+ }
65
+ ```
.github/workflows/ci.yml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, dev]
6
+ pull_request:
7
+ branches: [main, dev]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ version: "latest"
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+
29
+ - name: Install dependencies
30
+ run: |
31
+ uv sync --extra dev
32
+
33
+ - name: Lint with ruff
34
+ continue-on-error: true
35
+ run: |
36
+ uv run ruff check . --exclude tests --exclude reference_repos
37
+ uv run ruff format --check . --exclude tests --exclude reference_repos
38
+
39
+ - name: Type check with mypy
40
+ continue-on-error: true
41
+ run: |
42
+ uv run mypy src --ignore-missing-imports
43
+
44
+ - name: Run unit tests (No OpenAI/Anthropic, HuggingFace only)
45
+ env:
46
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
47
+ LLM_PROVIDER: huggingface
48
+ run: |
49
+ uv run pytest tests/unit/ -v -m "not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml
50
+
51
+ - name: Run local embeddings tests
52
+ env:
53
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
54
+ LLM_PROVIDER: huggingface
55
+ run: |
56
+ uv run pytest tests/ -v -m "local_embeddings" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
57
+ continue-on-error: true # Allow failures if dependencies not available
58
+
59
+ - name: Run HuggingFace integration tests
60
+ env:
61
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
62
+ LLM_PROVIDER: huggingface
63
+ run: |
64
+ uv run pytest tests/integration/ -v -m "huggingface and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
65
+ continue-on-error: true # Allow failures if HF_TOKEN not set
66
+
67
+ - name: Run non-OpenAI/Anthropic integration tests (excluding embedding providers)
68
+ env:
69
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
70
+ LLM_PROVIDER: huggingface
71
+ run: |
72
+ uv run pytest tests/integration/ -v -m "integration and not openai and not anthropic and not embedding_provider" --tb=short -p no:logfire --cov --cov-branch --cov-report=xml --cov-append || true
73
+ continue-on-error: true # Allow failures if dependencies not available
74
+
75
+ - name: Upload coverage reports to Codecov
76
+ uses: codecov/codecov-action@v5
77
+ continue-on-error: true
78
+ with:
79
+ token: ${{ secrets.CODECOV_TOKEN }}
80
+ slug: DeepCritical/GradioDemo
.github/workflows/docs.yml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ - dev
8
+ paths:
9
+ - 'docs/**'
10
+ - 'mkdocs.yml'
11
+ - '.github/workflows/docs.yml'
12
+ pull_request:
13
+ branches:
14
+ - main
15
+ - dev
16
+ paths:
17
+ - 'docs/**'
18
+ - 'mkdocs.yml'
19
+ - '.github/workflows/docs.yml'
20
+ workflow_dispatch:
21
+
22
+ permissions:
23
+ contents: write
24
+
25
+ jobs:
26
+ build:
27
+ runs-on: ubuntu-latest
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+
31
+ - name: Set up Python
32
+ uses: actions/setup-python@v5
33
+ with:
34
+ python-version: '3.11'
35
+
36
+ - name: Install uv
37
+ uses: astral-sh/setup-uv@v5
38
+ with:
39
+ version: "latest"
40
+
41
+ - name: Install dependencies
42
+ run: |
43
+ uv sync --extra dev
44
+
45
+ - name: Build documentation
46
+ run: |
47
+ uv run mkdocs build --strict
48
+
49
+ - name: Deploy to GitHub Pages
50
+ if: (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev') && github.event_name == 'push'
51
+ uses: peaceiris/actions-gh-pages@v3
52
+ with:
53
+ github_token: ${{ secrets.GITHUB_TOKEN }}
54
+ publish_dir: ./site
55
+ publish_branch: dev
56
+ cname: false
57
+ keep_files: true
58
+
59
+
60
+
61
+
.gitignore CHANGED
@@ -1,7 +1,10 @@
 
 
1
  folder/
2
  site/
3
  .cursor/
4
  .ruff_cache/
 
5
  # Python
6
  __pycache__/
7
  *.py[cod]
@@ -69,13 +72,12 @@ logs/
69
  .pytest_cache/
70
  .mypy_cache/
71
  .coverage
 
 
72
  htmlcov/
73
- test_output*.txt
74
 
75
  # Database files
76
  chroma_db/
77
  *.sqlite3
78
 
79
-
80
  # Trigger rebuild Wed Nov 26 17:51:41 EST 2025
81
- .env
 
1
+ =0.22.0
2
+ =0.22.0,
3
  folder/
4
  site/
5
  .cursor/
6
  .ruff_cache/
7
+ docs/contributing/
8
  # Python
9
  __pycache__/
10
  *.py[cod]
 
72
  .pytest_cache/
73
  .mypy_cache/
74
  .coverage
75
+ .coverage.*
76
+ coverage.xml
77
  htmlcov/
 
78
 
79
  # Database files
80
  chroma_db/
81
  *.sqlite3
82
 
 
83
  # Trigger rebuild Wed Nov 26 17:51:41 EST 2025
 
.pre-commit-config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.14.7 # Compatible with ruff>=0.14.6 (matches CI)
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix, --exclude, tests, --exclude, reference_repos]
7
+ exclude: ^reference_repos/
8
+ - id: ruff-format
9
+ args: [--exclude, tests, --exclude, reference_repos]
10
+ exclude: ^reference_repos/
11
+
12
+ - repo: https://github.com/pre-commit/mirrors-mypy
13
+ rev: v1.18.2 # Matches CI version mypy>=1.18.2
14
+ hooks:
15
+ - id: mypy
16
+ files: ^src/
17
+ exclude: ^folder
18
+ additional_dependencies:
19
+ - pydantic>=2.7
20
+ - pydantic-settings>=2.2
21
+ - tenacity>=8.2
22
+ - pydantic-ai>=0.0.16
23
+ args: [--ignore-missing-imports]
24
+
25
+ - repo: local
26
+ hooks:
27
+ - id: pytest-unit
28
+ name: pytest unit tests (no OpenAI)
29
+ entry: uv
30
+ language: system
31
+ types: [python]
32
+ args: [
33
+ "run",
34
+ "python",
35
+ ".pre-commit-hooks/run_pytest_with_sync.py",
36
+ "unit",
37
+ ]
38
+ pass_filenames: false
39
+ always_run: true
40
+ require_serial: false
41
+ - id: pytest-local-embeddings
42
+ name: pytest local embeddings tests
43
+ entry: uv
44
+ language: system
45
+ types: [python]
46
+ args: [
47
+ "run",
48
+ "python",
49
+ ".pre-commit-hooks/run_pytest_with_sync.py",
50
+ "embeddings",
51
+ ]
52
+ pass_filenames: false
53
+ always_run: true
54
+ require_serial: false
=0.22.0 ADDED
File without changes
=0.22.0, ADDED
File without changes
Makefile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: install test lint format typecheck check clean all cov cov-html
2
+
3
+ # Default target
4
+ all: check
5
+
6
+ install:
7
+ uv sync --all-extras
8
+ uv run pre-commit install
9
+
10
+ test:
11
+ uv run pytest tests/unit/ -v -m "not openai" -p no:logfire
12
+
13
+ test-hf:
14
+ uv run pytest tests/ -v -m "huggingface" -p no:logfire
15
+
16
+ test-all:
17
+ uv run pytest tests/ -v -p no:logfire
18
+
19
+ # Coverage aliases
20
+ cov: test-cov
21
+ test-cov:
22
+ uv run pytest --cov=src --cov-report=term-missing -m "not openai" -p no:logfire
23
+
24
+ cov-html:
25
+ uv run pytest --cov=src --cov-report=html -p no:logfire
26
+ @echo "Coverage report: open htmlcov/index.html"
27
+
28
+ lint:
29
+ uv run ruff check src tests
30
+
31
+ format:
32
+ uv run ruff format src tests
33
+
34
+ typecheck:
35
+ uv run mypy src
36
+
37
+ check: lint typecheck test-cov
38
+ @echo "All checks passed!"
39
+
40
+ docs-build:
41
+ uv run mkdocs build
42
+
43
+ docs-serve:
44
+ uv run mkdocs serve
45
+
46
+ docs-clean:
47
+ rm -rf site/
48
+
49
+ clean:
50
+ rm -rf .pytest_cache .mypy_cache .ruff_cache __pycache__ .coverage htmlcov
51
+ find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: The DETERMINATOR
3
  emoji: 🐉
4
  colorFrom: red
5
  colorTo: yellow
@@ -10,54 +10,114 @@ app_file: src/app.py
10
  hf_oauth: true
11
  hf_oauth_expiration_minutes: 480
12
  hf_oauth_scopes:
13
- # Required for HuggingFace Inference API (includes all third-party providers)
14
- # This scope grants access to:
15
- # - HuggingFace's own Inference API
16
- # - Third-party inference providers (nebius, together, scaleway, hyperbolic, novita, nscale, sambanova, ovh, fireworks, etc.)
17
- # - All models available through the Inference Providers API
18
- - inference-api
19
- # Optional: Uncomment if you need to access user's billing information
20
- # - read-billing
21
  pinned: true
22
  license: mit
23
  tags:
24
  - mcp-in-action-track-enterprise
25
  - mcp-hackathon
26
- - deep-research
27
  - biomedical-ai
28
  - pydantic-ai
29
  - llamaindex
30
  - modal
31
- - building-mcp-track-enterprise
32
- - building-mcp-track-consumer
33
- - mcp-in-action-track-enterprise
34
- - mcp-in-action-track-consumer
35
- - building-mcp-track-modal
36
- - building-mcp-track-blaxel
37
- - building-mcp-track-llama-index
38
- - building-mcp-track-HUGGINGFACE
39
  ---
40
 
41
  > [!IMPORTANT]
42
  > **You are reading the Gradio Demo README!**
43
  >
44
- > - 📚 **Documentation**: See our [technical documentation](https://deepcritical.github.io/GradioDemo/) for detailed information
45
- > - 📖 **Complete README**: Check out the [Github README](.github/README.md) for setup, configuration, and contribution guidelines
46
- > - ⚠️**This README is for our Gradio Demo Only !**
47
 
48
  <div align="center">
49
 
50
- [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
51
- [![Documentation](https://img.shields.io/badge/Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](deepcritical.github.io/GradioDemo/)
52
- [![Demo](https://img.shields.io/badge/Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
53
  [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
54
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
55
 
56
 
57
  </div>
58
 
59
- # The DETERMINATOR
60
 
61
  ## About
62
 
63
- The DETERMINATOR is a powerful generalist deep research agent system that stops at nothing until finding precise answers to complex questions. It uses iterative search-and-judge loops to comprehensively investigate any research question from any domain.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Critical Deep Research
3
  emoji: 🐉
4
  colorFrom: red
5
  colorTo: yellow
 
10
  hf_oauth: true
11
  hf_oauth_expiration_minutes: 480
12
  hf_oauth_scopes:
13
+ - inference-api
 
 
 
 
 
 
 
14
  pinned: true
15
  license: mit
16
  tags:
17
  - mcp-in-action-track-enterprise
18
  - mcp-hackathon
19
+ - drug-repurposing
20
  - biomedical-ai
21
  - pydantic-ai
22
  - llamaindex
23
  - modal
 
 
 
 
 
 
 
 
24
  ---
25
 
26
  > [!IMPORTANT]
27
  > **You are reading the Gradio Demo README!**
28
  >
29
+ > - 📚 **Documentation**: See our [technical documentation](https://deepcritical.github.io/GradioDemo/) for detailed information
30
+ > - 📖 **Complete README**: Check out the [full README](.github/README.md) for setup, configuration, and contribution guidelines
31
+ > - 🏆 **Hackathon Submission**: Keep reading below for more information about our MCP Hackathon submission
32
 
33
  <div align="center">
34
 
35
+ [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=🐙%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
36
+ [![Documentation](https://img.shields.io/badge/📚%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](deepcritical.github.io/GradioDemo/)
37
+ [![Demo](https://img.shields.io/badge/🚀%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
38
  [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
39
  [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
40
 
41
 
42
  </div>
43
 
44
+ # DeepCritical
45
 
46
  ## About
47
 
48
+ The [Deep Critical Gradio Hackathon Team](#team) met online in the Alzheimer's Critical Literature Review Group in the Hugging Science initiative. We're building the agent framework we want to use for AI-assisted research to [turn the vast amounts of clinical data into cures](https://github.com/DeepCritical/GradioDemo).
49
+
50
+ For this hackathon we're proposing a simple yet powerful Deep Research Agent that iteratively looks for the answer until it finds it, using general-purpose web search and special-purpose retrievers for technical sources.
51
+
52
+ ## Deep Critical in the Media
53
+
54
+ - Social Media Posts about Deep Critical:
55
+ -
56
+ -
57
+ -
58
+ -
59
+ -
60
+ -
61
+ -
62
+
63
+ ## Important information
64
+
65
+ - **[readme](.github/README.md)**: configure, deploy, contribute, and learn more here.
66
+ - **[docs](https://deepcritical.github.io/GradioDemo/)**: want to know how all this works? Read our detailed technical documentation here.
67
+ - **[demo](https://huggingface.co/spaces/DataQuests/DeepCritical)**: Try our demo on Hugging Face
68
+ - **[team](#team)**: Join us, or follow us!
69
+ - **[video]**: See our demo video
70
+
71
+ ## Future Developments
72
+
73
+ - [ ] Apply Deep Research Systems To Generate Short Form Video (up to 5 minutes)
74
+ - [ ] Visualize Pydantic Graphs as Loading Screens in the UI
75
+ - [ ] Improve Data Science with more Complex Graph Agents
76
+ - [ ] Create Deep Critical Drug Repurposing / Discovery Demo
77
+ - [ ] Create Deep Critical Literature Review
78
+ - [ ] Create Deep Critical Hypothesis Generator
79
+ - [ ] Create PyPI Package
80
+
81
+ ## Completed
82
+
83
+ - [x] **Multi-Source Search**: PubMed, ClinicalTrials.gov, bioRxiv/medRxiv
84
+ - [x] **MCP Integration**: Use our tools from Claude Desktop or any MCP client
85
+ - [x] **HuggingFace OAuth**: Sign in with HuggingFace
86
+ - [x] **Modal Sandbox**: Secure execution of AI-generated statistical code
87
+ - [x] **LlamaIndex RAG**: Semantic search and evidence synthesis
88
+ - [x] **HuggingFace Inference**:
89
+ - [x] **HuggingFace MCP Custom Config To Use Community Tools**:
90
+ - [x] **Strongly Typed Composable Graphs**:
91
+ - [x] **Specialized Research Teams of Agents**:
92
+
93
+
94
+
95
+ ### Team
96
+
97
+ - ZJ
98
+ - MarioAderman
99
+ - Josephrp
100
+
101
+
102
+ ## Acknowledgements
103
+
104
+ - McSwaggins
105
+ - Magentic
106
+ - Huggingface
107
+ - Gradio
108
+ - DeepCritical
109
+ - Sponsors
110
+ - Microsoft
111
+ - Pydantic
112
+ - Llama-index
113
+ - Anthropic/MCP
114
+ - List of Tools Makers
115
+
116
+
117
+ ## Links
118
+
119
+ [![GitHub](https://img.shields.io/github/stars/DeepCritical/GradioDemo?style=for-the-badge&logo=github&logoColor=white&label=🐙%20GitHub&labelColor=181717&color=181717)](https://github.com/DeepCritical/GradioDemo)
120
+ [![Documentation](https://img.shields.io/badge/📚%20Docs-0080FF?style=for-the-badge&logo=readthedocs&logoColor=white&labelColor=0080FF&color=0080FF)](deepcritical.github.io/GradioDemo/)
121
+ [![Demo](https://img.shields.io/badge/🚀%20Demo-FFD21E?style=for-the-badge&logo=huggingface&logoColor=white&labelColor=FFD21E&color=FFD21E)](https://huggingface.co/spaces/DataQuests/DeepCritical)
122
+ [![codecov](https://codecov.io/gh/DeepCritical/GradioDemo/graph/badge.svg?token=B1f05RCGpz)](https://codecov.io/gh/DeepCritical/GradioDemo)
123
+ [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP)
.cursorrules → dev/.cursorrules RENAMED
@@ -238,3 +238,4 @@
238
 
239
 
240
 
 
 
238
 
239
 
240
 
241
+
dev/AGENTS.txt ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepCritical Project - Rules
2
+
3
+ ## Project-Wide Rules
4
+
5
+ **Architecture**: Multi-agent research system using Pydantic AI for agent orchestration, supporting iterative and deep research patterns. Uses middleware for state management, budget tracking, and workflow coordination.
6
+
7
+ **Type Safety**: ALWAYS use complete type hints. All functions must have parameter and return type annotations. Use `mypy --strict` compliance. Use `TYPE_CHECKING` imports for circular dependencies: `from typing import TYPE_CHECKING; if TYPE_CHECKING: from src.services.embeddings import EmbeddingService`
8
+
9
+ **Async Patterns**: ALL I/O operations must be async (`async def`, `await`). Use `asyncio.gather()` for parallel operations. CPU-bound work must use `run_in_executor()`: `loop = asyncio.get_running_loop(); result = await loop.run_in_executor(None, cpu_bound_function, args)`. Never block the event loop.
10
+
11
+ **Error Handling**: Use custom exceptions from `src/utils/exceptions.py`: `DeepCriticalError`, `SearchError`, `RateLimitError`, `JudgeError`, `ConfigurationError`. Always chain exceptions: `raise SearchError(...) from e`. Log with structlog: `logger.error("Operation failed", error=str(e), context=value)`.
12
+
13
+ **Logging**: Use `structlog` for ALL logging (NOT `print` or `logging`). Import: `import structlog; logger = structlog.get_logger()`. Log with structured data: `logger.info("event", key=value)`. Use appropriate levels: DEBUG, INFO, WARNING, ERROR.
14
+
15
+ **Pydantic Models**: All data exchange uses Pydantic models from `src/utils/models.py`. Models are frozen (`model_config = {"frozen": True}`) for immutability. Use `Field()` with descriptions. Validate with `ge=`, `le=`, `min_length=`, `max_length=` constraints.
16
+
17
+ **Code Style**: Ruff with 100-char line length. Ignore rules: `PLR0913` (too many arguments), `PLR0912` (too many branches), `PLR0911` (too many returns), `PLR2004` (magic values), `PLW0603` (global statement), `PLC0415` (lazy imports).
18
+
19
+ **Docstrings**: Google-style docstrings for all public functions. Include Args, Returns, Raises sections. Use type hints in docstrings only if needed for clarity.
20
+
21
+ **Testing**: Unit tests in `tests/unit/` (mocked, fast). Integration tests in `tests/integration/` (real APIs, marked `@pytest.mark.integration`). Use `respx` for httpx mocking, `pytest-mock` for general mocking.
22
+
23
+ **State Management**: Use `ContextVar` in middleware for thread-safe isolation. Never use global mutable state (except singletons via `@lru_cache`). Use `WorkflowState` from `src/middleware/state_machine.py` for workflow state.
24
+
25
+ **Citation Validation**: ALWAYS validate references before returning reports. Use `validate_references()` from `src/utils/citation_validator.py`. Remove hallucinated citations. Log warnings for removed citations.
26
+
27
+ ---
28
+
29
+ ## src/agents/ - Agent Implementation Rules
30
+
31
+ **Pattern**: All agents use Pydantic AI `Agent` class. Agents have structured output types (Pydantic models) or return strings. Use factory functions in `src/agent_factory/agents.py` for creation.
32
+
33
+ **Agent Structure**:
34
+ - System prompt as module-level constant (with date injection: `datetime.now().strftime("%Y-%m-%d")`)
35
+ - Agent class with `__init__(model: Any | None = None)`
36
+ - Main method (e.g., `async def evaluate()`, `async def write_report()`)
37
+ - Factory function: `def create_agent_name(model: Any | None = None) -> AgentName`
38
+
39
+ **Model Initialization**: Use `get_model()` from `src/agent_factory/judges.py` if no model provided. Support OpenAI/Anthropic/HF Inference via settings.
40
+
41
+ **Error Handling**: Return fallback values (e.g., `KnowledgeGapOutput(research_complete=False, outstanding_gaps=[...])`) on failure. Log errors with context. Use retry logic (3 retries) in Pydantic AI Agent initialization.
42
+
43
+ **Input Validation**: Validate query/inputs are not empty. Truncate very long inputs with warnings. Handle None values gracefully.
44
+
45
+ **Output Types**: Use structured output types from `src/utils/models.py` (e.g., `KnowledgeGapOutput`, `AgentSelectionPlan`, `ReportDraft`). For text output (writer agents), return `str` directly.
46
+
47
+ **Agent-Specific Rules**:
48
+ - `knowledge_gap.py`: Outputs `KnowledgeGapOutput`. Evaluates research completeness.
49
+ - `tool_selector.py`: Outputs `AgentSelectionPlan`. Selects tools (RAG/web/database).
50
+ - `writer.py`: Returns markdown string. Includes citations in numbered format.
51
+ - `long_writer.py`: Uses `ReportDraft` input/output. Handles section-by-section writing.
52
+ - `proofreader.py`: Takes `ReportDraft`, returns polished markdown.
53
+ - `thinking.py`: Returns observation string from conversation history.
54
+ - `input_parser.py`: Outputs `ParsedQuery` with research mode detection.
55
+
56
+ ---
57
+
58
+ ## src/tools/ - Search Tool Rules
59
+
60
+ **Protocol**: All tools implement `SearchTool` protocol from `src/tools/base.py`: `name` property and `async def search(query, max_results) -> list[Evidence]`.
61
+
62
+ **Rate Limiting**: Use `@retry` decorator from tenacity: `@retry(stop=stop_after_attempt(3), wait=wait_exponential(...))`. Implement `_rate_limit()` method for APIs with limits. Use shared rate limiters from `src/tools/rate_limiter.py`.
63
+
64
+ **Error Handling**: Raise `SearchError` or `RateLimitError` on failures. Handle HTTP errors (429, 500, timeout). Return empty list on non-critical errors (log warning).
65
+
66
+ **Query Preprocessing**: Use `preprocess_query()` from `src/tools/query_utils.py` to remove noise and expand synonyms.
67
+
68
+ **Evidence Conversion**: Convert API responses to `Evidence` objects with `Citation`. Extract metadata (title, url, date, authors). Set relevance scores (0.0-1.0). Handle missing fields gracefully.
69
+
70
+ **Tool-Specific Rules**:
71
+ - `pubmed.py`: Use NCBI E-utilities (ESearch → EFetch). Rate limit: 0.34s between requests. Parse XML with `xmltodict`. Handle single vs. multiple articles.
72
+ - `clinicaltrials.py`: Use `requests` library (NOT httpx - WAF blocks httpx). Run in thread pool: `await asyncio.to_thread(requests.get, ...)`. Filter: Only interventional studies, active/completed.
73
+ - `europepmc.py`: Handle preprint markers: `[PREPRINT - Not peer-reviewed]`. Build URLs from DOI or PMID.
74
+ - `rag_tool.py`: Wraps `LlamaIndexRAGService`. Returns Evidence from RAG results. Handles ingestion.
75
+ - `search_handler.py`: Orchestrates parallel searches across multiple tools. Uses `asyncio.gather()` with `return_exceptions=True`. Aggregates results into `SearchResult`.
76
+
77
+ ---
78
+
79
+ ## src/middleware/ - Middleware Rules
80
+
81
+ **State Management**: Use `ContextVar` for thread-safe isolation. `WorkflowState` uses `ContextVar[WorkflowState | None]`. Initialize with `init_workflow_state(embedding_service)`. Access with `get_workflow_state()` (auto-initializes if missing).
82
+
83
+ **WorkflowState**: Tracks `evidence: list[Evidence]`, `conversation: Conversation`, `embedding_service: Any`. Methods: `add_evidence()` (deduplicates by URL), `async search_related()` (semantic search).
84
+
85
+ **WorkflowManager**: Manages parallel research loops. Methods: `add_loop()`, `run_loops_parallel()`, `update_loop_status()`, `sync_loop_evidence_to_state()`. Uses `asyncio.gather()` for parallel execution. Handles errors per loop (don't fail all if one fails).
86
+
87
+ **BudgetTracker**: Tracks tokens, time, iterations per loop and globally. Methods: `create_budget()`, `add_tokens()`, `start_timer()`, `update_timer()`, `increment_iteration()`, `check_budget()`, `can_continue()`. Token estimation: `estimate_tokens(text)` (~4 chars per token), `estimate_llm_call_tokens(prompt, response)`.
88
+
89
+ **Models**: All middleware models in `src/utils/models.py`. `IterationData`, `Conversation`, `ResearchLoop`, `BudgetStatus` are used by middleware.
90
+
91
+ ---
92
+
93
+ ## src/orchestrator/ - Orchestration Rules
94
+
95
+ **Research Flows**: Two patterns: `IterativeResearchFlow` (single loop) and `DeepResearchFlow` (plan → parallel loops → synthesis). Both support agent chains (`use_graph=False`) and graph execution (`use_graph=True`).
96
+
97
+ **IterativeResearchFlow**: Pattern: Generate observations → Evaluate gaps → Select tools → Execute → Judge → Continue/Complete. Uses `KnowledgeGapAgent`, `ToolSelectorAgent`, `ThinkingAgent`, `WriterAgent`, `JudgeHandler`. Tracks iterations, time, budget.
98
+
99
+ **DeepResearchFlow**: Pattern: Planner → Parallel iterative loops per section → Synthesizer. Uses `PlannerAgent`, `IterativeResearchFlow` (per section), `LongWriterAgent` or `ProofreaderAgent`. Uses `WorkflowManager` for parallel execution.
100
+
101
+ **Graph Orchestrator**: Uses Pydantic AI Graphs (when available) or agent chains (fallback). Routes based on research mode (iterative/deep/auto). Streams `AgentEvent` objects for UI.
102
+
103
+ **State Initialization**: Always call `init_workflow_state()` before running flows. Initialize `BudgetTracker` per loop. Use `WorkflowManager` for parallel coordination.
104
+
105
+ **Event Streaming**: Yield `AgentEvent` objects during execution. Event types: "started", "search_complete", "judge_complete", "hypothesizing", "synthesizing", "complete", "error". Include iteration numbers and data payloads.
106
+
107
+ ---
108
+
109
+ ## src/services/ - Service Rules
110
+
111
+ **EmbeddingService**: Local sentence-transformers (NO API key required). All operations async-safe via `run_in_executor()`. ChromaDB for vector storage. Deduplication threshold: 0.85 (85% similarity = duplicate).
112
+
113
+ **LlamaIndexRAGService**: Uses OpenAI embeddings (requires `OPENAI_API_KEY`). Methods: `ingest_evidence()`, `retrieve()`, `query()`. Returns documents with metadata (source, title, url, date, authors). Lazy initialization with graceful fallback.
114
+
115
+ **StatisticalAnalyzer**: Generates Python code via LLM. Executes in Modal sandbox (secure, isolated). Library versions pinned in `SANDBOX_LIBRARIES` dict. Returns `AnalysisResult` with verdict (SUPPORTED/REFUTED/INCONCLUSIVE).
116
+
117
+ **Singleton Pattern**: Use `@lru_cache(maxsize=1)` for singletons: `@lru_cache(maxsize=1); def get_service() -> Service: return Service()`. Lazy initialization to avoid requiring dependencies at import time.
118
+
119
+ ---
120
+
121
+ ## src/utils/ - Utility Rules
122
+
123
+ **Models**: All Pydantic models in `src/utils/models.py`. Use frozen models (`model_config = {"frozen": True}`) except where mutation needed. Use `Field()` with descriptions. Validate with constraints.
124
+
125
+ **Config**: Settings via Pydantic Settings (`src/utils/config.py`). Load from `.env` automatically. Use `settings` singleton: `from src.utils.config import settings`. Validate API keys with properties: `has_openai_key`, `has_anthropic_key`.
126
+
127
+ **Exceptions**: Custom exception hierarchy in `src/utils/exceptions.py`. Base: `DeepCriticalError`. Specific: `SearchError`, `RateLimitError`, `JudgeError`, `ConfigurationError`. Always chain exceptions.
128
+
129
+ **LLM Factory**: Centralized LLM model creation in `src/utils/llm_factory.py`. Supports OpenAI, Anthropic, HF Inference. Use `get_model()` or factory functions. Check requirements before initialization.
130
+
131
+ **Citation Validator**: Use `validate_references()` from `src/utils/citation_validator.py`. Removes hallucinated citations (URLs not in evidence). Logs warnings. Returns validated report string.
132
+
133
+ ---
134
+
135
+ ## src/orchestrator_factory.py Rules
136
+
137
+ **Purpose**: Factory for creating orchestrators. Supports "simple" (legacy) and "advanced" (magentic) modes. Auto-detects mode based on API key availability.
138
+
139
+ **Pattern**: Lazy import for optional dependencies (`_get_magentic_orchestrator_class()`). Handles `ImportError` gracefully with clear error messages.
140
+
141
+ **Mode Detection**: `_determine_mode()` checks explicit mode or auto-detects: "advanced" if `settings.has_openai_key`, else "simple". Maps "magentic" → "advanced".
142
+
143
+ **Function Signature**: `create_orchestrator(search_handler, judge_handler, config, mode) -> Any`. Simple mode requires handlers. Advanced mode uses MagenticOrchestrator.
144
+
145
+ **Error Handling**: Raise `ValueError` with clear messages if requirements not met. Log mode selection with structlog.
146
+
147
+ ---
148
+
149
+ ## src/orchestrator_hierarchical.py Rules
150
+
151
+ **Purpose**: Hierarchical orchestrator using middleware and sub-teams. Adapts Magentic ChatAgent to SubIterationTeam protocol.
152
+
153
+ **Pattern**: Uses `SubIterationMiddleware` with `ResearchTeam` and `LLMSubIterationJudge`. Event-driven via callback queue.
154
+
155
+ **State Initialization**: Initialize embedding service with graceful fallback. Use `init_magentic_state()` (deprecated, but kept for compatibility).
156
+
157
+ **Event Streaming**: Uses `asyncio.Queue` for event coordination. Yields `AgentEvent` objects. Handles event callback pattern with `asyncio.wait()`.
158
+
159
+ **Error Handling**: Log errors with context. Yield error events. Process remaining events after task completion.
160
+
161
+ ---
162
+
163
+ ## src/orchestrator_magentic.py Rules
164
+
165
+ **Purpose**: Magentic-based orchestrator using ChatAgent pattern. Each agent has internal LLM. Manager orchestrates agents.
166
+
167
+ **Pattern**: Uses `MagenticBuilder` with participants (searcher, hypothesizer, judge, reporter). Manager uses `OpenAIChatClient`. Workflow built in `_build_workflow()`.
168
+
169
+ **Event Processing**: `_process_event()` converts Magentic events to `AgentEvent`. Handles: `MagenticOrchestratorMessageEvent`, `MagenticAgentMessageEvent`, `MagenticFinalResultEvent`, `MagenticAgentDeltaEvent`, `WorkflowOutputEvent`.
170
+
171
+ **Text Extraction**: `_extract_text()` defensively extracts text from messages. Priority: `.content` → `.text` → `str(message)`. Handles buggy message objects.
172
+
173
+ **State Initialization**: Initialize embedding service with graceful fallback. Use `init_magentic_state()` (deprecated).
174
+
175
+ **Requirements**: Must call `check_magentic_requirements()` in `__init__`. Requires `agent-framework-core` and OpenAI API key.
176
+
177
+ **Event Types**: Maps agent names to event types: "search" → "search_complete", "judge" → "judge_complete", "hypothes" → "hypothesizing", "report" → "synthesizing".
178
+
179
+ ---
180
+
181
+ ## src/agent_factory/ - Factory Rules
182
+
183
+ **Pattern**: Factory functions for creating agents and handlers. Lazy initialization for optional dependencies. Support OpenAI/Anthropic/HF Inference.
184
+
185
+ **Judges**: `create_judge_handler()` creates `JudgeHandler` with structured output (`JudgeAssessment`). Supports `MockJudgeHandler`, `HFInferenceJudgeHandler` as fallbacks.
186
+
187
+ **Agents**: Factory functions in `agents.py` for all Pydantic AI agents. Pattern: `create_agent_name(model: Any | None = None) -> AgentName`. Use `get_model()` if model not provided.
188
+
189
+ **Graph Builder**: `graph_builder.py` contains utilities for building research graphs. Supports iterative and deep research graph construction.
190
+
191
+ **Error Handling**: Raise `ConfigurationError` if required API keys missing. Log agent creation. Handle import errors gracefully.
192
+
193
+ ---
194
+
195
+ ## src/prompts/ - Prompt Rules
196
+
197
+ **Pattern**: System prompts stored as module-level constants. Include date injection: `datetime.now().strftime("%Y-%m-%d")`. Format evidence with truncation (1500 chars per item).
198
+
199
+ **Judge Prompts**: In `judge.py`. Handle empty evidence case separately. Always request structured JSON output.
200
+
201
+ **Hypothesis Prompts**: In `hypothesis.py`. Use diverse evidence selection (MMR algorithm). Sentence-aware truncation.
202
+
203
+ **Report Prompts**: In `report.py`. Include full citation details. Use diverse evidence selection (n=20). Emphasize citation validation rules.
204
+
205
+ ---
206
+
207
+ ## Testing Rules
208
+
209
+ **Structure**: Unit tests in `tests/unit/` (mocked, fast). Integration tests in `tests/integration/` (real APIs, marked `@pytest.mark.integration`).
210
+
211
+ **Mocking**: Use `respx` for httpx mocking. Use `pytest-mock` for general mocking. Mock LLM calls in unit tests (use `MockJudgeHandler`).
212
+
213
+ **Fixtures**: Common fixtures in `tests/conftest.py`: `mock_httpx_client`, `mock_llm_response`.
214
+
215
+ **Coverage**: Aim for >80% coverage. Test error handling, edge cases, and integration paths.
216
+
217
+ ---
218
+
219
+ ## File-Specific Agent Rules
220
+
221
+ **knowledge_gap.py**: Outputs `KnowledgeGapOutput`. System prompt evaluates research completeness. Handles conversation history. Returns fallback on error.
222
+
223
+ **writer.py**: Returns markdown string. System prompt includes citation format examples. Validates inputs. Truncates long findings. Retry logic for transient failures.
224
+
225
+ **long_writer.py**: Uses `ReportDraft` input/output. Writes sections iteratively. Reformats references (deduplicates, renumbers). Reformats section headings.
226
+
227
+ **proofreader.py**: Takes `ReportDraft`, returns polished markdown. Removes duplicates. Adds summary. Preserves references.
228
+
229
+ **tool_selector.py**: Outputs `AgentSelectionPlan`. System prompt lists available agents (WebSearchAgent, SiteCrawlerAgent, RAGAgent). Guidelines for when to use each.
230
+
231
+ **thinking.py**: Returns observation string. Generates observations from conversation history. Uses query and background context.
232
+
233
+ **input_parser.py**: Outputs `ParsedQuery`. Detects research mode (iterative/deep). Extracts entities and research questions. Improves/refines query.
234
+
235
+
236
+
dev/Makefile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Developer task runner for DeepCritical (uv-based toolchain).
# Every target here is a command, not a file, so all of them must be
# declared .PHONY — otherwise a file/directory with the same name
# (e.g. "docs-build") would silently make the target a no-op.
.PHONY: install test test-hf test-all lint format typecheck check clean all cov cov-html test-cov docs-build docs-serve docs-clean

# Default target: full lint + typecheck + coverage pipeline.
all: check

install:
	uv sync --all-extras
	uv run pre-commit install

# Fast unit tests only; OpenAI-marked tests excluded, logfire plugin disabled.
test:
	uv run pytest tests/unit/ -v -m "not openai" -p no:logfire

# Tests exercising the HuggingFace integration (marked "huggingface").
test-hf:
	uv run pytest tests/ -v -m "huggingface" -p no:logfire

test-all:
	uv run pytest tests/ -v -p no:logfire

# Coverage aliases
cov: test-cov
test-cov:
	uv run pytest --cov=src --cov-report=term-missing -m "not openai" -p no:logfire

cov-html:
	uv run pytest --cov=src --cov-report=html -p no:logfire
	@echo "Coverage report: open htmlcov/index.html"

lint:
	uv run ruff check src tests

format:
	uv run ruff format src tests

typecheck:
	uv run mypy src

check: lint typecheck test-cov
	@echo "All checks passed!"

docs-build:
	uv run mkdocs build

docs-serve:
	uv run mkdocs serve

docs-clean:
	rm -rf site/

clean:
	rm -rf .pytest_cache .mypy_cache .ruff_cache __pycache__ .coverage htmlcov
	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
dev/docs_plugins.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Custom MkDocs extension to handle code anchor format: ```start:end:filepath"""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from markdown import Markdown
7
+ from markdown.extensions import Extension
8
+ from markdown.preprocessors import Preprocessor
9
+
10
+
11
class CodeAnchorPreprocessor(Preprocessor):
    """Rewrite ```start:end:filepath fences into ordinary fenced code blocks.

    The anchor fence names a line range inside a repository file.  Each match
    is replaced by a "View source" link pointing at those lines on GitHub,
    followed by a standard fenced block so downstream highlighting works.
    """

    # Base repository URL used when building the per-anchor GitHub link.
    _REPO_URL = "https://github.com/DeepCritical/GradioDemo"

    # File extension -> fence language tag.  Unknown extensions fall back
    # to "python" (the dominant language in this repository).
    _LANGUAGES = {
        ".py": "python",
        ".js": "javascript",
        ".ts": "typescript",
        ".md": "markdown",
        ".yaml": "yaml",
        ".yml": "yaml",
        ".toml": "toml",
        ".json": "json",
        ".html": "html",
        ".css": "css",
        ".sh": "bash",
    }

    def __init__(self, md: Markdown, base_path: Path):
        """Keep the docs base path and compile the anchor-fence pattern."""
        super().__init__(md)
        # base_path is stored for API compatibility; link generation below
        # uses repository-relative paths directly.
        self.base_path = base_path
        # Whole fenced block whose info string is "start:end:path".
        self.pattern = re.compile(
            r"^```(\d+):(\d+):([^\n]+)\n(.*?)```$", re.MULTILINE | re.DOTALL
        )

    def run(self, lines: list[str]) -> list[str]:
        """Apply the anchor substitution across the joined document text."""
        document = "\n".join(lines)
        rewritten = self.pattern.sub(self._replace_code_anchor, document)
        return rewritten.split("\n")

    def _replace_code_anchor(self, match) -> str:
        """Build the source link plus fenced block for one anchor match."""
        first_line = int(match.group(1))
        last_line = int(match.group(2))
        rel_path = match.group(3).strip()
        body = match.group(4)

        # Pick the fence language from the anchored file's extension.
        language = self._LANGUAGES.get(Path(rel_path).suffix.lower(), "python")

        # Deep link to the exact line range on GitHub.
        anchor_url = f"{self._REPO_URL}/blob/main/{rel_path}#L{first_line}-L{last_line}"

        # The trailing {: target="_blank" } is an attr_list attribute on the
        # link; it assumes the attr_list extension is enabled — TODO confirm.
        return (
            f'[View source: `{rel_path}` (lines {first_line}-{last_line})]({anchor_url}){{: target="_blank" }}\n\n'
            f"```{language}\n{body}\n```"
        )
58
+
59
+
60
class CodeAnchorExtension(Extension):
    """python-markdown extension that installs the code-anchor preprocessor."""

    def __init__(self, base_path: str = ".", **kwargs):
        """Resolve the base path and forward remaining options to Extension."""
        super().__init__(**kwargs)
        self.base_path = Path(base_path)

    def extendMarkdown(self, md: Markdown):  # noqa: N802
        """Register the preprocessor on the markdown instance."""
        anchor_pre = CodeAnchorPreprocessor(md, self.base_path)
        # Priority 25: in python-markdown's registry, higher-priority
        # preprocessors run earlier.
        md.preprocessors.register(anchor_pre, "codeanchor", 25)
70
+
71
+
72
def makeExtension(**kwargs):  # noqa: N802
    """Entry point required by python-markdown's extension-loading API.

    python-markdown looks up this exact camelCase name when the extension is
    referenced by module path (hence the N802 suppression).  Keyword
    arguments (e.g. ``base_path``) are forwarded to ``CodeAnchorExtension``.
    """
    return CodeAnchorExtension(**kwargs)
docs/api/agents.md ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agents API Reference
2
+
3
+ This page documents the API for DeepCritical agents.
4
+
5
+ ## KnowledgeGapAgent
6
+
7
+ **Module**: `src.agents.knowledge_gap`
8
+
9
+ **Purpose**: Evaluates research state and identifies knowledge gaps.
10
+
11
+ ### Methods
12
+
13
+ #### `evaluate`
14
+
15
+ ```python
16
+ async def evaluate(
17
+ self,
18
+ query: str,
19
+ background_context: str,
20
+ conversation_history: Conversation,
21
+ iteration: int,
22
+ time_elapsed_minutes: float,
23
+ max_time_minutes: float
24
+ ) -> KnowledgeGapOutput
25
+ ```
26
+
27
+ Evaluates research completeness and identifies outstanding knowledge gaps.
28
+
29
+ **Parameters**:
30
+ - `query`: Research query string
31
+ - `background_context`: Background context for the query
32
+ - `conversation_history`: Conversation history with previous iterations
33
+ - `iteration`: Current iteration number
34
+ - `time_elapsed_minutes`: Elapsed time in minutes
35
+ - `max_time_minutes`: Maximum time limit in minutes
36
+
37
+ **Returns**: `KnowledgeGapOutput` with:
38
+ - `research_complete`: Boolean indicating if research is complete
39
+ - `outstanding_gaps`: List of remaining knowledge gaps
40
+
41
+ ## ToolSelectorAgent
42
+
43
+ **Module**: `src.agents.tool_selector`
44
+
45
+ **Purpose**: Selects appropriate tools for addressing knowledge gaps.
46
+
47
+ ### Methods
48
+
49
+ #### `select_tools`
50
+
51
+ ```python
52
+ async def select_tools(
53
+ self,
54
+ query: str,
55
+ knowledge_gaps: list[str],
56
+ available_tools: list[str]
57
+ ) -> AgentSelectionPlan
58
+ ```
59
+
60
+ Selects tools for addressing knowledge gaps.
61
+
62
+ **Parameters**:
63
+ - `query`: Research query string
64
+ - `knowledge_gaps`: List of knowledge gaps to address
65
+ - `available_tools`: List of available tool names
66
+
67
+ **Returns**: `AgentSelectionPlan` with list of `AgentTask` objects.
68
+
69
+ ## WriterAgent
70
+
71
+ **Module**: `src.agents.writer`
72
+
73
+ **Purpose**: Generates final reports from research findings.
74
+
75
+ ### Methods
76
+
77
+ #### `write_report`
78
+
79
+ ```python
80
+ async def write_report(
81
+ self,
82
+ query: str,
83
+ findings: str,
84
+ output_length: str = "medium",
85
+ output_instructions: str | None = None
86
+ ) -> str
87
+ ```
88
+
89
+ Generates a markdown report from research findings.
90
+
91
+ **Parameters**:
92
+ - `query`: Research query string
93
+ - `findings`: Research findings to include in report
94
+ - `output_length`: Desired output length ("short", "medium", "long")
95
+ - `output_instructions`: Additional instructions for report generation
96
+
97
+ **Returns**: Markdown string with numbered citations.
98
+
99
+ ## LongWriterAgent
100
+
101
+ **Module**: `src.agents.long_writer`
102
+
103
+ **Purpose**: Long-form report generation with section-by-section writing.
104
+
105
+ ### Methods
106
+
107
+ #### `write_next_section`
108
+
109
+ ```python
110
+ async def write_next_section(
111
+ self,
112
+ query: str,
113
+ draft: ReportDraft,
114
+ section_title: str,
115
+ section_content: str
116
+ ) -> LongWriterOutput
117
+ ```
118
+
119
+ Writes the next section of a long-form report.
120
+
121
+ **Parameters**:
122
+ - `query`: Research query string
123
+ - `draft`: Current report draft
124
+ - `section_title`: Title of the section to write
125
+ - `section_content`: Content/guidance for the section
126
+
127
+ **Returns**: `LongWriterOutput` with updated draft.
128
+
129
+ #### `write_report`
130
+
131
+ ```python
132
+ async def write_report(
133
+ self,
134
+ query: str,
135
+ report_title: str,
136
+ report_draft: ReportDraft
137
+ ) -> str
138
+ ```
139
+
140
+ Generates final report from draft.
141
+
142
+ **Parameters**:
143
+ - `query`: Research query string
144
+ - `report_title`: Title of the report
145
+ - `report_draft`: Complete report draft
146
+
147
+ **Returns**: Final markdown report string.
148
+
149
+ ## ProofreaderAgent
150
+
151
+ **Module**: `src.agents.proofreader`
152
+
153
+ **Purpose**: Proofreads and polishes report drafts.
154
+
155
+ ### Methods
156
+
157
+ #### `proofread`
158
+
159
+ ```python
160
+ async def proofread(
161
+ self,
162
+ query: str,
163
+ report_title: str,
164
+ report_draft: ReportDraft
165
+ ) -> str
166
+ ```
167
+
168
+ Proofreads and polishes a report draft.
169
+
170
+ **Parameters**:
171
+ - `query`: Research query string
172
+ - `report_title`: Title of the report
173
+ - `report_draft`: Report draft to proofread
174
+
175
+ **Returns**: Polished markdown string.
176
+
177
+ ## ThinkingAgent
178
+
179
+ **Module**: `src.agents.thinking`
180
+
181
+ **Purpose**: Generates observations from conversation history.
182
+
183
+ ### Methods
184
+
185
+ #### `generate_observations`
186
+
187
+ ```python
188
+ async def generate_observations(
189
+ self,
190
+ query: str,
191
+ background_context: str,
192
+ conversation_history: Conversation
193
+ ) -> str
194
+ ```
195
+
196
+ Generates observations from conversation history.
197
+
198
+ **Parameters**:
199
+ - `query`: Research query string
200
+ - `background_context`: Background context
201
+ - `conversation_history`: Conversation history
202
+
203
+ **Returns**: Observation string.
204
+
205
+ ## InputParserAgent
206
+
207
+ **Module**: `src.agents.input_parser`
208
+
209
+ **Purpose**: Parses and improves user queries, detects research mode.
210
+
211
+ ### Methods
212
+
213
+ #### `parse_query`
214
+
215
+ ```python
216
+ async def parse_query(
217
+ self,
218
+ query: str
219
+ ) -> ParsedQuery
220
+ ```
221
+
222
+ Parses and improves a user query.
223
+
224
+ **Parameters**:
225
+ - `query`: Original query string
226
+
227
+ **Returns**: `ParsedQuery` with:
228
+ - `original_query`: Original query string
229
+ - `improved_query`: Refined query string
230
+ - `research_mode`: "iterative" or "deep"
231
+ - `key_entities`: List of key entities
232
+ - `research_questions`: List of research questions
233
+
234
+ ## Factory Functions
235
+
236
+ All agents have factory functions in `src.agent_factory.agents`:
237
+
238
+ ```python
239
+ def create_knowledge_gap_agent(model: Any | None = None) -> KnowledgeGapAgent
240
+ def create_tool_selector_agent(model: Any | None = None) -> ToolSelectorAgent
241
+ def create_writer_agent(model: Any | None = None) -> WriterAgent
242
+ def create_long_writer_agent(model: Any | None = None) -> LongWriterAgent
243
+ def create_proofreader_agent(model: Any | None = None) -> ProofreaderAgent
244
+ def create_thinking_agent(model: Any | None = None) -> ThinkingAgent
245
+ def create_input_parser_agent(model: Any | None = None) -> InputParserAgent
246
+ ```
247
+
248
+ **Parameters**:
249
+ - `model`: Optional Pydantic AI model. If None, uses `get_model()` from settings.
250
+
251
+ **Returns**: Agent instance.
252
+
253
+ ## See Also
254
+
255
+ - [Architecture - Agents](../architecture/agents.md) - Architecture overview
256
+ - [Models API](models.md) - Data models used by agents
257
+
258
+
259
+
260
+
261
+
262
+
263
+
264
+
265
+
266
+
docs/api/models.md ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Models API Reference
2
+
3
+ This page documents the Pydantic models used throughout DeepCritical.
4
+
5
+ ## Evidence
6
+
7
+ **Module**: `src.utils.models`
8
+
9
+ **Purpose**: Represents evidence from search results.
10
+
11
+ ```python
12
+ class Evidence(BaseModel):
13
+ citation: Citation
14
+ content: str
15
+ relevance_score: float = Field(ge=0.0, le=1.0)
16
+ metadata: dict[str, Any] = Field(default_factory=dict)
17
+ ```
18
+
19
+ **Fields**:
20
+ - `citation`: Citation information (title, URL, date, authors)
21
+ - `content`: Evidence text content
22
+ - `relevance_score`: Relevance score (0.0-1.0)
23
+ - `metadata`: Additional metadata dictionary
24
+
25
+ ## Citation
26
+
27
+ **Module**: `src.utils.models`
28
+
29
+ **Purpose**: Citation information for evidence.
30
+
31
+ ```python
32
+ class Citation(BaseModel):
33
+ title: str
34
+ url: str
35
+ date: str | None = None
36
+ authors: list[str] = Field(default_factory=list)
37
+ ```
38
+
39
+ **Fields**:
40
+ - `title`: Article/trial title
41
+ - `url`: Source URL
42
+ - `date`: Publication date (optional)
43
+ - `authors`: List of authors (optional)
44
+
45
+ ## KnowledgeGapOutput
46
+
47
+ **Module**: `src.utils.models`
48
+
49
+ **Purpose**: Output from knowledge gap evaluation.
50
+
51
+ ```python
52
+ class KnowledgeGapOutput(BaseModel):
53
+ research_complete: bool
54
+ outstanding_gaps: list[str] = Field(default_factory=list)
55
+ ```
56
+
57
+ **Fields**:
58
+ - `research_complete`: Boolean indicating if research is complete
59
+ - `outstanding_gaps`: List of remaining knowledge gaps
60
+
61
+ ## AgentSelectionPlan
62
+
63
+ **Module**: `src.utils.models`
64
+
65
+ **Purpose**: Plan for tool/agent selection.
66
+
67
+ ```python
68
+ class AgentSelectionPlan(BaseModel):
69
+ tasks: list[AgentTask] = Field(default_factory=list)
70
+ ```
71
+
72
+ **Fields**:
73
+ - `tasks`: List of agent tasks to execute
74
+
75
+ ## AgentTask
76
+
77
+ **Module**: `src.utils.models`
78
+
79
+ **Purpose**: Individual agent task.
80
+
81
+ ```python
82
+ class AgentTask(BaseModel):
83
+ agent_name: str
84
+ query: str
85
+ context: dict[str, Any] = Field(default_factory=dict)
86
+ ```
87
+
88
+ **Fields**:
89
+ - `agent_name`: Name of agent to use
90
+ - `query`: Task query
91
+ - `context`: Additional context dictionary
92
+
93
+ ## ReportDraft
94
+
95
+ **Module**: `src.utils.models`
96
+
97
+ **Purpose**: Draft structure for long-form reports.
98
+
99
+ ```python
100
+ class ReportDraft(BaseModel):
101
+ title: str
102
+ sections: list[ReportSection] = Field(default_factory=list)
103
+ references: list[Citation] = Field(default_factory=list)
104
+ ```
105
+
106
+ **Fields**:
107
+ - `title`: Report title
108
+ - `sections`: List of report sections
109
+ - `references`: List of citations
110
+
111
+ ## ReportSection
112
+
113
+ **Module**: `src.utils.models`
114
+
115
+ **Purpose**: Individual section in a report draft.
116
+
117
+ ```python
118
+ class ReportSection(BaseModel):
119
+ title: str
120
+ content: str
121
+ order: int
122
+ ```
123
+
124
+ **Fields**:
125
+ - `title`: Section title
126
+ - `content`: Section content
127
+ - `order`: Section order number
128
+
129
+ ## ParsedQuery
130
+
131
+ **Module**: `src.utils.models`
132
+
133
+ **Purpose**: Parsed and improved query.
134
+
135
+ ```python
136
+ class ParsedQuery(BaseModel):
137
+ original_query: str
138
+ improved_query: str
139
+ research_mode: Literal["iterative", "deep"]
140
+ key_entities: list[str] = Field(default_factory=list)
141
+ research_questions: list[str] = Field(default_factory=list)
142
+ ```
143
+
144
+ **Fields**:
145
+ - `original_query`: Original query string
146
+ - `improved_query`: Refined query string
147
+ - `research_mode`: Research mode ("iterative" or "deep")
148
+ - `key_entities`: List of key entities
149
+ - `research_questions`: List of research questions
150
+
151
+ ## Conversation
152
+
153
+ **Module**: `src.utils.models`
154
+
155
+ **Purpose**: Conversation history with iterations.
156
+
157
+ ```python
158
+ class Conversation(BaseModel):
159
+ iterations: list[IterationData] = Field(default_factory=list)
160
+ ```
161
+
162
+ **Fields**:
163
+ - `iterations`: List of iteration data
164
+
165
+ ## IterationData
166
+
167
+ **Module**: `src.utils.models`
168
+
169
+ **Purpose**: Data for a single iteration.
170
+
171
+ ```python
172
+ class IterationData(BaseModel):
173
+ iteration: int
174
+ observations: str | None = None
175
+ knowledge_gaps: list[str] = Field(default_factory=list)
176
+ tool_calls: list[dict[str, Any]] = Field(default_factory=list)
177
+ findings: str | None = None
178
+ thoughts: str | None = None
179
+ ```
180
+
181
+ **Fields**:
182
+ - `iteration`: Iteration number
183
+ - `observations`: Generated observations
184
+ - `knowledge_gaps`: Identified knowledge gaps
185
+ - `tool_calls`: Tool calls made
186
+ - `findings`: Findings from tools
187
+ - `thoughts`: Agent thoughts
188
+
189
+ ## AgentEvent
190
+
191
+ **Module**: `src.utils.models`
192
+
193
+ **Purpose**: Event emitted during research execution.
194
+
195
+ ```python
196
+ class AgentEvent(BaseModel):
197
+ type: str
198
+ iteration: int | None = None
199
+ data: dict[str, Any] = Field(default_factory=dict)
200
+ ```
201
+
202
+ **Fields**:
203
+ - `type`: Event type (e.g., "started", "search_complete", "complete")
204
+ - `iteration`: Iteration number (optional)
205
+ - `data`: Event data dictionary
206
+
207
+ ## BudgetStatus
208
+
209
+ **Module**: `src.utils.models`
210
+
211
+ **Purpose**: Current budget status.
212
+
213
+ ```python
214
+ class BudgetStatus(BaseModel):
215
+ tokens_used: int
216
+ tokens_limit: int
217
+ time_elapsed_seconds: float
218
+ time_limit_seconds: float
219
+ iterations: int
220
+ iterations_limit: int
221
+ ```
222
+
223
+ **Fields**:
224
+ - `tokens_used`: Tokens used so far
225
+ - `tokens_limit`: Token limit
226
+ - `time_elapsed_seconds`: Elapsed time in seconds
227
+ - `time_limit_seconds`: Time limit in seconds
228
+ - `iterations`: Current iteration count
229
+ - `iterations_limit`: Iteration limit
230
+
231
+ ## See Also
232
+
233
+ - [Architecture - Agents](../architecture/agents.md) - How models are used
234
+ - [Configuration](../configuration/index.md) - Model configuration
235
+
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
docs/api/orchestrators.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Orchestrators API Reference
2
+
3
+ This page documents the API for DeepCritical orchestrators.
4
+
5
+ ## IterativeResearchFlow
6
+
7
+ **Module**: `src.orchestrator.research_flow`
8
+
9
+ **Purpose**: Single-loop research with search-judge-synthesize cycles.
10
+
11
+ ### Methods
12
+
13
+ #### `run`
14
+
15
+ ```python
16
+ async def run(
17
+ self,
18
+ query: str,
19
+ background_context: str = "",
20
+ max_iterations: int | None = None,
21
+ max_time_minutes: float | None = None,
22
+ token_budget: int | None = None
23
+ ) -> AsyncGenerator[AgentEvent, None]
24
+ ```
25
+
26
+ Runs iterative research flow.
27
+
28
+ **Parameters**:
29
+ - `query`: Research query string
30
+ - `background_context`: Background context (default: "")
31
+ - `max_iterations`: Maximum iterations (default: from settings)
32
+ - `max_time_minutes`: Maximum time in minutes (default: from settings)
33
+ - `token_budget`: Token budget (default: from settings)
34
+
35
+ **Yields**: `AgentEvent` objects for:
36
+ - `started`: Research started
37
+ - `search_complete`: Search completed
38
+ - `judge_complete`: Evidence evaluation completed
39
+ - `synthesizing`: Generating report
40
+ - `complete`: Research completed
41
+ - `error`: Error occurred
42
+
43
+ ## DeepResearchFlow
44
+
45
+ **Module**: `src.orchestrator.research_flow`
46
+
47
+ **Purpose**: Multi-section parallel research with planning and synthesis.
48
+
49
+ ### Methods
50
+
51
+ #### `run`
52
+
53
+ ```python
54
+ async def run(
55
+ self,
56
+ query: str,
57
+ background_context: str = "",
58
+ max_iterations_per_section: int | None = None,
59
+ max_time_minutes: float | None = None,
60
+ token_budget: int | None = None
61
+ ) -> AsyncGenerator[AgentEvent, None]
62
+ ```
63
+
64
+ Runs deep research flow.
65
+
66
+ **Parameters**:
67
+ - `query`: Research query string
68
+ - `background_context`: Background context (default: "")
69
+ - `max_iterations_per_section`: Maximum iterations per section (default: from settings)
70
+ - `max_time_minutes`: Maximum time in minutes (default: from settings)
71
+ - `token_budget`: Token budget (default: from settings)
72
+
73
+ **Yields**: `AgentEvent` objects for:
74
+ - `started`: Research started
75
+ - `planning`: Creating research plan
76
+ - `looping`: Running parallel research loops
77
+ - `synthesizing`: Synthesizing results
78
+ - `complete`: Research completed
79
+ - `error`: Error occurred
80
+
81
+ ## GraphOrchestrator
82
+
83
+ **Module**: `src.orchestrator.graph_orchestrator`
84
+
85
+ **Purpose**: Graph-based execution using Pydantic AI agents as nodes.
86
+
87
+ ### Methods
88
+
89
+ #### `run`
90
+
91
+ ```python
92
+ async def run(
93
+ self,
94
+ query: str,
95
+ research_mode: str = "auto",
96
+ use_graph: bool = True
97
+ ) -> AsyncGenerator[AgentEvent, None]
98
+ ```
99
+
100
+ Runs graph-based research orchestration.
101
+
102
+ **Parameters**:
103
+ - `query`: Research query string
104
+ - `research_mode`: Research mode ("iterative", "deep", or "auto")
105
+ - `use_graph`: Whether to use graph execution (default: True)
106
+
107
+ **Yields**: `AgentEvent` objects during graph execution.
108
+
109
+ ## Orchestrator Factory
110
+
111
+ **Module**: `src.orchestrator_factory`
112
+
113
+ **Purpose**: Factory for creating orchestrators.
114
+
115
+ ### Functions
116
+
117
+ #### `create_orchestrator`
118
+
119
+ ```python
120
+ def create_orchestrator(
121
+ search_handler: SearchHandlerProtocol,
122
+ judge_handler: JudgeHandlerProtocol,
123
+ config: dict[str, Any],
124
+ mode: str | None = None
125
+ ) -> Any
126
+ ```
127
+
128
+ Creates an orchestrator instance.
129
+
130
+ **Parameters**:
131
+ - `search_handler`: Search handler protocol implementation
132
+ - `judge_handler`: Judge handler protocol implementation
133
+ - `config`: Configuration dictionary
134
+ - `mode`: Orchestrator mode ("simple", "advanced", "magentic", or None for auto-detect)
135
+
136
+ **Returns**: Orchestrator instance.
137
+
138
+ **Raises**:
139
+ - `ValueError`: If requirements not met
140
+
141
+ **Modes**:
142
+ - `"simple"`: Legacy orchestrator
143
+ - `"advanced"` or `"magentic"`: Magentic orchestrator (requires OpenAI API key)
144
+ - `None`: Auto-detect based on API key availability
145
+
146
+ ## MagenticOrchestrator
147
+
148
+ **Module**: `src.orchestrator_magentic`
149
+
150
+ **Purpose**: Multi-agent coordination using Microsoft Agent Framework.
151
+
152
+ ### Methods
153
+
154
+ #### `run`
155
+
156
+ ```python
157
+ async def run(
158
+ self,
159
+ query: str,
160
+ max_rounds: int = 15,
161
+ max_stalls: int = 3
162
+ ) -> AsyncGenerator[AgentEvent, None]
163
+ ```
164
+
165
+ Runs Magentic orchestration.
166
+
167
+ **Parameters**:
168
+ - `query`: Research query string
169
+ - `max_rounds`: Maximum rounds (default: 15)
170
+ - `max_stalls`: Maximum stalls before reset (default: 3)
171
+
172
+ **Yields**: `AgentEvent` objects converted from Magentic events.
173
+
174
+ **Requirements**:
175
+ - `agent-framework-core` package
176
+ - OpenAI API key
177
+
178
+ ## See Also
179
+
180
+ - [Architecture - Orchestrators](../architecture/orchestrators.md) - Architecture overview
181
+ - [Graph Orchestration](../architecture/graph-orchestration.md) - Graph execution details
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
docs/api/services.md ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Services API Reference
2
+
3
+ This page documents the API for DeepCritical services.
4
+
5
+ ## EmbeddingService
6
+
7
+ **Module**: `src.services.embeddings`
8
+
9
+ **Purpose**: Local sentence-transformers for semantic search and deduplication.
10
+
11
+ ### Methods
12
+
13
+ #### `embed`
14
+
15
+ ```python
16
+ async def embed(self, text: str) -> list[float]
17
+ ```
18
+
19
+ Generates embedding for a text string.
20
+
21
+ **Parameters**:
22
+ - `text`: Text to embed
23
+
24
+ **Returns**: Embedding vector as list of floats.
25
+
26
+ #### `embed_batch`
27
+
28
+ ```python
29
+ async def embed_batch(self, texts: list[str]) -> list[list[float]]
30
+ ```
31
+
32
+ Generates embeddings for multiple texts.
33
+
34
+ **Parameters**:
35
+ - `texts`: List of texts to embed
36
+
37
+ **Returns**: List of embedding vectors.
38
+
39
+ #### `similarity`
40
+
41
+ ```python
42
+ async def similarity(self, text1: str, text2: str) -> float
43
+ ```
44
+
45
+ Calculates similarity between two texts.
46
+
47
+ **Parameters**:
48
+ - `text1`: First text
49
+ - `text2`: Second text
50
+
51
+ **Returns**: Similarity score (0.0-1.0).
52
+
53
+ #### `find_duplicates`
54
+
55
+ ```python
56
+ async def find_duplicates(
57
+ self,
58
+ texts: list[str],
59
+ threshold: float = 0.85
60
+ ) -> list[tuple[int, int]]
61
+ ```
62
+
63
+ Finds duplicate texts based on similarity threshold.
64
+
65
+ **Parameters**:
66
+ - `texts`: List of texts to check
67
+ - `threshold`: Similarity threshold (default: 0.85)
68
+
69
+ **Returns**: List of (index1, index2) tuples for duplicate pairs.
70
+
71
+ ### Factory Function
72
+
73
+ #### `get_embedding_service`
74
+
75
+ ```python
76
+ @lru_cache(maxsize=1)
77
+ def get_embedding_service() -> EmbeddingService
78
+ ```
79
+
80
+ Returns singleton EmbeddingService instance.
81
+
82
+ ## LlamaIndexRAGService
83
+
84
+ **Module**: `src.services.rag`
85
+
86
+ **Purpose**: Retrieval-Augmented Generation using LlamaIndex.
87
+
88
+ ### Methods
89
+
90
+ #### `ingest_evidence`
91
+
92
+ ```python
93
+ async def ingest_evidence(self, evidence: list[Evidence]) -> None
94
+ ```
95
+
96
+ Ingests evidence into the RAG service.
97
+
98
+ **Parameters**:
99
+ - `evidence`: List of Evidence objects to ingest
100
+
101
+ **Note**: Requires OpenAI API key for embeddings.
102
+
103
+ #### `retrieve`
104
+
105
+ ```python
106
+ async def retrieve(
107
+ self,
108
+ query: str,
109
+ top_k: int = 5
110
+ ) -> list[Document]
111
+ ```
112
+
113
+ Retrieves relevant documents for a query.
114
+
115
+ **Parameters**:
116
+ - `query`: Search query string
117
+ - `top_k`: Number of top results to return (default: 5)
118
+
119
+ **Returns**: List of Document objects with metadata.
120
+
121
+ #### `query`
122
+
123
+ ```python
124
+ async def query(
125
+ self,
126
+ query: str,
127
+ top_k: int = 5
128
+ ) -> str
129
+ ```
130
+
131
+ Queries the RAG service and returns formatted results.
132
+
133
+ **Parameters**:
134
+ - `query`: Search query string
135
+ - `top_k`: Number of top results to return (default: 5)
136
+
137
+ **Returns**: Formatted query results as string.
138
+
139
+ ### Factory Function
140
+
141
+ #### `get_rag_service`
142
+
143
+ ```python
144
+ @lru_cache(maxsize=1)
145
+ def get_rag_service() -> LlamaIndexRAGService | None
146
+ ```
147
+
148
+ Returns the singleton LlamaIndexRAGService instance, or None if an OpenAI API key is not available.
149
+
150
+ ## StatisticalAnalyzer
151
+
152
+ **Module**: `src.services.statistical_analyzer`
153
+
154
+ **Purpose**: Secure execution of AI-generated statistical code.
155
+
156
+ ### Methods
157
+
158
+ #### `analyze`
159
+
160
+ ```python
161
+ async def analyze(
162
+ self,
163
+ hypothesis: str,
164
+ evidence: list[Evidence],
165
+ data_description: str | None = None
166
+ ) -> AnalysisResult
167
+ ```
168
+
169
+ Analyzes a hypothesis using statistical methods.
170
+
171
+ **Parameters**:
172
+ - `hypothesis`: Hypothesis to analyze
173
+ - `evidence`: List of Evidence objects
174
+ - `data_description`: Optional data description
175
+
176
+ **Returns**: `AnalysisResult` with:
177
+ - `verdict`: SUPPORTED, REFUTED, or INCONCLUSIVE
178
+ - `code`: Generated analysis code
179
+ - `output`: Execution output
180
+ - `error`: Error message if execution failed
181
+
182
+ **Note**: Requires Modal credentials for sandbox execution.
183
+
184
+ ## See Also
185
+
186
+ - [Architecture - Services](../architecture/services.md) - Architecture overview
187
+ - [Configuration](../configuration/index.md) - Service configuration
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
docs/api/tools.md ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tools API Reference
2
+
3
+ This page documents the API for DeepCritical search tools.
4
+
5
+ ## SearchTool Protocol
6
+
7
+ All tools implement the `SearchTool` protocol:
8
+
9
+ ```python
10
+ class SearchTool(Protocol):
11
+ @property
12
+ def name(self) -> str: ...
13
+
14
+ async def search(
15
+ self,
16
+ query: str,
17
+ max_results: int = 10
18
+ ) -> list[Evidence]: ...
19
+ ```
20
+
21
+ ## PubMedTool
22
+
23
+ **Module**: `src.tools.pubmed`
24
+
25
+ **Purpose**: Search peer-reviewed biomedical literature from PubMed.
26
+
27
+ ### Properties
28
+
29
+ #### `name`
30
+
31
+ ```python
32
+ @property
33
+ def name(self) -> str
34
+ ```
35
+
36
+ Returns tool name: `"pubmed"`
37
+
38
+ ### Methods
39
+
40
+ #### `search`
41
+
42
+ ```python
43
+ async def search(
44
+ self,
45
+ query: str,
46
+ max_results: int = 10
47
+ ) -> list[Evidence]
48
+ ```
49
+
50
+ Searches PubMed for articles.
51
+
52
+ **Parameters**:
53
+ - `query`: Search query string
54
+ - `max_results`: Maximum number of results to return (default: 10)
55
+
56
+ **Returns**: List of `Evidence` objects with PubMed articles.
57
+
58
+ **Raises**:
59
+ - `SearchError`: If search fails
60
+ - `RateLimitError`: If rate limit is exceeded
61
+
62
+ ## ClinicalTrialsTool
63
+
64
+ **Module**: `src.tools.clinicaltrials`
65
+
66
+ **Purpose**: Search ClinicalTrials.gov for interventional studies.
67
+
68
+ ### Properties
69
+
70
+ #### `name`
71
+
72
+ ```python
73
+ @property
74
+ def name(self) -> str
75
+ ```
76
+
77
+ Returns tool name: `"clinicaltrials"`
78
+
79
+ ### Methods
80
+
81
+ #### `search`
82
+
83
+ ```python
84
+ async def search(
85
+ self,
86
+ query: str,
87
+ max_results: int = 10
88
+ ) -> list[Evidence]
89
+ ```
90
+
91
+ Searches ClinicalTrials.gov for trials.
92
+
93
+ **Parameters**:
94
+ - `query`: Search query string
95
+ - `max_results`: Maximum number of results to return (default: 10)
96
+
97
+ **Returns**: List of `Evidence` objects with clinical trials.
98
+
99
+ **Note**: Only returns interventional studies with status: COMPLETED, ACTIVE_NOT_RECRUITING, RECRUITING, ENROLLING_BY_INVITATION
100
+
101
+ **Raises**:
102
+ - `SearchError`: If search fails
103
+
104
+ ## EuropePMCTool
105
+
106
+ **Module**: `src.tools.europepmc`
107
+
108
+ **Purpose**: Search Europe PMC for preprints and peer-reviewed articles.
109
+
110
+ ### Properties
111
+
112
+ #### `name`
113
+
114
+ ```python
115
+ @property
116
+ def name(self) -> str
117
+ ```
118
+
119
+ Returns tool name: `"europepmc"`
120
+
121
+ ### Methods
122
+
123
+ #### `search`
124
+
125
+ ```python
126
+ async def search(
127
+ self,
128
+ query: str,
129
+ max_results: int = 10
130
+ ) -> list[Evidence]
131
+ ```
132
+
133
+ Searches Europe PMC for articles and preprints.
134
+
135
+ **Parameters**:
136
+ - `query`: Search query string
137
+ - `max_results`: Maximum number of results to return (default: 10)
138
+
139
+ **Returns**: List of `Evidence` objects with articles/preprints.
140
+
141
+ **Note**: Includes both preprints (marked with `[PREPRINT - Not peer-reviewed]`) and peer-reviewed articles.
142
+
143
+ **Raises**:
144
+ - `SearchError`: If search fails
145
+
146
+ ## RAGTool
147
+
148
+ **Module**: `src.tools.rag_tool`
149
+
150
+ **Purpose**: Semantic search within collected evidence.
151
+
152
+ ### Properties
153
+
154
+ #### `name`
155
+
156
+ ```python
157
+ @property
158
+ def name(self) -> str
159
+ ```
160
+
161
+ Returns tool name: `"rag"`
162
+
163
+ ### Methods
164
+
165
+ #### `search`
166
+
167
+ ```python
168
+ async def search(
169
+ self,
170
+ query: str,
171
+ max_results: int = 10
172
+ ) -> list[Evidence]
173
+ ```
174
+
175
+ Searches collected evidence using semantic similarity.
176
+
177
+ **Parameters**:
178
+ - `query`: Search query string
179
+ - `max_results`: Maximum number of results to return (default: 10)
180
+
181
+ **Returns**: List of `Evidence` objects from collected evidence.
182
+
183
+ **Note**: Requires evidence to be ingested into the RAG service first.
184
+
185
+ ## SearchHandler
186
+
187
+ **Module**: `src.tools.search_handler`
188
+
189
+ **Purpose**: Orchestrates parallel searches across multiple tools.
190
+
191
+ ### Methods
192
+
193
+ #### `search`
194
+
195
+ ```python
196
+ async def search(
197
+ self,
198
+ query: str,
199
+ tools: list[SearchTool] | None = None,
200
+ max_results_per_tool: int = 10
201
+ ) -> SearchResult
202
+ ```
203
+
204
+ Searches multiple tools in parallel.
205
+
206
+ **Parameters**:
207
+ - `query`: Search query string
208
+ - `tools`: List of tools to use (default: all available tools)
209
+ - `max_results_per_tool`: Maximum results per tool (default: 10)
210
+
211
+ **Returns**: `SearchResult` with:
212
+ - `evidence`: Aggregated list of evidence
213
+ - `tool_results`: Results per tool
214
+ - `total_count`: Total number of results
215
+
216
+ **Note**: Uses `asyncio.gather()` for parallel execution. Handles tool failures gracefully.
217
+
218
+ ## See Also
219
+
220
+ - [Architecture - Tools](../architecture/tools.md) - Architecture overview
221
+ - [Models API](models.md) - Data models used by tools
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
docs/architecture/agents.md ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agents Architecture
2
+
3
+ DeepCritical uses Pydantic AI agents for all AI-powered operations. All agents follow a consistent pattern and use structured output types.
4
+
5
+ ## Agent Pattern
6
+
7
+ All agents use the Pydantic AI `Agent` class with the following structure:
8
+
9
+ - **System Prompt**: Module-level constant with date injection
10
+ - **Agent Class**: `__init__(model: Any | None = None)`
11
+ - **Main Method**: Async method (e.g., `async def evaluate()`, `async def write_report()`)
12
+ - **Factory Function**: `def create_agent_name(model: Any | None = None) -> AgentName`
13
+
14
+ ## Model Initialization
15
+
16
+ Agents use `get_model()` from `src/agent_factory/judges.py` if no model is provided. This supports:
17
+
18
+ - OpenAI models
19
+ - Anthropic models
20
+ - HuggingFace Inference API models
21
+
22
+ The model selection is based on the configured `LLM_PROVIDER` in settings.
23
+
24
+ ## Error Handling
25
+
26
+ Agents return fallback values on failure rather than raising exceptions:
27
+
28
+ - `KnowledgeGapOutput(research_complete=False, outstanding_gaps=[...])`
29
+ - Empty strings for text outputs
30
+ - Default structured outputs
31
+
32
+ All errors are logged with context using structlog.
33
+
34
+ ## Input Validation
35
+
36
+ All agents validate inputs:
37
+
38
+ - Check that queries/inputs are not empty
39
+ - Truncate very long inputs with warnings
40
+ - Handle None values gracefully
41
+
42
+ ## Output Types
43
+
44
+ Agents use structured output types from `src/utils/models.py`:
45
+
46
+ - `KnowledgeGapOutput`: Research completeness evaluation
47
+ - `AgentSelectionPlan`: Tool selection plan
48
+ - `ReportDraft`: Long-form report structure
49
+ - `ParsedQuery`: Query parsing and mode detection
50
+
51
+ For text output (writer agents), agents return `str` directly.
52
+
53
+ ## Agent Types
54
+
55
+ ### Knowledge Gap Agent
56
+
57
+ **File**: `src/agents/knowledge_gap.py`
58
+
59
+ **Purpose**: Evaluates research state and identifies knowledge gaps.
60
+
61
+ **Output**: `KnowledgeGapOutput` with:
62
+ - `research_complete`: Boolean indicating if research is complete
63
+ - `outstanding_gaps`: List of remaining knowledge gaps
64
+
65
+ **Methods**:
66
+ - `async def evaluate(query, background_context, conversation_history, iteration, time_elapsed_minutes, max_time_minutes) -> KnowledgeGapOutput`
67
+
68
+ ### Tool Selector Agent
69
+
70
+ **File**: `src/agents/tool_selector.py`
71
+
72
+ **Purpose**: Selects appropriate tools for addressing knowledge gaps.
73
+
74
+ **Output**: `AgentSelectionPlan` with list of `AgentTask` objects.
75
+
76
+ **Available Agents**:
77
+ - `WebSearchAgent`: General web search for fresh information
78
+ - `SiteCrawlerAgent`: Research specific entities/companies
79
+ - `RAGAgent`: Semantic search within collected evidence
80
+
81
+ ### Writer Agent
82
+
83
+ **File**: `src/agents/writer.py`
84
+
85
+ **Purpose**: Generates final reports from research findings.
86
+
87
+ **Output**: Markdown string with numbered citations.
88
+
89
+ **Methods**:
90
+ - `async def write_report(query, findings, output_length, output_instructions) -> str`
91
+
92
+ **Features**:
93
+ - Validates inputs
94
+ - Truncates very long findings (max 50000 chars) with warning
95
+ - Retry logic for transient failures (3 retries)
96
+ - Citation validation before returning
97
+
98
+ ### Long Writer Agent
99
+
100
+ **File**: `src/agents/long_writer.py`
101
+
102
+ **Purpose**: Long-form report generation with section-by-section writing.
103
+
104
+ **Input/Output**: Uses `ReportDraft` models.
105
+
106
+ **Methods**:
107
+ - `async def write_next_section(query, draft, section_title, section_content) -> LongWriterOutput`
108
+ - `async def write_report(query, report_title, report_draft) -> str`
109
+
110
+ **Features**:
111
+ - Writes sections iteratively
112
+ - Aggregates references across sections
113
+ - Reformats section headings and references
114
+ - Deduplicates and renumbers references
115
+
116
+ ### Proofreader Agent
117
+
118
+ **File**: `src/agents/proofreader.py`
119
+
120
+ **Purpose**: Proofreads and polishes report drafts.
121
+
122
+ **Input**: `ReportDraft`
123
+ **Output**: Polished markdown string
124
+
125
+ **Methods**:
126
+ - `async def proofread(query, report_title, report_draft) -> str`
127
+
128
+ **Features**:
129
+ - Removes duplicate content across sections
130
+ - Adds executive summary if multiple sections
131
+ - Preserves all references and citations
132
+ - Improves flow and readability
133
+
134
+ ### Thinking Agent
135
+
136
+ **File**: `src/agents/thinking.py`
137
+
138
+ **Purpose**: Generates observations from conversation history.
139
+
140
+ **Output**: Observation string
141
+
142
+ **Methods**:
143
+ - `async def generate_observations(query, background_context, conversation_history) -> str`
144
+
145
+ ### Input Parser Agent
146
+
147
+ **File**: `src/agents/input_parser.py`
148
+
149
+ **Purpose**: Parses and improves user queries, detects research mode.
150
+
151
+ **Output**: `ParsedQuery` with:
152
+ - `original_query`: Original query string
153
+ - `improved_query`: Refined query string
154
+ - `research_mode`: "iterative" or "deep"
155
+ - `key_entities`: List of key entities
156
+ - `research_questions`: List of research questions
157
+
158
+ ## Factory Functions
159
+
160
+ All agents have factory functions in `src/agent_factory/agents.py`:
161
+
162
+ ```python
163
+ def create_knowledge_gap_agent(model: Any | None = None) -> KnowledgeGapAgent
164
+ def create_tool_selector_agent(model: Any | None = None) -> ToolSelectorAgent
165
+ def create_writer_agent(model: Any | None = None) -> WriterAgent
166
+ # ... etc
167
+ ```
168
+
169
+ Factory functions:
170
+ - Use `get_model()` if no model provided
171
+ - Raise `ConfigurationError` if creation fails
172
+ - Log agent creation
173
+
174
+ ## See Also
175
+
176
+ - [Orchestrators](orchestrators.md) - How agents are orchestrated
177
+ - [API Reference - Agents](../api/agents.md) - API documentation
178
+ - [Contributing - Code Style](../contributing/code-style.md) - Development guidelines
179
+
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
docs/architecture/graph-orchestration.md ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Graph Orchestration Architecture
2
+
3
+ ## Overview
4
+
5
+ Phase 4 implements a graph-based orchestration system for research workflows using Pydantic AI agents as nodes. This enables better parallel execution, conditional routing, and state management compared to simple agent chains.
6
+
7
+ ## Graph Structure
8
+
9
+ ### Nodes
10
+
11
+ Graph nodes represent different stages in the research workflow:
12
+
13
+ 1. **Agent Nodes**: Execute Pydantic AI agents
14
+ - Input: Prompt/query
15
+ - Output: Structured or unstructured response
16
+ - Examples: `KnowledgeGapAgent`, `ToolSelectorAgent`, `ThinkingAgent`
17
+
18
+ 2. **State Nodes**: Update or read workflow state
19
+ - Input: Current state
20
+ - Output: Updated state
21
+ - Examples: Update evidence, update conversation history
22
+
23
+ 3. **Decision Nodes**: Make routing decisions based on conditions
24
+ - Input: Current state/results
25
+ - Output: Next node ID
26
+ - Examples: Continue research vs. complete research
27
+
28
+ 4. **Parallel Nodes**: Execute multiple nodes concurrently
29
+ - Input: List of node IDs
30
+ - Output: Aggregated results
31
+ - Examples: Parallel iterative research loops
32
+
33
+ ### Edges
34
+
35
+ Edges define transitions between nodes:
36
+
37
+ 1. **Sequential Edges**: Always traversed (no condition)
38
+ - From: Source node
39
+ - To: Target node
40
+ - Condition: None (always True)
41
+
42
+ 2. **Conditional Edges**: Traversed based on condition
43
+ - From: Source node
44
+ - To: Target node
45
+ - Condition: Callable that returns bool
46
+ - Example: If research complete → go to writer, else → continue loop
47
+
48
+ 3. **Parallel Edges**: Used for parallel execution branches
49
+ - From: Parallel node
50
+ - To: Multiple target nodes
51
+ - Execution: All targets run concurrently
52
+
53
+ ## Graph Patterns
54
+
55
+ ### Iterative Research Graph
56
+
57
+ ```
58
+ [Input] → [Thinking] → [Knowledge Gap] → [Decision: Complete?]
59
+ ↓ No ↓ Yes
60
+ [Tool Selector] [Writer]
61
+
62
+ [Execute Tools] → [Loop Back]
63
+ ```
64
+
65
+ ### Deep Research Graph
66
+
67
+ ```
68
+ [Input] → [Planner] → [Parallel Iterative Loops] → [Synthesizer]
69
+ ↓ ↓ ↓
70
+ [Loop1] [Loop2] [Loop3]
71
+ ```
72
+
73
+ ## State Management
74
+
75
+ State is managed via `WorkflowState` using `ContextVar` for thread-safe isolation:
76
+
77
+ - **Evidence**: Collected evidence from searches
78
+ - **Conversation**: Iteration history (gaps, tool calls, findings, thoughts)
79
+ - **Embedding Service**: For semantic search
80
+
81
+ State transitions occur at state nodes, which update the global workflow state.
82
+
83
+ ## Execution Flow
84
+
85
+ 1. **Graph Construction**: Build graph from nodes and edges
86
+ 2. **Graph Validation**: Ensure graph is valid (no cycles, all nodes reachable)
87
+ 3. **Graph Execution**: Traverse graph from entry node
88
+ 4. **Node Execution**: Execute each node based on type
89
+ 5. **Edge Evaluation**: Determine next node(s) based on edges
90
+ 6. **Parallel Execution**: Use `asyncio.gather()` for parallel nodes
91
+ 7. **State Updates**: Update state at state nodes
92
+ 8. **Event Streaming**: Yield events during execution for UI
93
+
94
+ ## Conditional Routing
95
+
96
+ Decision nodes evaluate conditions and return next node IDs:
97
+
98
+ - **Knowledge Gap Decision**: If `research_complete` → writer, else → tool selector
99
+ - **Budget Decision**: If budget exceeded → exit, else → continue
100
+ - **Iteration Decision**: If max iterations → exit, else → continue
101
+
102
+ ## Parallel Execution
103
+
104
+ Parallel nodes execute multiple nodes concurrently:
105
+
106
+ - Each parallel branch runs independently
107
+ - Results are aggregated after all branches complete
108
+ - State is synchronized after parallel execution
109
+ - Errors in one branch don't stop other branches
110
+
111
+ ## Budget Enforcement
112
+
113
+ Budget constraints are enforced at decision nodes:
114
+
115
+ - **Token Budget**: Track LLM token usage
116
+ - **Time Budget**: Track elapsed time
117
+ - **Iteration Budget**: Track iteration count
118
+
119
+ If any budget is exceeded, execution routes to exit node.
120
+
121
+ ## Error Handling
122
+
123
+ Errors are handled at multiple levels:
124
+
125
+ 1. **Node Level**: Catch errors in individual node execution
126
+ 2. **Graph Level**: Handle errors during graph traversal
127
+ 3. **State Level**: Rollback state changes on error
128
+
129
+ Errors are logged and yield error events for UI.
130
+
131
+ ## Backward Compatibility
132
+
133
+ Graph execution is optional via feature flag:
134
+
135
+ - `USE_GRAPH_EXECUTION=true`: Use graph-based execution
136
+ - `USE_GRAPH_EXECUTION=false`: Use agent chain execution (existing)
137
+
138
+ This allows gradual migration and fallback if needed.
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
docs/architecture/graph_orchestration.md ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Graph Orchestration Architecture
2
+
3
+ ## Overview
4
+
5
+ Phase 4 implements a graph-based orchestration system for research workflows using Pydantic AI agents as nodes. This enables better parallel execution, conditional routing, and state management compared to simple agent chains.
6
+
7
+ ## Graph Structure
8
+
9
+ ### Nodes
10
+
11
+ Graph nodes represent different stages in the research workflow:
12
+
13
+ 1. **Agent Nodes**: Execute Pydantic AI agents
14
+ - Input: Prompt/query
15
+ - Output: Structured or unstructured response
16
+ - Examples: `KnowledgeGapAgent`, `ToolSelectorAgent`, `ThinkingAgent`
17
+
18
+ 2. **State Nodes**: Update or read workflow state
19
+ - Input: Current state
20
+ - Output: Updated state
21
+ - Examples: Update evidence, update conversation history
22
+
23
+ 3. **Decision Nodes**: Make routing decisions based on conditions
24
+ - Input: Current state/results
25
+ - Output: Next node ID
26
+ - Examples: Continue research vs. complete research
27
+
28
+ 4. **Parallel Nodes**: Execute multiple nodes concurrently
29
+ - Input: List of node IDs
30
+ - Output: Aggregated results
31
+ - Examples: Parallel iterative research loops
32
+
33
+ ### Edges
34
+
35
+ Edges define transitions between nodes:
36
+
37
+ 1. **Sequential Edges**: Always traversed (no condition)
38
+ - From: Source node
39
+ - To: Target node
40
+ - Condition: None (always True)
41
+
42
+ 2. **Conditional Edges**: Traversed based on condition
43
+ - From: Source node
44
+ - To: Target node
45
+ - Condition: Callable that returns bool
46
+ - Example: If research complete → go to writer, else → continue loop
47
+
48
+ 3. **Parallel Edges**: Used for parallel execution branches
49
+ - From: Parallel node
50
+ - To: Multiple target nodes
51
+ - Execution: All targets run concurrently
52
+
53
+ ## Graph Patterns
54
+
55
+ ### Iterative Research Graph
56
+
57
+ ```
58
+ [Input] → [Thinking] → [Knowledge Gap] → [Decision: Complete?]
59
+ ↓ No ↓ Yes
60
+ [Tool Selector] [Writer]
61
+
62
+ [Execute Tools] → [Loop Back]
63
+ ```
64
+
65
+ ### Deep Research Graph
66
+
67
+ ```
68
+ [Input] → [Planner] → [Parallel Iterative Loops] → [Synthesizer]
69
+ ↓ ↓ ↓
70
+ [Loop1] [Loop2] [Loop3]
71
+ ```
72
+
73
+ ## State Management
74
+
75
+ State is managed via `WorkflowState` using `ContextVar` for thread-safe isolation:
76
+
77
+ - **Evidence**: Collected evidence from searches
78
+ - **Conversation**: Iteration history (gaps, tool calls, findings, thoughts)
79
+ - **Embedding Service**: For semantic search
80
+
81
+ State transitions occur at state nodes, which update the global workflow state.
82
+
83
+ ## Execution Flow
84
+
85
+ 1. **Graph Construction**: Build graph from nodes and edges
86
+ 2. **Graph Validation**: Ensure the graph is valid (all edges reference existing nodes, and every node is reachable from the entry node; intentional loop-back edges, as in the iterative research graph, are permitted)
87
+ 3. **Graph Execution**: Traverse graph from entry node
88
+ 4. **Node Execution**: Execute each node based on type
89
+ 5. **Edge Evaluation**: Determine next node(s) based on edges
90
+ 6. **Parallel Execution**: Use `asyncio.gather()` for parallel nodes
91
+ 7. **State Updates**: Update state at state nodes
92
+ 8. **Event Streaming**: Yield events during execution for UI
93
+
94
+ ## Conditional Routing
95
+
96
+ Decision nodes evaluate conditions and return next node IDs:
97
+
98
+ - **Knowledge Gap Decision**: If `research_complete` → writer, else → tool selector
99
+ - **Budget Decision**: If budget exceeded → exit, else → continue
100
+ - **Iteration Decision**: If max iterations → exit, else → continue
101
+
102
+ ## Parallel Execution
103
+
104
+ Parallel nodes execute multiple nodes concurrently:
105
+
106
+ - Each parallel branch runs independently
107
+ - Results are aggregated after all branches complete
108
+ - State is synchronized after parallel execution
109
+ - Errors in one branch don't stop other branches
110
+
111
+ ## Budget Enforcement
112
+
113
+ Budget constraints are enforced at decision nodes:
114
+
115
+ - **Token Budget**: Track LLM token usage
116
+ - **Time Budget**: Track elapsed time
117
+ - **Iteration Budget**: Track iteration count
118
+
119
+ If any budget is exceeded, execution routes to exit node.
120
+
121
+ ## Error Handling
122
+
123
+ Errors are handled at multiple levels:
124
+
125
+ 1. **Node Level**: Catch errors in individual node execution
126
+ 2. **Graph Level**: Handle errors during graph traversal
127
+ 3. **State Level**: Rollback state changes on error
128
+
129
+ Errors are logged and yield error events for UI.
130
+
131
+ ## Backward Compatibility
132
+
133
+ Graph execution is optional via feature flag:
134
+
135
+ - `USE_GRAPH_EXECUTION=true`: Use graph-based execution
136
+ - `USE_GRAPH_EXECUTION=false`: Use agent chain execution (existing)
137
+
138
+ This allows gradual migration and fallback if needed.
139
+
140
+ ## See Also
141
+
142
+ - [Orchestrators](orchestrators.md) - Overview of all orchestrator patterns
143
+ - [Workflows](workflows.md) - Workflow diagrams and patterns
144
+ - [Workflow Diagrams](workflow-diagrams.md) - Detailed workflow diagrams
145
+ - [API Reference - Orchestrators](../api/orchestrators.md) - API documentation
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
docs/architecture/middleware.md ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Middleware Architecture
2
+
3
+ DeepCritical uses middleware for state management, budget tracking, and workflow coordination.
4
+
5
+ ## State Management
6
+
7
+ ### WorkflowState
8
+
9
+ **File**: `src/middleware/state_machine.py`
10
+
11
+ **Purpose**: Thread-safe state management for research workflows
12
+
13
+ **Implementation**: Uses `ContextVar` for thread-safe isolation
14
+
15
+ **State Components**:
16
+ - `evidence: list[Evidence]`: Collected evidence from searches
17
+ - `conversation: Conversation`: Iteration history (gaps, tool calls, findings, thoughts)
18
+ - `embedding_service: Any`: Embedding service for semantic search
19
+
20
+ **Methods**:
21
+ - `add_evidence(evidence: Evidence)`: Adds evidence with URL-based deduplication
22
+ - `async search_related(query: str, top_k: int = 5) -> list[Evidence]`: Semantic search
23
+
24
+ **Initialization**:
25
+ ```python
26
+ from src.middleware.state_machine import init_workflow_state
27
+
28
+ init_workflow_state(embedding_service)
29
+ ```
30
+
31
+ **Access**:
32
+ ```python
33
+ from src.middleware.state_machine import get_workflow_state
34
+
35
+ state = get_workflow_state() # Auto-initializes if missing
36
+ ```
37
+
38
+ ## Workflow Manager
39
+
40
+ **File**: `src/middleware/workflow_manager.py`
41
+
42
+ **Purpose**: Coordinates parallel research loops
43
+
44
+ **Methods**:
45
+ - `add_loop(loop: ResearchLoop)`: Add a research loop to manage
46
+ - `async run_loops_parallel() -> list[ResearchLoop]`: Run all loops in parallel
47
+ - `update_loop_status(loop_id: str, status: str)`: Update loop status
48
+ - `sync_loop_evidence_to_state()`: Synchronize evidence from loops to global state
49
+
50
+ **Features**:
51
+ - Uses `asyncio.gather()` for parallel execution
52
+ - Handles errors per loop (doesn't fail all if one fails)
53
+ - Tracks loop status: `pending`, `running`, `completed`, `failed`, `cancelled`
54
+ - Evidence deduplication across parallel loops
55
+
56
+ **Usage**:
57
+ ```python
58
+ from src.middleware.workflow_manager import WorkflowManager
59
+
60
+ manager = WorkflowManager()
61
+ manager.add_loop(loop1)
62
+ manager.add_loop(loop2)
63
+ completed_loops = await manager.run_loops_parallel()
64
+ ```
65
+
66
+ ## Budget Tracker
67
+
68
+ **File**: `src/middleware/budget_tracker.py`
69
+
70
+ **Purpose**: Tracks and enforces resource limits
71
+
72
+ **Budget Components**:
73
+ - **Tokens**: LLM token usage
74
+ - **Time**: Elapsed time in seconds
75
+ - **Iterations**: Number of iterations
76
+
77
+ **Methods**:
78
+ - `create_budget(token_limit, time_limit_seconds, iterations_limit) -> BudgetStatus`
79
+ - `add_tokens(tokens: int)`: Add token usage
80
+ - `start_timer()`: Start time tracking
81
+ - `update_timer()`: Update elapsed time
82
+ - `increment_iteration()`: Increment iteration count
83
+ - `check_budget() -> BudgetStatus`: Check current budget status
84
+ - `can_continue() -> bool`: Check if research can continue
85
+
86
+ **Token Estimation**:
87
+ - `estimate_tokens(text: str) -> int`: ~4 chars per token
88
+ - `estimate_llm_call_tokens(prompt: str, response: str) -> int`: Estimate LLM call tokens
89
+
90
+ **Usage**:
91
+ ```python
92
+ from src.middleware.budget_tracker import BudgetTracker
93
+
94
+ tracker = BudgetTracker()
95
+ budget = tracker.create_budget(
96
+ token_limit=100000,
97
+ time_limit_seconds=600,
98
+ iterations_limit=10
99
+ )
100
+ tracker.start_timer()
101
+ # ... research operations ...
102
+ if not tracker.can_continue():
103
+ # Budget exceeded, stop research
104
+ pass
105
+ ```
106
+
107
+ ## Models
108
+
109
+ All middleware models are defined in `src/utils/models.py`:
110
+
111
+ - `IterationData`: Data for a single iteration
112
+ - `Conversation`: Conversation history with iterations
113
+ - `ResearchLoop`: Research loop state and configuration
114
+ - `BudgetStatus`: Current budget status
115
+
116
+ ## Thread Safety
117
+
118
+ All middleware components use `ContextVar` for thread-safe isolation:
119
+
120
+ - Each request/thread has its own workflow state
121
+ - No global mutable state
122
+ - Safe for concurrent requests
123
+
124
+ ## See Also
125
+
126
+ - [Orchestrators](orchestrators.md) - How middleware is used in orchestration
127
+ - [API Reference - Orchestrators](../api/orchestrators.md) - API documentation
128
+ - [Contributing - Code Style](../contributing/code-style.md) - Development guidelines
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
docs/architecture/orchestrators.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Orchestrators Architecture
2
+
3
+ DeepCritical supports multiple orchestration patterns for research workflows.
4
+
5
+ ## Research Flows
6
+
7
+ ### IterativeResearchFlow
8
+
9
+ **File**: `src/orchestrator/research_flow.py`
10
+
11
+ **Pattern**: Generate observations → Evaluate gaps → Select tools → Execute → Judge → Continue/Complete
12
+
13
+ **Agents Used**:
14
+ - `KnowledgeGapAgent`: Evaluates research completeness
15
+ - `ToolSelectorAgent`: Selects tools for addressing gaps
16
+ - `ThinkingAgent`: Generates observations
17
+ - `WriterAgent`: Creates final report
18
+ - `JudgeHandler`: Assesses evidence sufficiency
19
+
20
+ **Features**:
21
+ - Tracks iterations, time, budget
22
+ - Supports graph execution (`use_graph=True`) and agent chains (`use_graph=False`)
23
+ - Iterates until research complete or constraints met
24
+
25
+ **Usage**:
26
+ ```python
27
+ from src.orchestrator.research_flow import IterativeResearchFlow
28
+
29
+ flow = IterativeResearchFlow(
30
+ search_handler=search_handler,
31
+ judge_handler=judge_handler,
32
+ use_graph=False
33
+ )
34
+
35
+ async for event in flow.run(query):
36
+ # Handle events
37
+ pass
38
+ ```
39
+
40
+ ### DeepResearchFlow
41
+
42
+ **File**: `src/orchestrator/research_flow.py`
43
+
44
+ **Pattern**: Planner → Parallel iterative loops per section → Synthesizer
45
+
46
+ **Agents Used**:
47
+ - `PlannerAgent`: Breaks query into report sections
48
+ - `IterativeResearchFlow`: Per-section research (parallel)
49
+ - `LongWriterAgent` or `ProofreaderAgent`: Final synthesis
50
+
51
+ **Features**:
52
+ - Uses `WorkflowManager` for parallel execution
53
+ - Budget tracking per section and globally
54
+ - State synchronization across parallel loops
55
+ - Supports graph execution and agent chains
56
+
57
+ **Usage**:
58
+ ```python
59
+ from src.orchestrator.research_flow import DeepResearchFlow
60
+
61
+ flow = DeepResearchFlow(
62
+ search_handler=search_handler,
63
+ judge_handler=judge_handler,
64
+ use_graph=True
65
+ )
66
+
67
+ async for event in flow.run(query):
68
+ # Handle events
69
+ pass
70
+ ```
71
+
72
+ ## Graph Orchestrator
73
+
74
+ **File**: `src/orchestrator/graph_orchestrator.py`
75
+
76
+ **Purpose**: Graph-based execution using Pydantic AI agents as nodes
77
+
78
+ **Features**:
79
+ - Uses Pydantic AI Graphs (when available) or agent chains (fallback)
80
+ - Routes based on research mode (iterative/deep/auto)
81
+ - Streams `AgentEvent` objects for UI
82
+
83
+ **Node Types**:
84
+ - **Agent Nodes**: Execute Pydantic AI agents
85
+ - **State Nodes**: Update or read workflow state
86
+ - **Decision Nodes**: Make routing decisions
87
+ - **Parallel Nodes**: Execute multiple nodes concurrently
88
+
89
+ **Edge Types**:
90
+ - **Sequential Edges**: Always traversed
91
+ - **Conditional Edges**: Traversed based on condition
92
+ - **Parallel Edges**: Used for parallel execution branches
93
+
94
+ ## Orchestrator Factory
95
+
96
+ **File**: `src/orchestrator_factory.py`
97
+
98
+ **Purpose**: Factory for creating orchestrators
99
+
100
+ **Modes**:
101
+ - **Simple**: Legacy orchestrator (backward compatible)
102
+ - **Advanced**: Magentic orchestrator (requires OpenAI API key)
103
+ - **Auto-detect**: Chooses based on API key availability
104
+
105
+ **Usage**:
106
+ ```python
107
+ from src.orchestrator_factory import create_orchestrator
108
+
109
+ orchestrator = create_orchestrator(
110
+ search_handler=search_handler,
111
+ judge_handler=judge_handler,
112
+ config={},
113
+ mode="advanced" # or "simple" or None for auto-detect
114
+ )
115
+ ```
116
+
117
+ ## Magentic Orchestrator
118
+
119
+ **File**: `src/orchestrator_magentic.py`
120
+
121
+ **Purpose**: Multi-agent coordination using Microsoft Agent Framework
122
+
123
+ **Features**:
124
+ - Uses `agent-framework-core`
125
+ - ChatAgent pattern with internal LLMs per agent
126
+ - `MagenticBuilder` with participants: searcher, hypothesizer, judge, reporter
127
+ - Manager orchestrates agents via `OpenAIChatClient`
128
+ - Requires OpenAI API key (function calling support)
129
+ - Event-driven: converts Magentic events to `AgentEvent` for UI streaming
130
+
131
+ **Requirements**:
132
+ - `agent-framework-core` package
133
+ - OpenAI API key
134
+
135
+ ## Hierarchical Orchestrator
136
+
137
+ **File**: `src/orchestrator_hierarchical.py`
138
+
139
+ **Purpose**: Hierarchical orchestrator using middleware and sub-teams
140
+
141
+ **Features**:
142
+ - Uses `SubIterationMiddleware` with `ResearchTeam` and `LLMSubIterationJudge`
143
+ - Adapts Magentic ChatAgent to `SubIterationTeam` protocol
144
+ - Event-driven via `asyncio.Queue` for coordination
145
+ - Supports sub-iteration patterns for complex research tasks
146
+
147
+ ## Legacy Simple Mode
148
+
149
+ **File**: `src/legacy_orchestrator.py`
150
+
151
+ **Purpose**: Linear search-judge-synthesize loop
152
+
153
+ **Features**:
154
+ - Uses `SearchHandlerProtocol` and `JudgeHandlerProtocol`
155
+ - Generator-based design yielding `AgentEvent` objects
156
+ - Backward compatibility for simple use cases
157
+
158
+ ## State Initialization
159
+
160
+ All orchestrators must initialize workflow state:
161
+
162
+ ```python
163
+ from src.middleware.state_machine import init_workflow_state
164
+ from src.services.embeddings import get_embedding_service
165
+
166
+ embedding_service = get_embedding_service()
167
+ init_workflow_state(embedding_service)
168
+ ```
169
+
170
+ ## Event Streaming
171
+
172
+ All orchestrators yield `AgentEvent` objects:
173
+
174
+ **Event Types**:
175
+ - `started`: Research started
176
+ - `search_complete`: Search completed
177
+ - `judge_complete`: Evidence evaluation completed
178
+ - `hypothesizing`: Generating hypotheses
179
+ - `synthesizing`: Synthesizing results
180
+ - `complete`: Research completed
181
+ - `error`: Error occurred
182
+
183
+ **Event Structure**:
184
+ ```python
185
+ class AgentEvent:
186
+ type: str
187
+ iteration: int | None
188
+ data: dict[str, Any]
189
+ ```
190
+
191
+ ## See Also
192
+
193
+ - [Graph Orchestration](graph-orchestration.md) - Graph-based execution details
194
+ - [Graph Orchestration (Detailed)](graph_orchestration.md) - Detailed graph architecture
195
+ - [Workflows](workflows.md) - Workflow diagrams and patterns
196
+ - [Workflow Diagrams](workflow-diagrams.md) - Detailed workflow diagrams
197
+ - [API Reference - Orchestrators](../api/orchestrators.md) - API documentation
198
+
docs/architecture/services.md ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Services Architecture
2
+
3
+ DeepCritical provides several services for embeddings, RAG, and statistical analysis.
4
+
5
+ ## Embedding Service
6
+
7
+ **File**: `src/services/embeddings.py`
8
+
9
+ **Purpose**: Local sentence-transformers for semantic search and deduplication
10
+
11
+ **Features**:
12
+ - **No API Key Required**: Uses local sentence-transformers models
13
+ - **Async-Safe**: All operations use `run_in_executor()` to avoid blocking
14
+ - **ChromaDB Storage**: Vector storage for embeddings
15
+ - **Deduplication**: 0.85 similarity threshold (85% similarity = duplicate)
16
+
17
+ **Model**: Configurable via `settings.local_embedding_model` (default: `all-MiniLM-L6-v2`)
18
+
19
+ **Methods**:
20
+ - `async def embed(text: str) -> list[float]`: Generate embeddings
21
+ - `async def embed_batch(texts: list[str]) -> list[list[float]]`: Batch embedding
22
+ - `async def similarity(text1: str, text2: str) -> float`: Calculate similarity
23
+ - `async def find_duplicates(texts: list[str], threshold: float = 0.85) -> list[tuple[int, int]]`: Find duplicates
24
+
25
+ **Usage**:
26
+ ```python
27
+ from src.services.embeddings import get_embedding_service
28
+
29
+ service = get_embedding_service()
30
+ embedding = await service.embed("text to embed")
31
+ ```
32
+
33
+ ## LlamaIndex RAG Service
34
+
35
+ **File**: `src/services/rag.py`
36
+
37
+ **Purpose**: Retrieval-Augmented Generation using LlamaIndex
38
+
39
+ **Features**:
40
+ - **OpenAI Embeddings**: Requires `OPENAI_API_KEY`
41
+ - **ChromaDB Storage**: Vector database for document storage
42
+ - **Metadata Preservation**: Preserves source, title, URL, date, authors
43
+ - **Lazy Initialization**: Graceful fallback if OpenAI key not available
44
+
45
+ **Methods**:
46
+ - `async def ingest_evidence(evidence: list[Evidence]) -> None`: Ingest evidence into RAG
47
+ - `async def retrieve(query: str, top_k: int = 5) -> list[Document]`: Retrieve relevant documents
48
+ - `async def query(query: str, top_k: int = 5) -> str`: Query with RAG
49
+
50
+ **Usage**:
51
+ ```python
52
+ from src.services.rag import get_rag_service
53
+
54
+ service = get_rag_service()
55
+ if service:
56
+ documents = await service.retrieve("query", top_k=5)
57
+ ```
58
+
59
+ ## Statistical Analyzer
60
+
61
+ **File**: `src/services/statistical_analyzer.py`
62
+
63
+ **Purpose**: Secure execution of AI-generated statistical code
64
+
65
+ **Features**:
66
+ - **Modal Sandbox**: Secure, isolated execution environment
67
+ - **Code Generation**: Generates Python code via LLM
68
+ - **Library Pinning**: Version-pinned libraries in `SANDBOX_LIBRARIES`
69
+ - **Network Isolation**: `block_network=True` by default
70
+
71
+ **Libraries Available**:
72
+ - pandas, numpy, scipy
73
+ - matplotlib, scikit-learn
74
+ - statsmodels
75
+
76
+ **Output**: `AnalysisResult` with:
77
+ - `verdict`: SUPPORTED, REFUTED, or INCONCLUSIVE
78
+ - `code`: Generated analysis code
79
+ - `output`: Execution output
80
+ - `error`: Error message if execution failed
81
+
82
+ **Usage**:
83
+ ```python
84
+ from src.services.statistical_analyzer import StatisticalAnalyzer
85
+
86
+ analyzer = StatisticalAnalyzer()
87
+ result = await analyzer.analyze(
88
+ hypothesis="Metformin reduces cancer risk",
89
+ evidence=evidence_list
90
+ )
91
+ ```
92
+
93
+ ## Singleton Pattern
94
+
95
+ All services use the singleton pattern with `@lru_cache(maxsize=1)`:
96
+
97
+ ```python
98
+ @lru_cache(maxsize=1)
99
+ def get_embedding_service() -> EmbeddingService:
100
+ return EmbeddingService()
101
+ ```
102
+
103
+ This ensures:
104
+ - Single instance per process
105
+ - Lazy initialization
106
+ - No dependencies required at import time
107
+
108
+ ## Service Availability
109
+
110
+ Services check availability before use:
111
+
112
+ ```python
113
+ from src.utils.config import settings
114
+
115
+ if settings.modal_available:
116
+ # Use Modal sandbox
117
+ pass
118
+
119
+ if settings.has_openai_key:
120
+ # Use OpenAI embeddings for RAG
121
+ pass
122
+ ```
123
+
124
+ ## See Also
125
+
126
+ - [Tools](tools.md) - How services are used by search tools
127
+ - [API Reference - Services](../api/services.md) - API documentation
128
+ - [Configuration](../configuration/index.md) - Service configuration
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
docs/architecture/tools.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tools Architecture
2
+
3
+ DeepCritical implements a protocol-based search tool system for retrieving evidence from multiple sources.
4
+
5
+ ## SearchTool Protocol
6
+
7
+ All tools implement the `SearchTool` protocol from `src/tools/base.py`:
8
+
9
+ ```python
10
+ class SearchTool(Protocol):
11
+ @property
12
+ def name(self) -> str: ...
13
+
14
+ async def search(
15
+ self,
16
+ query: str,
17
+ max_results: int = 10
18
+ ) -> list[Evidence]: ...
19
+ ```
20
+
21
+ ## Rate Limiting
22
+
23
+ All tools use the `@retry` decorator from tenacity:
24
+
25
+ ```python
26
+ @retry(
27
+ stop=stop_after_attempt(3),
28
+ wait=wait_exponential(...)
29
+ )
30
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
31
+ # Implementation
32
+ ```
33
+
34
+ Tools with API rate limits implement a `_rate_limit()` method and use the shared rate limiters from `src/tools/rate_limiter.py`.
35
+
36
+ ## Error Handling
37
+
38
+ Tools raise custom exceptions:
39
+
40
+ - `SearchError`: General search failures
41
+ - `RateLimitError`: Rate limit exceeded
42
+
43
+ Tools handle HTTP errors (429, 500, timeout) and return empty lists on non-critical errors (with warning logs).
44
+
45
+ ## Query Preprocessing
46
+
47
+ Tools use `preprocess_query()` from `src/tools/query_utils.py` to:
48
+
49
+ - Remove noise from queries
50
+ - Expand synonyms
51
+ - Normalize query format
52
+
53
+ ## Evidence Conversion
54
+
55
+ All tools convert API responses to `Evidence` objects with:
56
+
57
+ - `Citation`: Title, URL, date, authors
58
+ - `content`: Evidence text
59
+ - `relevance_score`: 0.0-1.0 relevance score
60
+ - `metadata`: Additional metadata
61
+
62
+ Missing fields are handled gracefully with defaults.
63
+
64
+ ## Tool Implementations
65
+
66
+ ### PubMed Tool
67
+
68
+ **File**: `src/tools/pubmed.py`
69
+
70
+ **API**: NCBI E-utilities (ESearch → EFetch)
71
+
72
+ **Rate Limiting**:
73
+ - 0.34s between requests (3 req/sec without API key)
74
+ - 0.1s between requests (10 req/sec with NCBI API key)
75
+
76
+ **Features**:
77
+ - XML parsing with `xmltodict`
78
+ - Handles single vs. multiple articles
79
+ - Query preprocessing
80
+ - Evidence conversion with metadata extraction
81
+
82
+ ### ClinicalTrials Tool
83
+
84
+ **File**: `src/tools/clinicaltrials.py`
85
+
86
+ **API**: ClinicalTrials.gov API v2
87
+
88
+ **Important**: Uses the `requests` library (NOT httpx) because the ClinicalTrials.gov WAF blocks the httpx TLS fingerprint.
89
+
90
+ **Execution**: Runs in thread pool: `await asyncio.to_thread(requests.get, ...)`
91
+
92
+ **Filtering**:
93
+ - Only interventional studies
94
+ - Status: `COMPLETED`, `ACTIVE_NOT_RECRUITING`, `RECRUITING`, `ENROLLING_BY_INVITATION`
95
+
96
+ **Features**:
97
+ - Parses nested JSON structure
98
+ - Extracts trial metadata
99
+ - Evidence conversion
100
+
101
+ ### Europe PMC Tool
102
+
103
+ **File**: `src/tools/europepmc.py`
104
+
105
+ **API**: Europe PMC REST API
106
+
107
+ **Features**:
108
+ - Handles preprint markers: `[PREPRINT - Not peer-reviewed]`
109
+ - Builds URLs from DOI or PMID
110
+ - Checks `pubTypeList` for preprint detection
111
+ - Includes both preprints and peer-reviewed articles
112
+
113
+ ### RAG Tool
114
+
115
+ **File**: `src/tools/rag_tool.py`
116
+
117
+ **Purpose**: Semantic search within collected evidence
118
+
119
+ **Implementation**: Wraps `LlamaIndexRAGService`
120
+
121
+ **Features**:
122
+ - Returns Evidence from RAG results
123
+ - Handles evidence ingestion
124
+ - Semantic similarity search
125
+ - Metadata preservation
126
+
127
+ ### Search Handler
128
+
129
+ **File**: `src/tools/search_handler.py`
130
+
131
+ **Purpose**: Orchestrates parallel searches across multiple tools
132
+
133
+ **Features**:
134
+ - Uses `asyncio.gather()` with `return_exceptions=True`
135
+ - Aggregates results into `SearchResult`
136
+ - Handles tool failures gracefully
137
+ - Deduplicates results by URL
138
+
139
+ ## Tool Registration
140
+
141
+ Tools are registered in the search handler:
142
+
143
+ ```python
144
+ from src.tools.pubmed import PubMedTool
145
+ from src.tools.clinicaltrials import ClinicalTrialsTool
146
+ from src.tools.europepmc import EuropePMCTool
147
+
148
+ search_handler = SearchHandler(
149
+ tools=[
150
+ PubMedTool(),
151
+ ClinicalTrialsTool(),
152
+ EuropePMCTool(),
153
+ ]
154
+ )
155
+ ```
156
+
157
+ ## See Also
158
+
159
+ - [Services](services.md) - RAG and embedding services
160
+ - [API Reference - Tools](../api/tools.md) - API documentation
161
+ - [Contributing - Implementation Patterns](../contributing/implementation-patterns.md) - Development guidelines
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
docs/architecture/workflow-diagrams.md ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepCritical Workflow - Simplified Magentic Architecture
2
+
3
+ > **Architecture Pattern**: Microsoft Magentic Orchestration
4
+ > **Design Philosophy**: Simple, dynamic, manager-driven coordination
5
+ > **Key Innovation**: Intelligent manager replaces rigid sequential phases
6
+
7
+ ---
8
+
9
+ ## 1. High-Level Magentic Workflow
10
+
11
+ ```mermaid
12
+ flowchart TD
13
+ Start([User Query]) --> Manager[Magentic Manager<br/>Plan • Select • Assess • Adapt]
14
+
15
+ Manager -->|Plans| Task1[Task Decomposition]
16
+ Task1 --> Manager
17
+
18
+ Manager -->|Selects & Executes| HypAgent[Hypothesis Agent]
19
+ Manager -->|Selects & Executes| SearchAgent[Search Agent]
20
+ Manager -->|Selects & Executes| AnalysisAgent[Analysis Agent]
21
+ Manager -->|Selects & Executes| ReportAgent[Report Agent]
22
+
23
+ HypAgent -->|Results| Manager
24
+ SearchAgent -->|Results| Manager
25
+ AnalysisAgent -->|Results| Manager
26
+ ReportAgent -->|Results| Manager
27
+
28
+ Manager -->|Assesses Quality| Decision{Good Enough?}
29
+ Decision -->|No - Refine| Manager
30
+ Decision -->|No - Different Agent| Manager
31
+ Decision -->|No - Stalled| Replan[Reset Plan]
32
+ Replan --> Manager
33
+
34
+ Decision -->|Yes| Synthesis[Synthesize Final Result]
35
+ Synthesis --> Output([Research Report])
36
+
37
+ style Start fill:#e1f5e1
38
+ style Manager fill:#ffe6e6
39
+ style HypAgent fill:#fff4e6
40
+ style SearchAgent fill:#fff4e6
41
+ style AnalysisAgent fill:#fff4e6
42
+ style ReportAgent fill:#fff4e6
43
+ style Decision fill:#ffd6d6
44
+ style Synthesis fill:#d4edda
45
+ style Output fill:#e1f5e1
46
+ ```
47
+
48
+ ## 2. Magentic Manager: The 6-Phase Cycle
49
+
50
+ ```mermaid
51
+ flowchart LR
52
+ P1[1. Planning<br/>Analyze task<br/>Create strategy] --> P2[2. Agent Selection<br/>Pick best agent<br/>for subtask]
53
+ P2 --> P3[3. Execution<br/>Run selected<br/>agent with tools]
54
+ P3 --> P4[4. Assessment<br/>Evaluate quality<br/>Check progress]
55
+ P4 --> Decision{Quality OK?<br/>Progress made?}
56
+ Decision -->|Yes| P6[6. Synthesis<br/>Combine results<br/>Generate report]
57
+ Decision -->|No| P5[5. Iteration<br/>Adjust plan<br/>Try again]
58
+ P5 --> P2
59
+ P6 --> Done([Complete])
60
+
61
+ style P1 fill:#fff4e6
62
+ style P2 fill:#ffe6e6
63
+ style P3 fill:#e6f3ff
64
+ style P4 fill:#ffd6d6
65
+ style P5 fill:#fff3cd
66
+ style P6 fill:#d4edda
67
+ style Done fill:#e1f5e1
68
+ ```
69
+
70
+ ## 3. Simplified Agent Architecture
71
+
72
+ ```mermaid
73
+ graph TB
74
+ subgraph "Orchestration Layer"
75
+ Manager[Magentic Manager<br/>• Plans workflow<br/>• Selects agents<br/>• Assesses quality<br/>• Adapts strategy]
76
+ SharedContext[(Shared Context<br/>• Hypotheses<br/>• Search Results<br/>• Analysis<br/>• Progress)]
77
+ Manager <--> SharedContext
78
+ end
79
+
80
+ subgraph "Specialist Agents"
81
+ HypAgent[Hypothesis Agent<br/>• Domain understanding<br/>• Hypothesis generation<br/>• Testability refinement]
82
+ SearchAgent[Search Agent<br/>• Multi-source search<br/>• RAG retrieval<br/>• Result ranking]
83
+ AnalysisAgent[Analysis Agent<br/>• Evidence extraction<br/>• Statistical analysis<br/>• Code execution]
84
+ ReportAgent[Report Agent<br/>• Report assembly<br/>• Visualization<br/>• Citation formatting]
85
+ end
86
+
87
+ subgraph "MCP Tools"
88
+ WebSearch[Web Search<br/>PubMed • arXiv • bioRxiv]
89
+ CodeExec[Code Execution<br/>Sandboxed Python]
90
+ RAG[RAG Retrieval<br/>Vector DB • Embeddings]
91
+ Viz[Visualization<br/>Charts • Graphs]
92
+ end
93
+
94
+ Manager -->|Selects & Directs| HypAgent
95
+ Manager -->|Selects & Directs| SearchAgent
96
+ Manager -->|Selects & Directs| AnalysisAgent
97
+ Manager -->|Selects & Directs| ReportAgent
98
+
99
+ HypAgent --> SharedContext
100
+ SearchAgent --> SharedContext
101
+ AnalysisAgent --> SharedContext
102
+ ReportAgent --> SharedContext
103
+
104
+ SearchAgent --> WebSearch
105
+ SearchAgent --> RAG
106
+ AnalysisAgent --> CodeExec
107
+ ReportAgent --> CodeExec
108
+ ReportAgent --> Viz
109
+
110
+ style Manager fill:#ffe6e6
111
+ style SharedContext fill:#ffe6f0
112
+ style HypAgent fill:#fff4e6
113
+ style SearchAgent fill:#fff4e6
114
+ style AnalysisAgent fill:#fff4e6
115
+ style ReportAgent fill:#fff4e6
116
+ style WebSearch fill:#e6f3ff
117
+ style CodeExec fill:#e6f3ff
118
+ style RAG fill:#e6f3ff
119
+ style Viz fill:#e6f3ff
120
+ ```
121
+
122
+ ## 4. Dynamic Workflow Example
123
+
124
+ ```mermaid
125
+ sequenceDiagram
126
+ participant User
127
+ participant Manager
128
+ participant HypAgent
129
+ participant SearchAgent
130
+ participant AnalysisAgent
131
+ participant ReportAgent
132
+
133
+ User->>Manager: "Research protein folding in Alzheimer's"
134
+
135
+ Note over Manager: PLAN: Generate hypotheses → Search → Analyze → Report
136
+
137
+ Manager->>HypAgent: Generate 3 hypotheses
138
+ HypAgent-->>Manager: Returns 3 hypotheses
139
+ Note over Manager: ASSESS: Good quality, proceed
140
+
141
+ Manager->>SearchAgent: Search literature for hypothesis 1
142
+ SearchAgent-->>Manager: Returns 15 papers
143
+ Note over Manager: ASSESS: Good results, continue
144
+
145
+ Manager->>SearchAgent: Search for hypothesis 2
146
+ SearchAgent-->>Manager: Only 2 papers found
147
+ Note over Manager: ASSESS: Insufficient, refine search
148
+
149
+ Manager->>SearchAgent: Refined query for hypothesis 2
150
+ SearchAgent-->>Manager: Returns 12 papers
151
+ Note over Manager: ASSESS: Better, proceed
152
+
153
+ Manager->>AnalysisAgent: Analyze evidence for all hypotheses
154
+ AnalysisAgent-->>Manager: Returns analysis with code
155
+ Note over Manager: ASSESS: Complete, generate report
156
+
157
+ Manager->>ReportAgent: Create comprehensive report
158
+ ReportAgent-->>Manager: Returns formatted report
159
+ Note over Manager: SYNTHESIZE: Combine all results
160
+
161
+ Manager->>User: Final Research Report
162
+ ```
163
+
164
+ ## 5. Manager Decision Logic
165
+
166
+ ```mermaid
167
+ flowchart TD
168
+ Start([Manager Receives Task]) --> Plan[Create Initial Plan]
169
+
170
+ Plan --> Select[Select Agent for Next Subtask]
171
+ Select --> Execute[Execute Agent]
172
+ Execute --> Collect[Collect Results]
173
+
174
+ Collect --> Assess[Assess Quality & Progress]
175
+
176
+ Assess --> Q1{Quality Sufficient?}
177
+ Q1 -->|No| Q2{Same Agent Can Fix?}
178
+ Q2 -->|Yes| Feedback[Provide Specific Feedback]
179
+ Feedback --> Execute
180
+ Q2 -->|No| Different[Try Different Agent]
181
+ Different --> Select
182
+
183
+ Q1 -->|Yes| Q3{Task Complete?}
184
+ Q3 -->|No| Q4{Making Progress?}
185
+ Q4 -->|Yes| Select
186
+ Q4 -->|No - Stalled| Replan[Reset Plan & Approach]
187
+ Replan --> Plan
188
+
189
+ Q3 -->|Yes| Synth[Synthesize Final Result]
190
+ Synth --> Done([Return Report])
191
+
192
+ style Start fill:#e1f5e1
193
+ style Plan fill:#fff4e6
194
+ style Select fill:#ffe6e6
195
+ style Execute fill:#e6f3ff
196
+ style Assess fill:#ffd6d6
197
+ style Q1 fill:#ffe6e6
198
+ style Q2 fill:#ffe6e6
199
+ style Q3 fill:#ffe6e6
200
+ style Q4 fill:#ffe6e6
201
+ style Synth fill:#d4edda
202
+ style Done fill:#e1f5e1
203
+ ```
204
+
205
+ ## 6. Hypothesis Agent Workflow
206
+
207
+ ```mermaid
208
+ flowchart LR
209
+ Input[Research Query] --> Domain[Identify Domain<br/>& Key Concepts]
210
+ Domain --> Context[Retrieve Background<br/>Knowledge]
211
+ Context --> Generate[Generate 3-5<br/>Initial Hypotheses]
212
+ Generate --> Refine[Refine for<br/>Testability]
213
+ Refine --> Rank[Rank by<br/>Quality Score]
214
+ Rank --> Output[Return Top<br/>Hypotheses]
215
+
216
+ Output --> Struct[Hypothesis Structure:<br/>• Statement<br/>• Rationale<br/>• Testability Score<br/>• Data Requirements<br/>• Expected Outcomes]
217
+
218
+ style Input fill:#e1f5e1
219
+ style Output fill:#fff4e6
220
+ style Struct fill:#e6f3ff
221
+ ```
222
+
223
+ ## 7. Search Agent Workflow
224
+
225
+ ```mermaid
226
+ flowchart TD
227
+ Input[Hypotheses] --> Strategy[Formulate Search<br/>Strategy per Hypothesis]
228
+
229
+ Strategy --> Multi[Multi-Source Search]
230
+
231
+ Multi --> PubMed[PubMed Search<br/>via MCP]
232
+ Multi --> ArXiv[arXiv Search<br/>via MCP]
233
+ Multi --> BioRxiv[bioRxiv Search<br/>via MCP]
234
+
235
+ PubMed --> Aggregate[Aggregate Results]
236
+ ArXiv --> Aggregate
237
+ BioRxiv --> Aggregate
238
+
239
+ Aggregate --> Filter[Filter & Rank<br/>by Relevance]
240
+ Filter --> Dedup[Deduplicate<br/>Cross-Reference]
241
+ Dedup --> Embed[Embed Documents<br/>via MCP]
242
+ Embed --> Vector[(Vector DB)]
243
+ Vector --> RAGRetrieval[RAG Retrieval<br/>Top-K per Hypothesis]
244
+ RAGRetrieval --> Output[Return Contextualized<br/>Search Results]
245
+
246
+ style Input fill:#fff4e6
247
+ style Multi fill:#ffe6e6
248
+ style Vector fill:#ffe6f0
249
+ style Output fill:#e6f3ff
250
+ ```
251
+
252
+ ## 8. Analysis Agent Workflow
253
+
254
+ ```mermaid
255
+ flowchart TD
256
+ Input1[Hypotheses] --> Extract
257
+ Input2[Search Results] --> Extract[Extract Evidence<br/>per Hypothesis]
258
+
259
+ Extract --> Methods[Determine Analysis<br/>Methods Needed]
260
+
261
+ Methods --> Branch{Requires<br/>Computation?}
262
+ Branch -->|Yes| GenCode[Generate Python<br/>Analysis Code]
263
+ Branch -->|No| Qual[Qualitative<br/>Synthesis]
264
+
265
+ GenCode --> Execute[Execute Code<br/>via MCP Sandbox]
266
+ Execute --> Interpret1[Interpret<br/>Results]
267
+ Qual --> Interpret2[Interpret<br/>Findings]
268
+
269
+ Interpret1 --> Synthesize[Synthesize Evidence<br/>Across Sources]
270
+ Interpret2 --> Synthesize
271
+
272
+ Synthesize --> Verdict[Determine Verdict<br/>per Hypothesis]
273
+ Verdict --> Support[• Supported<br/>• Refuted<br/>• Inconclusive]
274
+ Support --> Gaps[Identify Knowledge<br/>Gaps & Limitations]
275
+ Gaps --> Output[Return Analysis<br/>Report]
276
+
277
+ style Input1 fill:#fff4e6
278
+ style Input2 fill:#e6f3ff
279
+ style Execute fill:#ffe6e6
280
+ style Output fill:#e6ffe6
281
+ ```
282
+
283
+ ## 9. Report Agent Workflow
284
+
285
+ ```mermaid
286
+ flowchart TD
287
+ Input1[Query] --> Assemble
288
+ Input2[Hypotheses] --> Assemble
289
+ Input3[Search Results] --> Assemble
290
+ Input4[Analysis] --> Assemble[Assemble Report<br/>Sections]
291
+
292
+ Assemble --> Exec[Executive Summary]
293
+ Assemble --> Intro[Introduction]
294
+ Assemble --> Methods[Methods]
295
+ Assemble --> Results[Results per<br/>Hypothesis]
296
+ Assemble --> Discussion[Discussion]
297
+ Assemble --> Future[Future Directions]
298
+ Assemble --> Refs[References]
299
+
300
+ Results --> VizCheck{Needs<br/>Visualization?}
301
+ VizCheck -->|Yes| GenViz[Generate Viz Code]
302
+ GenViz --> ExecViz[Execute via MCP<br/>Create Charts]
303
+ ExecViz --> Combine
304
+ VizCheck -->|No| Combine[Combine All<br/>Sections]
305
+
306
+ Exec --> Combine
307
+ Intro --> Combine
308
+ Methods --> Combine
309
+ Discussion --> Combine
310
+ Future --> Combine
311
+ Refs --> Combine
312
+
313
+ Combine --> Format[Format Output]
314
+ Format --> MD[Markdown]
315
+ Format --> PDF[PDF]
316
+ Format --> JSON[JSON]
317
+
318
+ MD --> Output[Return Final<br/>Report]
319
+ PDF --> Output
320
+ JSON --> Output
321
+
322
+ style Input1 fill:#e1f5e1
323
+ style Input2 fill:#fff4e6
324
+ style Input3 fill:#e6f3ff
325
+ style Input4 fill:#e6ffe6
326
+ style Output fill:#d4edda
327
+ ```
328
+
329
+ ## 10. Data Flow & Event Streaming
330
+
331
+ ```mermaid
332
+ flowchart TD
333
+ User[👤 User] -->|Research Query| UI[Gradio UI]
334
+ UI -->|Submit| Manager[Magentic Manager]
335
+
336
+ Manager -->|Event: Planning| UI
337
+ Manager -->|Select Agent| HypAgent[Hypothesis Agent]
338
+ HypAgent -->|Event: Delta/Message| UI
339
+ HypAgent -->|Hypotheses| Context[(Shared Context)]
340
+
341
+ Context -->|Retrieved by| Manager
342
+ Manager -->|Select Agent| SearchAgent[Search Agent]
343
+ SearchAgent -->|MCP Request| WebSearch[Web Search Tool]
344
+ WebSearch -->|Results| SearchAgent
345
+ SearchAgent -->|Event: Delta/Message| UI
346
+ SearchAgent -->|Documents| Context
347
+ SearchAgent -->|Embeddings| VectorDB[(Vector DB)]
348
+
349
+ Context -->|Retrieved by| Manager
350
+ Manager -->|Select Agent| AnalysisAgent[Analysis Agent]
351
+ AnalysisAgent -->|MCP Request| CodeExec[Code Execution Tool]
352
+ CodeExec -->|Results| AnalysisAgent
353
+ AnalysisAgent -->|Event: Delta/Message| UI
354
+ AnalysisAgent -->|Analysis| Context
355
+
356
+ Context -->|Retrieved by| Manager
357
+ Manager -->|Select Agent| ReportAgent[Report Agent]
358
+ ReportAgent -->|MCP Request| CodeExec
359
+ ReportAgent -->|Event: Delta/Message| UI
360
+ ReportAgent -->|Report| Context
361
+
362
+ Manager -->|Event: Final Result| UI
363
+ UI -->|Display| User
364
+
365
+ style User fill:#e1f5e1
366
+ style UI fill:#e6f3ff
367
+ style Manager fill:#ffe6e6
368
+ style Context fill:#ffe6f0
369
+ style VectorDB fill:#ffe6f0
370
+ style WebSearch fill:#f0f0f0
371
+ style CodeExec fill:#f0f0f0
372
+ ```
373
+
374
+ ## 11. MCP Tool Architecture
375
+
376
+ ```mermaid
377
+ graph TB
378
+ subgraph "Agent Layer"
379
+ Manager[Magentic Manager]
380
+ HypAgent[Hypothesis Agent]
381
+ SearchAgent[Search Agent]
382
+ AnalysisAgent[Analysis Agent]
383
+ ReportAgent[Report Agent]
384
+ end
385
+
386
+ subgraph "MCP Protocol Layer"
387
+ Registry[MCP Tool Registry<br/>• Discovers tools<br/>• Routes requests<br/>• Manages connections]
388
+ end
389
+
390
+ subgraph "MCP Servers"
391
+ Server1[Web Search Server<br/>localhost:8001<br/>• PubMed<br/>• arXiv<br/>• bioRxiv]
392
+ Server2[Code Execution Server<br/>localhost:8002<br/>• Sandboxed Python<br/>• Package management]
393
+ Server3[RAG Server<br/>localhost:8003<br/>• Vector embeddings<br/>• Similarity search]
394
+ Server4[Visualization Server<br/>localhost:8004<br/>• Chart generation<br/>• Plot rendering]
395
+ end
396
+
397
+ subgraph "External Services"
398
+ PubMed[PubMed API]
399
+ ArXiv[arXiv API]
400
+ BioRxiv[bioRxiv API]
401
+ Modal[Modal Sandbox]
402
+ ChromaDB[(ChromaDB)]
403
+ end
404
+
405
+ SearchAgent -->|Request| Registry
406
+ AnalysisAgent -->|Request| Registry
407
+ ReportAgent -->|Request| Registry
408
+
409
+ Registry --> Server1
410
+ Registry --> Server2
411
+ Registry --> Server3
412
+ Registry --> Server4
413
+
414
+ Server1 --> PubMed
415
+ Server1 --> ArXiv
416
+ Server1 --> BioRxiv
417
+ Server2 --> Modal
418
+ Server3 --> ChromaDB
419
+
420
+ style Manager fill:#ffe6e6
421
+ style Registry fill:#fff4e6
422
+ style Server1 fill:#e6f3ff
423
+ style Server2 fill:#e6f3ff
424
+ style Server3 fill:#e6f3ff
425
+ style Server4 fill:#e6f3ff
426
+ ```
427
+
428
+ ## 12. Progress Tracking & Stall Detection
429
+
430
+ ```mermaid
431
+ stateDiagram-v2
432
+ [*] --> Initialization: User Query
433
+
434
+ Initialization --> Planning: Manager starts
435
+
436
+ Planning --> AgentExecution: Select agent
437
+
438
+ AgentExecution --> Assessment: Collect results
439
+
440
+ Assessment --> QualityCheck: Evaluate output
441
+
442
+ QualityCheck --> AgentExecution: Poor quality<br/>(retry < max_rounds)
443
+ QualityCheck --> Planning: Poor quality<br/>(try different agent)
444
+ QualityCheck --> NextAgent: Good quality<br/>(task incomplete)
445
+ QualityCheck --> Synthesis: Good quality<br/>(task complete)
446
+
447
+ NextAgent --> AgentExecution: Select next agent
448
+
449
+ state StallDetection <<choice>>
450
+ Assessment --> StallDetection: Check progress
451
+ StallDetection --> Planning: No progress<br/>(stall count < max)
452
+ StallDetection --> ErrorRecovery: No progress<br/>(max stalls reached)
453
+
454
+ ErrorRecovery --> PartialReport: Generate partial results
455
+ PartialReport --> [*]
456
+
457
+ Synthesis --> FinalReport: Combine all outputs
458
+ FinalReport --> [*]
459
+
460
+ note right of QualityCheck
461
+ Manager assesses:
462
+ • Output completeness
463
+ • Quality metrics
464
+ • Progress made
465
+ end note
466
+
467
+ note right of StallDetection
468
+ Stall = no new progress
469
+ after agent execution
470
+ Triggers plan reset
471
+ end note
472
+ ```
473
+
474
+ ## 13. Gradio UI Integration
475
+
476
+ ```mermaid
477
+ graph TD
478
+ App[Gradio App<br/>DeepCritical Research Agent]
479
+
480
+ App --> Input[Input Section]
481
+ App --> Status[Status Section]
482
+ App --> Output[Output Section]
483
+
484
+ Input --> Query[Research Question<br/>Text Area]
485
+ Input --> Controls[Controls]
486
+ Controls --> MaxHyp[Max Hypotheses: 1-10]
487
+ Controls --> MaxRounds[Max Rounds: 5-20]
488
+ Controls --> Submit[Start Research Button]
489
+
490
+ Status --> Log[Real-time Event Log<br/>• Manager planning<br/>• Agent selection<br/>• Execution updates<br/>• Quality assessment]
491
+ Status --> Progress[Progress Tracker<br/>• Current agent<br/>• Round count<br/>• Stall count]
492
+
493
+ Output --> Tabs[Tabbed Results]
494
+ Tabs --> Tab1[Hypotheses Tab<br/>Generated hypotheses with scores]
495
+ Tabs --> Tab2[Search Results Tab<br/>Papers & sources found]
496
+ Tabs --> Tab3[Analysis Tab<br/>Evidence & verdicts]
497
+ Tabs --> Tab4[Report Tab<br/>Final research report]
498
+ Tab4 --> Download[Download Report<br/>MD / PDF / JSON]
499
+
500
+ Submit -.->|Triggers| Workflow[Magentic Workflow]
501
+ Workflow -.->|MagenticOrchestratorMessageEvent| Log
502
+ Workflow -.->|MagenticAgentDeltaEvent| Log
503
+ Workflow -.->|MagenticAgentMessageEvent| Log
504
+ Workflow -.->|MagenticFinalResultEvent| Tab4
505
+
506
+ style App fill:#e1f5e1
507
+ style Input fill:#fff4e6
508
+ style Status fill:#e6f3ff
509
+ style Output fill:#e6ffe6
510
+ style Workflow fill:#ffe6e6
511
+ ```
512
+
513
+ ## 14. Complete System Context
514
+
515
+ ```mermaid
516
+ graph LR
517
+ User[👤 Researcher<br/>Asks research questions] -->|Submits query| DC[DeepCritical<br/>Magentic Workflow]
518
+
519
+ DC -->|Literature search| PubMed[PubMed API<br/>Medical papers]
520
+ DC -->|Preprint search| ArXiv[arXiv API<br/>Scientific preprints]
521
+ DC -->|Biology search| BioRxiv[bioRxiv API<br/>Biology preprints]
522
+ DC -->|Agent reasoning| Claude[Claude API<br/>Sonnet 4 / Opus]
523
+ DC -->|Code execution| Modal[Modal Sandbox<br/>Safe Python env]
524
+ DC -->|Vector storage| Chroma[ChromaDB<br/>Embeddings & RAG]
525
+
526
+ DC -->|Deployed on| HF[HuggingFace Spaces<br/>Gradio 6.0]
527
+
528
+ PubMed -->|Results| DC
529
+ ArXiv -->|Results| DC
530
+ BioRxiv -->|Results| DC
531
+ Claude -->|Responses| DC
532
+ Modal -->|Output| DC
533
+ Chroma -->|Context| DC
534
+
535
+ DC -->|Research report| User
536
+
537
+ style User fill:#e1f5e1
538
+ style DC fill:#ffe6e6
539
+ style PubMed fill:#e6f3ff
540
+ style ArXiv fill:#e6f3ff
541
+ style BioRxiv fill:#e6f3ff
542
+ style Claude fill:#ffd6d6
543
+ style Modal fill:#f0f0f0
544
+ style Chroma fill:#ffe6f0
545
+ style HF fill:#d4edda
546
+ ```
547
+
548
+ ## 15. Workflow Timeline (Simplified)
549
+
550
+ ```mermaid
551
+ gantt
552
+ title DeepCritical Magentic Workflow - Typical Execution
553
+ dateFormat mm:ss
554
+ axisFormat %M:%S
555
+
556
+ section Manager Planning
557
+ Initial planning :p1, 00:00, 10s
558
+
559
+ section Hypothesis Agent
560
+ Generate hypotheses :h1, after p1, 30s
561
+ Manager assessment :h2, after h1, 5s
562
+
563
+ section Search Agent
564
+ Search hypothesis 1 :s1, after h2, 20s
565
+ Search hypothesis 2 :s2, after s1, 20s
566
+ Search hypothesis 3 :s3, after s2, 20s
567
+ RAG processing :s4, after s3, 15s
568
+ Manager assessment :s5, after s4, 5s
569
+
570
+ section Analysis Agent
571
+ Evidence extraction :a1, after s5, 15s
572
+ Code generation :a2, after a1, 20s
573
+ Code execution :a3, after a2, 25s
574
+ Synthesis :a4, after a3, 20s
575
+ Manager assessment :a5, after a4, 5s
576
+
577
+ section Report Agent
578
+ Report assembly :r1, after a5, 30s
579
+ Visualization :r2, after r1, 15s
580
+ Formatting :r3, after r2, 10s
581
+
582
+ section Manager Synthesis
583
+ Final synthesis :f1, after r3, 10s
584
+ ```
585
+
586
+ ---
587
+
588
+ ## Key Differences from Original Design
589
+
590
+ | Aspect | Original (Judge-in-Loop) | New (Magentic) |
591
+ |--------|-------------------------|----------------|
592
+ | **Control Flow** | Fixed sequential phases | Dynamic agent selection |
593
+ | **Quality Control** | Separate Judge Agent | Manager assessment built-in |
594
+ | **Retry Logic** | Phase-level with feedback | Agent-level with adaptation |
595
+ | **Flexibility** | Rigid 4-phase pipeline | Adaptive workflow |
596
+ | **Complexity** | 5 agents (including Judge) | 4 agents (no Judge) |
597
+ | **Progress Tracking** | Manual state management | Built-in round/stall detection |
598
+ | **Agent Coordination** | Sequential handoff | Manager-driven dynamic selection |
599
+ | **Error Recovery** | Retry same phase | Try different agent or replan |
600
+
601
+ ---
602
+
603
+ ## Simplified Design Principles
604
+
605
+ 1. **Manager is Intelligent**: LLM-powered manager handles planning, selection, and quality assessment
606
+ 2. **No Separate Judge**: The manager's assessment phase replaces a dedicated Judge Agent
607
+ 3. **Dynamic Workflow**: Agents can be called multiple times in any order based on need
608
+ 4. **Built-in Safety**: `max_round_count` (15) and `max_stall_count` (3) prevent infinite loops
609
+ 5. **Event-Driven UI**: Real-time streaming updates to Gradio interface
610
+ 6. **MCP-Powered Tools**: All external capabilities via Model Context Protocol
611
+ 7. **Shared Context**: Centralized state accessible to all agents
612
+ 8. **Progress Awareness**: Manager tracks what's been done and what's needed
613
+
614
+ ---
615
+
616
+ ## Legend
617
+
618
+ - 🔴 **Red/Pink**: Manager, orchestration, decision-making
619
+ - 🟡 **Yellow/Orange**: Specialist agents, processing
620
+ - 🔵 **Blue**: Data, tools, MCP services
621
+ - 🟣 **Purple/Pink**: Storage, databases, state
622
+ - 🟢 **Green**: User interactions, final outputs
623
+ - ⚪ **Gray**: External services, APIs
624
+
625
+ ---
626
+
627
+ ## Implementation Highlights
628
+
629
+ **Simple 4-Agent Setup:**
630
+ ```python
631
+ workflow = (
632
+ MagenticBuilder()
633
+ .participants(
634
+ hypothesis=HypothesisAgent(tools=[background_tool]),
635
+ search=SearchAgent(tools=[web_search, rag_tool]),
636
+ analysis=AnalysisAgent(tools=[code_execution]),
637
+ report=ReportAgent(tools=[code_execution, visualization])
638
+ )
639
+ .with_standard_manager(
640
+ chat_client=AnthropicClient(model="claude-sonnet-4"),
641
+ max_round_count=15, # Prevent infinite loops
642
+ max_stall_count=3 # Detect stuck workflows
643
+ )
644
+ .build()
645
+ )
646
+ ```
647
+
648
+ **Manager handles quality assessment in its instructions:**
649
+ - Checks hypothesis quality (testable, novel, clear)
650
+ - Validates search results (relevant, authoritative, recent)
651
+ - Assesses analysis soundness (methodology, evidence, conclusions)
652
+ - Ensures report completeness (all sections, proper citations)
653
+
654
+ No separate Judge Agent is needed — the manager handles quality assessment itself.
655
+
656
+ ---
657
+
658
+ **Document Version**: 2.0 (Magentic Simplified)
659
+ **Last Updated**: 2025-11-24
660
+ **Architecture**: Microsoft Magentic Orchestration Pattern
661
+ **Agents**: 4 (Hypothesis, Search, Analysis, Report) + 1 Manager
662
+ **License**: MIT
663
+
664
+ ## See Also
665
+
666
+ - [Orchestrators](orchestrators.md) - Overview of all orchestrator patterns
667
+ - [Graph Orchestration](graph-orchestration.md) - Graph-based execution overview
668
+ - [Graph Orchestration (Detailed)](graph_orchestration.md) - Detailed graph architecture
669
+ - [Workflows](workflows.md) - Workflow patterns summary
670
+ - [API Reference - Orchestrators](../api/orchestrators.md) - API documentation
docs/architecture/workflows.md ADDED
@@ -0,0 +1,662 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepCritical Workflow - Simplified Magentic Architecture
2
+
3
+ > **Architecture Pattern**: Microsoft Magentic Orchestration
4
+ > **Design Philosophy**: Simple, dynamic, manager-driven coordination
5
+ > **Key Innovation**: Intelligent manager replaces rigid sequential phases
6
+
7
+ ---
8
+
9
+ ## 1. High-Level Magentic Workflow
10
+
11
+ ```mermaid
12
+ flowchart TD
13
+ Start([User Query]) --> Manager[Magentic Manager<br/>Plan • Select • Assess • Adapt]
14
+
15
+ Manager -->|Plans| Task1[Task Decomposition]
16
+ Task1 --> Manager
17
+
18
+ Manager -->|Selects & Executes| HypAgent[Hypothesis Agent]
19
+ Manager -->|Selects & Executes| SearchAgent[Search Agent]
20
+ Manager -->|Selects & Executes| AnalysisAgent[Analysis Agent]
21
+ Manager -->|Selects & Executes| ReportAgent[Report Agent]
22
+
23
+ HypAgent -->|Results| Manager
24
+ SearchAgent -->|Results| Manager
25
+ AnalysisAgent -->|Results| Manager
26
+ ReportAgent -->|Results| Manager
27
+
28
+ Manager -->|Assesses Quality| Decision{Good Enough?}
29
+ Decision -->|No - Refine| Manager
30
+ Decision -->|No - Different Agent| Manager
31
+ Decision -->|No - Stalled| Replan[Reset Plan]
32
+ Replan --> Manager
33
+
34
+ Decision -->|Yes| Synthesis[Synthesize Final Result]
35
+ Synthesis --> Output([Research Report])
36
+
37
+ style Start fill:#e1f5e1
38
+ style Manager fill:#ffe6e6
39
+ style HypAgent fill:#fff4e6
40
+ style SearchAgent fill:#fff4e6
41
+ style AnalysisAgent fill:#fff4e6
42
+ style ReportAgent fill:#fff4e6
43
+ style Decision fill:#ffd6d6
44
+ style Synthesis fill:#d4edda
45
+ style Output fill:#e1f5e1
46
+ ```
47
+
48
+ ## 2. Magentic Manager: The 6-Phase Cycle
49
+
50
+ ```mermaid
51
+ flowchart LR
52
+ P1[1. Planning<br/>Analyze task<br/>Create strategy] --> P2[2. Agent Selection<br/>Pick best agent<br/>for subtask]
53
+ P2 --> P3[3. Execution<br/>Run selected<br/>agent with tools]
54
+ P3 --> P4[4. Assessment<br/>Evaluate quality<br/>Check progress]
55
+ P4 --> Decision{Quality OK?<br/>Progress made?}
56
+ Decision -->|Yes| P6[6. Synthesis<br/>Combine results<br/>Generate report]
57
+ Decision -->|No| P5[5. Iteration<br/>Adjust plan<br/>Try again]
58
+ P5 --> P2
59
+ P6 --> Done([Complete])
60
+
61
+ style P1 fill:#fff4e6
62
+ style P2 fill:#ffe6e6
63
+ style P3 fill:#e6f3ff
64
+ style P4 fill:#ffd6d6
65
+ style P5 fill:#fff3cd
66
+ style P6 fill:#d4edda
67
+ style Done fill:#e1f5e1
68
+ ```
69
+
70
+ ## 3. Simplified Agent Architecture
71
+
72
+ ```mermaid
73
+ graph TB
74
+ subgraph "Orchestration Layer"
75
+ Manager[Magentic Manager<br/>• Plans workflow<br/>• Selects agents<br/>• Assesses quality<br/>• Adapts strategy]
76
+ SharedContext[(Shared Context<br/>• Hypotheses<br/>• Search Results<br/>• Analysis<br/>• Progress)]
77
+ Manager <--> SharedContext
78
+ end
79
+
80
+ subgraph "Specialist Agents"
81
+ HypAgent[Hypothesis Agent<br/>• Domain understanding<br/>• Hypothesis generation<br/>• Testability refinement]
82
+ SearchAgent[Search Agent<br/>• Multi-source search<br/>• RAG retrieval<br/>• Result ranking]
83
+ AnalysisAgent[Analysis Agent<br/>• Evidence extraction<br/>• Statistical analysis<br/>• Code execution]
84
+ ReportAgent[Report Agent<br/>• Report assembly<br/>• Visualization<br/>• Citation formatting]
85
+ end
86
+
87
+ subgraph "MCP Tools"
88
+ WebSearch[Web Search<br/>PubMed • arXiv • bioRxiv]
89
+ CodeExec[Code Execution<br/>Sandboxed Python]
90
+ RAG[RAG Retrieval<br/>Vector DB • Embeddings]
91
+ Viz[Visualization<br/>Charts • Graphs]
92
+ end
93
+
94
+ Manager -->|Selects & Directs| HypAgent
95
+ Manager -->|Selects & Directs| SearchAgent
96
+ Manager -->|Selects & Directs| AnalysisAgent
97
+ Manager -->|Selects & Directs| ReportAgent
98
+
99
+ HypAgent --> SharedContext
100
+ SearchAgent --> SharedContext
101
+ AnalysisAgent --> SharedContext
102
+ ReportAgent --> SharedContext
103
+
104
+ SearchAgent --> WebSearch
105
+ SearchAgent --> RAG
106
+ AnalysisAgent --> CodeExec
107
+ ReportAgent --> CodeExec
108
+ ReportAgent --> Viz
109
+
110
+ style Manager fill:#ffe6e6
111
+ style SharedContext fill:#ffe6f0
112
+ style HypAgent fill:#fff4e6
113
+ style SearchAgent fill:#fff4e6
114
+ style AnalysisAgent fill:#fff4e6
115
+ style ReportAgent fill:#fff4e6
116
+ style WebSearch fill:#e6f3ff
117
+ style CodeExec fill:#e6f3ff
118
+ style RAG fill:#e6f3ff
119
+ style Viz fill:#e6f3ff
120
+ ```
121
+
122
+ ## 4. Dynamic Workflow Example
123
+
124
+ ```mermaid
125
+ sequenceDiagram
126
+ participant User
127
+ participant Manager
128
+ participant HypAgent
129
+ participant SearchAgent
130
+ participant AnalysisAgent
131
+ participant ReportAgent
132
+
133
+ User->>Manager: "Research protein folding in Alzheimer's"
134
+
135
+ Note over Manager: PLAN: Generate hypotheses → Search → Analyze → Report
136
+
137
+ Manager->>HypAgent: Generate 3 hypotheses
138
+ HypAgent-->>Manager: Returns 3 hypotheses
139
+ Note over Manager: ASSESS: Good quality, proceed
140
+
141
+ Manager->>SearchAgent: Search literature for hypothesis 1
142
+ SearchAgent-->>Manager: Returns 15 papers
143
+ Note over Manager: ASSESS: Good results, continue
144
+
145
+ Manager->>SearchAgent: Search for hypothesis 2
146
+ SearchAgent-->>Manager: Only 2 papers found
147
+ Note over Manager: ASSESS: Insufficient, refine search
148
+
149
+ Manager->>SearchAgent: Refined query for hypothesis 2
150
+ SearchAgent-->>Manager: Returns 12 papers
151
+ Note over Manager: ASSESS: Better, proceed
152
+
153
+ Manager->>AnalysisAgent: Analyze evidence for all hypotheses
154
+ AnalysisAgent-->>Manager: Returns analysis with code
155
+ Note over Manager: ASSESS: Complete, generate report
156
+
157
+ Manager->>ReportAgent: Create comprehensive report
158
+ ReportAgent-->>Manager: Returns formatted report
159
+ Note over Manager: SYNTHESIZE: Combine all results
160
+
161
+ Manager->>User: Final Research Report
162
+ ```
163
+
164
+ ## 5. Manager Decision Logic
165
+
166
+ ```mermaid
167
+ flowchart TD
168
+ Start([Manager Receives Task]) --> Plan[Create Initial Plan]
169
+
170
+ Plan --> Select[Select Agent for Next Subtask]
171
+ Select --> Execute[Execute Agent]
172
+ Execute --> Collect[Collect Results]
173
+
174
+ Collect --> Assess[Assess Quality & Progress]
175
+
176
+ Assess --> Q1{Quality Sufficient?}
177
+ Q1 -->|No| Q2{Same Agent Can Fix?}
178
+ Q2 -->|Yes| Feedback[Provide Specific Feedback]
179
+ Feedback --> Execute
180
+ Q2 -->|No| Different[Try Different Agent]
181
+ Different --> Select
182
+
183
+ Q1 -->|Yes| Q3{Task Complete?}
184
+ Q3 -->|No| Q4{Making Progress?}
185
+ Q4 -->|Yes| Select
186
+ Q4 -->|No - Stalled| Replan[Reset Plan & Approach]
187
+ Replan --> Plan
188
+
189
+ Q3 -->|Yes| Synth[Synthesize Final Result]
190
+ Synth --> Done([Return Report])
191
+
192
+ style Start fill:#e1f5e1
193
+ style Plan fill:#fff4e6
194
+ style Select fill:#ffe6e6
195
+ style Execute fill:#e6f3ff
196
+ style Assess fill:#ffd6d6
197
+ style Q1 fill:#ffe6e6
198
+ style Q2 fill:#ffe6e6
199
+ style Q3 fill:#ffe6e6
200
+ style Q4 fill:#ffe6e6
201
+ style Synth fill:#d4edda
202
+ style Done fill:#e1f5e1
203
+ ```
204
+
205
+ ## 6. Hypothesis Agent Workflow
206
+
207
+ ```mermaid
208
+ flowchart LR
209
+ Input[Research Query] --> Domain[Identify Domain<br/>& Key Concepts]
210
+ Domain --> Context[Retrieve Background<br/>Knowledge]
211
+ Context --> Generate[Generate 3-5<br/>Initial Hypotheses]
212
+ Generate --> Refine[Refine for<br/>Testability]
213
+ Refine --> Rank[Rank by<br/>Quality Score]
214
+ Rank --> Output[Return Top<br/>Hypotheses]
215
+
216
+ Output --> Struct[Hypothesis Structure:<br/>• Statement<br/>• Rationale<br/>• Testability Score<br/>• Data Requirements<br/>• Expected Outcomes]
217
+
218
+ style Input fill:#e1f5e1
219
+ style Output fill:#fff4e6
220
+ style Struct fill:#e6f3ff
221
+ ```
222
+
223
+ ## 7. Search Agent Workflow
224
+
225
+ ```mermaid
226
+ flowchart TD
227
+ Input[Hypotheses] --> Strategy[Formulate Search<br/>Strategy per Hypothesis]
228
+
229
+ Strategy --> Multi[Multi-Source Search]
230
+
231
+ Multi --> PubMed[PubMed Search<br/>via MCP]
232
+ Multi --> ArXiv[arXiv Search<br/>via MCP]
233
+ Multi --> BioRxiv[bioRxiv Search<br/>via MCP]
234
+
235
+ PubMed --> Aggregate[Aggregate Results]
236
+ ArXiv --> Aggregate
237
+ BioRxiv --> Aggregate
238
+
239
+ Aggregate --> Filter[Filter & Rank<br/>by Relevance]
240
+ Filter --> Dedup[Deduplicate<br/>Cross-Reference]
241
+ Dedup --> Embed[Embed Documents<br/>via MCP]
242
+ Embed --> Vector[(Vector DB)]
243
+ Vector --> RAGRetrieval[RAG Retrieval<br/>Top-K per Hypothesis]
244
+ RAGRetrieval --> Output[Return Contextualized<br/>Search Results]
245
+
246
+ style Input fill:#fff4e6
247
+ style Multi fill:#ffe6e6
248
+ style Vector fill:#ffe6f0
249
+ style Output fill:#e6f3ff
250
+ ```
251
+
252
+ ## 8. Analysis Agent Workflow
253
+
254
+ ```mermaid
255
+ flowchart TD
256
+ Input1[Hypotheses] --> Extract
257
+ Input2[Search Results] --> Extract[Extract Evidence<br/>per Hypothesis]
258
+
259
+ Extract --> Methods[Determine Analysis<br/>Methods Needed]
260
+
261
+ Methods --> Branch{Requires<br/>Computation?}
262
+ Branch -->|Yes| GenCode[Generate Python<br/>Analysis Code]
263
+ Branch -->|No| Qual[Qualitative<br/>Synthesis]
264
+
265
+ GenCode --> Execute[Execute Code<br/>via MCP Sandbox]
266
+ Execute --> Interpret1[Interpret<br/>Results]
267
+ Qual --> Interpret2[Interpret<br/>Findings]
268
+
269
+ Interpret1 --> Synthesize[Synthesize Evidence<br/>Across Sources]
270
+ Interpret2 --> Synthesize
271
+
272
+ Synthesize --> Verdict[Determine Verdict<br/>per Hypothesis]
273
+ Verdict --> Support[• Supported<br/>• Refuted<br/>• Inconclusive]
274
+ Support --> Gaps[Identify Knowledge<br/>Gaps & Limitations]
275
+ Gaps --> Output[Return Analysis<br/>Report]
276
+
277
+ style Input1 fill:#fff4e6
278
+ style Input2 fill:#e6f3ff
279
+ style Execute fill:#ffe6e6
280
+ style Output fill:#e6ffe6
281
+ ```
282
+
283
+ ## 9. Report Agent Workflow
284
+
285
+ ```mermaid
286
+ flowchart TD
287
+ Input1[Query] --> Assemble
288
+ Input2[Hypotheses] --> Assemble
289
+ Input3[Search Results] --> Assemble
290
+ Input4[Analysis] --> Assemble[Assemble Report<br/>Sections]
291
+
292
+ Assemble --> Exec[Executive Summary]
293
+ Assemble --> Intro[Introduction]
294
+ Assemble --> Methods[Methods]
295
+ Assemble --> Results[Results per<br/>Hypothesis]
296
+ Assemble --> Discussion[Discussion]
297
+ Assemble --> Future[Future Directions]
298
+ Assemble --> Refs[References]
299
+
300
+ Results --> VizCheck{Needs<br/>Visualization?}
301
+ VizCheck -->|Yes| GenViz[Generate Viz Code]
302
+ GenViz --> ExecViz[Execute via MCP<br/>Create Charts]
303
+ ExecViz --> Combine
304
+ VizCheck -->|No| Combine[Combine All<br/>Sections]
305
+
306
+ Exec --> Combine
307
+ Intro --> Combine
308
+ Methods --> Combine
309
+ Discussion --> Combine
310
+ Future --> Combine
311
+ Refs --> Combine
312
+
313
+ Combine --> Format[Format Output]
314
+ Format --> MD[Markdown]
315
+ Format --> PDF[PDF]
316
+ Format --> JSON[JSON]
317
+
318
+ MD --> Output[Return Final<br/>Report]
319
+ PDF --> Output
320
+ JSON --> Output
321
+
322
+ style Input1 fill:#e1f5e1
323
+ style Input2 fill:#fff4e6
324
+ style Input3 fill:#e6f3ff
325
+ style Input4 fill:#e6ffe6
326
+ style Output fill:#d4edda
327
+ ```
328
+
329
+ ## 10. Data Flow & Event Streaming
330
+
331
+ ```mermaid
332
+ flowchart TD
333
+ User[👤 User] -->|Research Query| UI[Gradio UI]
334
+ UI -->|Submit| Manager[Magentic Manager]
335
+
336
+ Manager -->|Event: Planning| UI
337
+ Manager -->|Select Agent| HypAgent[Hypothesis Agent]
338
+ HypAgent -->|Event: Delta/Message| UI
339
+ HypAgent -->|Hypotheses| Context[(Shared Context)]
340
+
341
+ Context -->|Retrieved by| Manager
342
+ Manager -->|Select Agent| SearchAgent[Search Agent]
343
+ SearchAgent -->|MCP Request| WebSearch[Web Search Tool]
344
+ WebSearch -->|Results| SearchAgent
345
+ SearchAgent -->|Event: Delta/Message| UI
346
+ SearchAgent -->|Documents| Context
347
+ SearchAgent -->|Embeddings| VectorDB[(Vector DB)]
348
+
349
+ Context -->|Retrieved by| Manager
350
+ Manager -->|Select Agent| AnalysisAgent[Analysis Agent]
351
+ AnalysisAgent -->|MCP Request| CodeExec[Code Execution Tool]
352
+ CodeExec -->|Results| AnalysisAgent
353
+ AnalysisAgent -->|Event: Delta/Message| UI
354
+ AnalysisAgent -->|Analysis| Context
355
+
356
+ Context -->|Retrieved by| Manager
357
+ Manager -->|Select Agent| ReportAgent[Report Agent]
358
+ ReportAgent -->|MCP Request| CodeExec
359
+ ReportAgent -->|Event: Delta/Message| UI
360
+ ReportAgent -->|Report| Context
361
+
362
+ Manager -->|Event: Final Result| UI
363
+ UI -->|Display| User
364
+
365
+ style User fill:#e1f5e1
366
+ style UI fill:#e6f3ff
367
+ style Manager fill:#ffe6e6
368
+ style Context fill:#ffe6f0
369
+ style VectorDB fill:#ffe6f0
370
+ style WebSearch fill:#f0f0f0
371
+ style CodeExec fill:#f0f0f0
372
+ ```
373
+
374
+ ## 11. MCP Tool Architecture
375
+
376
+ ```mermaid
377
+ graph TB
378
+ subgraph "Agent Layer"
379
+ Manager[Magentic Manager]
380
+ HypAgent[Hypothesis Agent]
381
+ SearchAgent[Search Agent]
382
+ AnalysisAgent[Analysis Agent]
383
+ ReportAgent[Report Agent]
384
+ end
385
+
386
+ subgraph "MCP Protocol Layer"
387
+ Registry[MCP Tool Registry<br/>• Discovers tools<br/>• Routes requests<br/>• Manages connections]
388
+ end
389
+
390
+ subgraph "MCP Servers"
391
+ Server1[Web Search Server<br/>localhost:8001<br/>• PubMed<br/>• arXiv<br/>• bioRxiv]
392
+ Server2[Code Execution Server<br/>localhost:8002<br/>• Sandboxed Python<br/>• Package management]
393
+ Server3[RAG Server<br/>localhost:8003<br/>• Vector embeddings<br/>• Similarity search]
394
+ Server4[Visualization Server<br/>localhost:8004<br/>• Chart generation<br/>• Plot rendering]
395
+ end
396
+
397
+ subgraph "External Services"
398
+ PubMed[PubMed API]
399
+ ArXiv[arXiv API]
400
+ BioRxiv[bioRxiv API]
401
+ Modal[Modal Sandbox]
402
+ ChromaDB[(ChromaDB)]
403
+ end
404
+
405
+ SearchAgent -->|Request| Registry
406
+ AnalysisAgent -->|Request| Registry
407
+ ReportAgent -->|Request| Registry
408
+
409
+ Registry --> Server1
410
+ Registry --> Server2
411
+ Registry --> Server3
412
+ Registry --> Server4
413
+
414
+ Server1 --> PubMed
415
+ Server1 --> ArXiv
416
+ Server1 --> BioRxiv
417
+ Server2 --> Modal
418
+ Server3 --> ChromaDB
419
+
420
+ style Manager fill:#ffe6e6
421
+ style Registry fill:#fff4e6
422
+ style Server1 fill:#e6f3ff
423
+ style Server2 fill:#e6f3ff
424
+ style Server3 fill:#e6f3ff
425
+ style Server4 fill:#e6f3ff
426
+ ```
427
+
428
+ ## 12. Progress Tracking & Stall Detection
429
+
430
+ ```mermaid
431
+ stateDiagram-v2
432
+ [*] --> Initialization: User Query
433
+
434
+ Initialization --> Planning: Manager starts
435
+
436
+ Planning --> AgentExecution: Select agent
437
+
438
+ AgentExecution --> Assessment: Collect results
439
+
440
+ Assessment --> QualityCheck: Evaluate output
441
+
442
+ QualityCheck --> AgentExecution: Poor quality<br/>(retry < max_rounds)
443
+ QualityCheck --> Planning: Poor quality<br/>(try different agent)
444
+ QualityCheck --> NextAgent: Good quality<br/>(task incomplete)
445
+ QualityCheck --> Synthesis: Good quality<br/>(task complete)
446
+
447
+ NextAgent --> AgentExecution: Select next agent
448
+
449
+ state StallDetection <<choice>>
450
+ Assessment --> StallDetection: Check progress
451
+ StallDetection --> Planning: No progress<br/>(stall count < max)
452
+ StallDetection --> ErrorRecovery: No progress<br/>(max stalls reached)
453
+
454
+ ErrorRecovery --> PartialReport: Generate partial results
455
+ PartialReport --> [*]
456
+
457
+ Synthesis --> FinalReport: Combine all outputs
458
+ FinalReport --> [*]
459
+
460
+ note right of QualityCheck
461
+ Manager assesses:
462
+ • Output completeness
463
+ • Quality metrics
464
+ • Progress made
465
+ end note
466
+
467
+ note right of StallDetection
468
+ Stall = no new progress
469
+ after agent execution
470
+ Triggers plan reset
471
+ end note
472
+ ```
473
+
474
+ ## 13. Gradio UI Integration
475
+
476
+ ```mermaid
477
+ graph TD
478
+ App[Gradio App<br/>DeepCritical Research Agent]
479
+
480
+ App --> Input[Input Section]
481
+ App --> Status[Status Section]
482
+ App --> Output[Output Section]
483
+
484
+ Input --> Query[Research Question<br/>Text Area]
485
+ Input --> Controls[Controls]
486
+ Controls --> MaxHyp[Max Hypotheses: 1-10]
487
+ Controls --> MaxRounds[Max Rounds: 5-20]
488
+ Controls --> Submit[Start Research Button]
489
+
490
+ Status --> Log[Real-time Event Log<br/>• Manager planning<br/>• Agent selection<br/>• Execution updates<br/>• Quality assessment]
491
+ Status --> Progress[Progress Tracker<br/>• Current agent<br/>• Round count<br/>• Stall count]
492
+
493
+ Output --> Tabs[Tabbed Results]
494
+ Tabs --> Tab1[Hypotheses Tab<br/>Generated hypotheses with scores]
495
+ Tabs --> Tab2[Search Results Tab<br/>Papers & sources found]
496
+ Tabs --> Tab3[Analysis Tab<br/>Evidence & verdicts]
497
+ Tabs --> Tab4[Report Tab<br/>Final research report]
498
+ Tab4 --> Download[Download Report<br/>MD / PDF / JSON]
499
+
500
+ Submit -.->|Triggers| Workflow[Magentic Workflow]
501
+ Workflow -.->|MagenticOrchestratorMessageEvent| Log
502
+ Workflow -.->|MagenticAgentDeltaEvent| Log
503
+ Workflow -.->|MagenticAgentMessageEvent| Log
504
+ Workflow -.->|MagenticFinalResultEvent| Tab4
505
+
506
+ style App fill:#e1f5e1
507
+ style Input fill:#fff4e6
508
+ style Status fill:#e6f3ff
509
+ style Output fill:#e6ffe6
510
+ style Workflow fill:#ffe6e6
511
+ ```
512
+
513
+ ## 14. Complete System Context
514
+
515
+ ```mermaid
516
+ graph LR
517
+ User[👤 Researcher<br/>Asks research questions] -->|Submits query| DC[DeepCritical<br/>Magentic Workflow]
518
+
519
+ DC -->|Literature search| PubMed[PubMed API<br/>Medical papers]
520
+ DC -->|Preprint search| ArXiv[arXiv API<br/>Scientific preprints]
521
+ DC -->|Biology search| BioRxiv[bioRxiv API<br/>Biology preprints]
522
+ DC -->|Agent reasoning| Claude[Claude API<br/>Sonnet 4 / Opus]
523
+ DC -->|Code execution| Modal[Modal Sandbox<br/>Safe Python env]
524
+ DC -->|Vector storage| Chroma[ChromaDB<br/>Embeddings & RAG]
525
+
526
+ DC -->|Deployed on| HF[HuggingFace Spaces<br/>Gradio 6.0]
527
+
528
+ PubMed -->|Results| DC
529
+ ArXiv -->|Results| DC
530
+ BioRxiv -->|Results| DC
531
+ Claude -->|Responses| DC
532
+ Modal -->|Output| DC
533
+ Chroma -->|Context| DC
534
+
535
+ DC -->|Research report| User
536
+
537
+ style User fill:#e1f5e1
538
+ style DC fill:#ffe6e6
539
+ style PubMed fill:#e6f3ff
540
+ style ArXiv fill:#e6f3ff
541
+ style BioRxiv fill:#e6f3ff
542
+ style Claude fill:#ffd6d6
543
+ style Modal fill:#f0f0f0
544
+ style Chroma fill:#ffe6f0
545
+ style HF fill:#d4edda
546
+ ```
547
+
548
+ ## 15. Workflow Timeline (Simplified)
549
+
550
+ ```mermaid
551
+ gantt
552
+ title DeepCritical Magentic Workflow - Typical Execution
553
+ dateFormat mm:ss
554
+ axisFormat %M:%S
555
+
556
+ section Manager Planning
557
+ Initial planning :p1, 00:00, 10s
558
+
559
+ section Hypothesis Agent
560
+ Generate hypotheses :h1, after p1, 30s
561
+ Manager assessment :h2, after h1, 5s
562
+
563
+ section Search Agent
564
+ Search hypothesis 1 :s1, after h2, 20s
565
+ Search hypothesis 2 :s2, after s1, 20s
566
+ Search hypothesis 3 :s3, after s2, 20s
567
+ RAG processing :s4, after s3, 15s
568
+ Manager assessment :s5, after s4, 5s
569
+
570
+ section Analysis Agent
571
+ Evidence extraction :a1, after s5, 15s
572
+ Code generation :a2, after a1, 20s
573
+ Code execution :a3, after a2, 25s
574
+ Synthesis :a4, after a3, 20s
575
+ Manager assessment :a5, after a4, 5s
576
+
577
+ section Report Agent
578
+ Report assembly :r1, after a5, 30s
579
+ Visualization :r2, after r1, 15s
580
+ Formatting :r3, after r2, 10s
581
+
582
+ section Manager Synthesis
583
+ Final synthesis :f1, after r3, 10s
584
+ ```
585
+
586
+ ---
587
+
588
+ ## Key Differences from Original Design
589
+
590
+ | Aspect | Original (Judge-in-Loop) | New (Magentic) |
591
+ |--------|-------------------------|----------------|
592
+ | **Control Flow** | Fixed sequential phases | Dynamic agent selection |
593
+ | **Quality Control** | Separate Judge Agent | Manager assessment built-in |
594
+ | **Retry Logic** | Phase-level with feedback | Agent-level with adaptation |
595
+ | **Flexibility** | Rigid 4-phase pipeline | Adaptive workflow |
596
+ | **Complexity** | 5 agents (including Judge) | 4 agents (no Judge) |
597
+ | **Progress Tracking** | Manual state management | Built-in round/stall detection |
598
+ | **Agent Coordination** | Sequential handoff | Manager-driven dynamic selection |
599
+ | **Error Recovery** | Retry same phase | Try different agent or replan |
600
+
601
+ ---
602
+
603
+ ## Simplified Design Principles
604
+
605
+ 1. **Manager is Intelligent**: LLM-powered manager handles planning, selection, and quality assessment
606
+ 2. **No Separate Judge**: Manager's assessment phase replaces dedicated Judge Agent
607
+ 3. **Dynamic Workflow**: Agents can be called multiple times in any order based on need
608
+ 4. **Built-in Safety**: max_round_count (15) and max_stall_count (3) prevent infinite loops
609
+ 5. **Event-Driven UI**: Real-time streaming updates to Gradio interface
610
+ 6. **MCP-Powered Tools**: All external capabilities via Model Context Protocol
611
+ 7. **Shared Context**: Centralized state accessible to all agents
612
+ 8. **Progress Awareness**: Manager tracks what's been done and what's needed
613
+
614
+ ---
615
+
616
+ ## Legend
617
+
618
+ - 🔴 **Red/Pink**: Manager, orchestration, decision-making
619
+ - 🟡 **Yellow/Orange**: Specialist agents, processing
620
+ - 🔵 **Blue**: Data, tools, MCP services
621
+ - 🟣 **Purple/Pink**: Storage, databases, state
622
+ - 🟢 **Green**: User interactions, final outputs
623
+ - ⚪ **Gray**: External services, APIs
624
+
625
+ ---
626
+
627
+ ## Implementation Highlights
628
+
629
+ **Simple 4-Agent Setup:**
630
+ ```python
631
+ workflow = (
632
+ MagenticBuilder()
633
+ .participants(
634
+ hypothesis=HypothesisAgent(tools=[background_tool]),
635
+ search=SearchAgent(tools=[web_search, rag_tool]),
636
+ analysis=AnalysisAgent(tools=[code_execution]),
637
+ report=ReportAgent(tools=[code_execution, visualization])
638
+ )
639
+ .with_standard_manager(
640
+ chat_client=AnthropicClient(model="claude-sonnet-4"),
641
+ max_round_count=15, # Prevent infinite loops
642
+ max_stall_count=3 # Detect stuck workflows
643
+ )
644
+ .build()
645
+ )
646
+ ```
647
+
648
+ **Manager handles quality assessment in its instructions:**
649
+ - Checks hypothesis quality (testable, novel, clear)
650
+ - Validates search results (relevant, authoritative, recent)
651
+ - Assesses analysis soundness (methodology, evidence, conclusions)
652
+ - Ensures report completeness (all sections, proper citations)
653
+
654
+ No separate Judge Agent is needed — the manager performs quality assessment itself.
655
+
656
+ ---
657
+
658
+ **Document Version**: 2.0 (Magentic Simplified)
659
+ **Last Updated**: 2025-11-24
660
+ **Architecture**: Microsoft Magentic Orchestration Pattern
661
+ **Agents**: 4 (Hypothesis, Search, Analysis, Report) + 1 Manager
662
+ **License**: MIT
docs/configuration/CONFIGURATION.md ADDED
@@ -0,0 +1,743 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration Guide
2
+
3
+ ## Overview
4
+
5
+ DeepCritical uses **Pydantic Settings** for centralized configuration management. All settings are defined in the `Settings` class in `src/utils/config.py` and can be configured via environment variables or a `.env` file.
6
+
7
+ The configuration system provides:
8
+
9
+ - **Type Safety**: Strongly-typed fields with Pydantic validation
10
+ - **Environment File Support**: Automatically loads from `.env` file (if present)
11
+ - **Case-Insensitive**: Environment variables are case-insensitive
12
+ - **Singleton Pattern**: Global `settings` instance for easy access throughout the codebase
13
+ - **Validation**: Automatic validation on load with helpful error messages
14
+
15
+ ## Quick Start
16
+
17
+ 1. Create a `.env` file in the project root (you can copy `.env.example` as a starting point)
18
+ 2. Set at least one LLM API key (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `HF_TOKEN`)
19
+ 3. Optionally configure other services as needed
20
+ 4. The application will automatically load and validate your configuration
21
+
22
+ ## Configuration System Architecture
23
+
24
+ ### Settings Class
25
+
26
+ The `Settings` class extends `BaseSettings` from `pydantic_settings` and defines all application configuration:
27
+
28
+ ```13:21:src/utils/config.py
29
+ class Settings(BaseSettings):
30
+ """Strongly-typed application settings."""
31
+
32
+ model_config = SettingsConfigDict(
33
+ env_file=".env",
34
+ env_file_encoding="utf-8",
35
+ case_sensitive=False,
36
+ extra="ignore",
37
+ )
38
+ ```
39
+
40
+ ### Singleton Instance
41
+
42
+ A global `settings` instance is available for import:
43
+
44
+ ```234:235:src/utils/config.py
45
+ # Singleton for easy import
46
+ settings = get_settings()
47
+ ```
48
+
49
+ ### Usage Pattern
50
+
51
+ Access configuration throughout the codebase:
52
+
53
+ ```python
54
+ from src.utils.config import settings
55
+
56
+ # Check if API keys are available
57
+ if settings.has_openai_key:
58
+ # Use OpenAI
59
+ pass
60
+
61
+ # Access configuration values
62
+ max_iterations = settings.max_iterations
63
+ web_search_provider = settings.web_search_provider
64
+ ```
65
+
66
+ ## Required Configuration
67
+
68
+ ### LLM Provider
69
+
70
+ You must configure at least one LLM provider. The system supports:
71
+
72
+ - **OpenAI**: Requires `OPENAI_API_KEY`
73
+ - **Anthropic**: Requires `ANTHROPIC_API_KEY`
74
+ - **HuggingFace**: Optional `HF_TOKEN` or `HUGGINGFACE_API_KEY` (can work without key for public models)
75
+
76
+ #### OpenAI Configuration
77
+
78
+ ```bash
79
+ LLM_PROVIDER=openai
80
+ OPENAI_API_KEY=your_openai_api_key_here
81
+ OPENAI_MODEL=gpt-5.1
82
+ ```
83
+
84
+ The default model is defined in the `Settings` class:
85
+
86
+ ```29:29:src/utils/config.py
87
+ openai_model: str = Field(default="gpt-5.1", description="OpenAI model name")
88
+ ```
89
+
90
+ #### Anthropic Configuration
91
+
92
+ ```bash
93
+ LLM_PROVIDER=anthropic
94
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
95
+ ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
96
+ ```
97
+
98
+ The default model is defined in the `Settings` class:
99
+
100
+ ```30:32:src/utils/config.py
101
+ anthropic_model: str = Field(
102
+ default="claude-sonnet-4-5-20250929", description="Anthropic model"
103
+ )
104
+ ```
105
+
106
+ #### HuggingFace Configuration
107
+
108
+ HuggingFace can work without an API key for public models, but an API key provides higher rate limits:
109
+
110
+ ```bash
111
+ # Option 1: Using HF_TOKEN (preferred)
112
+ HF_TOKEN=your_huggingface_token_here
113
+
114
+ # Option 2: Using HUGGINGFACE_API_KEY (alternative)
115
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
116
+
117
+ # Default model
118
+ HUGGINGFACE_MODEL=meta-llama/Llama-3.1-8B-Instruct
119
+ ```
120
+
121
+ The HuggingFace token can be set via either environment variable:
122
+
123
+ ```33:35:src/utils/config.py
124
+ hf_token: str | None = Field(
125
+ default=None, alias="HF_TOKEN", description="HuggingFace API token"
126
+ )
127
+ ```
128
+
129
+ ```57:59:src/utils/config.py
130
+ huggingface_api_key: str | None = Field(
131
+ default=None, description="HuggingFace API token (HF_TOKEN or HUGGINGFACE_API_KEY)"
132
+ )
133
+ ```
134
+
135
+ ## Optional Configuration
136
+
137
+ ### Embedding Configuration
138
+
139
+ DeepCritical supports multiple embedding providers for semantic search and RAG:
140
+
141
+ ```bash
142
+ # Embedding Provider: "openai", "local", or "huggingface"
143
+ EMBEDDING_PROVIDER=local
144
+
145
+ # OpenAI Embedding Model (used by LlamaIndex RAG)
146
+ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
147
+
148
+ # Local Embedding Model (sentence-transformers, used by EmbeddingService)
149
+ LOCAL_EMBEDDING_MODEL=all-MiniLM-L6-v2
150
+
151
+ # HuggingFace Embedding Model
152
+ HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
153
+ ```
154
+
155
+ The embedding provider configuration:
156
+
157
+ ```47:50:src/utils/config.py
158
+ embedding_provider: Literal["openai", "local", "huggingface"] = Field(
159
+ default="local",
160
+ description="Embedding provider to use",
161
+ )
162
+ ```
163
+
164
+ **Note**: OpenAI embeddings require `OPENAI_API_KEY`. The local provider (default) uses sentence-transformers and requires no API key.
165
+
166
+ ### Web Search Configuration
167
+
168
+ DeepCritical supports multiple web search providers:
169
+
170
+ ```bash
171
+ # Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
172
+ # Default: "duckduckgo" (no API key required)
173
+ WEB_SEARCH_PROVIDER=duckduckgo
174
+
175
+ # Serper API Key (for Google search via Serper)
176
+ SERPER_API_KEY=your_serper_api_key_here
177
+
178
+ # SearchXNG Host URL (for self-hosted search)
179
+ SEARCHXNG_HOST=http://localhost:8080
180
+
181
+ # Brave Search API Key
182
+ BRAVE_API_KEY=your_brave_api_key_here
183
+
184
+ # Tavily API Key
185
+ TAVILY_API_KEY=your_tavily_api_key_here
186
+ ```
187
+
188
+ The web search provider configuration:
189
+
190
+ ```71:74:src/utils/config.py
191
+ web_search_provider: Literal["serper", "searchxng", "brave", "tavily", "duckduckgo"] = Field(
192
+ default="duckduckgo",
193
+ description="Web search provider to use",
194
+ )
195
+ ```
196
+
197
+ **Note**: DuckDuckGo is the default and requires no API key, making it ideal for development and testing.
198
+
199
+ ### PubMed Configuration
200
+
201
+ PubMed search supports optional NCBI API key for higher rate limits:
202
+
203
+ ```bash
204
+ # NCBI API Key (optional, for higher rate limits: 10 req/sec vs 3 req/sec)
205
+ NCBI_API_KEY=your_ncbi_api_key_here
206
+ ```
207
+
208
+ The PubMed tool uses this configuration:
209
+
210
+ ```22:29:src/tools/pubmed.py
211
+ def __init__(self, api_key: str | None = None) -> None:
212
+ self.api_key = api_key or settings.ncbi_api_key
213
+ # Ignore placeholder values from .env.example
214
+ if self.api_key == "your-ncbi-key-here":
215
+ self.api_key = None
216
+
217
+ # Use shared rate limiter
218
+ self._limiter = get_pubmed_limiter(self.api_key)
219
+ ```
220
+
221
+ ### Agent Configuration
222
+
223
+ Control agent behavior and research loop execution:
224
+
225
+ ```bash
226
+ # Maximum iterations per research loop (1-50, default: 10)
227
+ MAX_ITERATIONS=10
228
+
229
+ # Search timeout in seconds
230
+ SEARCH_TIMEOUT=30
231
+
232
+ # Use graph-based execution for research flows
233
+ USE_GRAPH_EXECUTION=false
234
+ ```
235
+
236
+ The agent configuration fields:
237
+
238
+ ```80:85:src/utils/config.py
239
+ # Agent Configuration
240
+ max_iterations: int = Field(default=10, ge=1, le=50)
241
+ search_timeout: int = Field(default=30, description="Seconds to wait for search")
242
+ use_graph_execution: bool = Field(
243
+ default=False, description="Use graph-based execution for research flows"
244
+ )
245
+ ```
246
+
247
+ ### Budget & Rate Limiting Configuration
248
+
249
+ Control resource limits for research loops:
250
+
251
+ ```bash
252
+ # Default token budget per research loop (1000-1000000, default: 100000)
253
+ DEFAULT_TOKEN_LIMIT=100000
254
+
255
+ # Default time limit per research loop in minutes (1-120, default: 10)
256
+ DEFAULT_TIME_LIMIT_MINUTES=10
257
+
258
+ # Default iterations limit per research loop (1-50, default: 10)
259
+ DEFAULT_ITERATIONS_LIMIT=10
260
+ ```
261
+
262
+ The budget configuration with validation:
263
+
264
+ ```87:105:src/utils/config.py
265
+ # Budget & Rate Limiting Configuration
266
+ default_token_limit: int = Field(
267
+ default=100000,
268
+ ge=1000,
269
+ le=1000000,
270
+ description="Default token budget per research loop",
271
+ )
272
+ default_time_limit_minutes: int = Field(
273
+ default=10,
274
+ ge=1,
275
+ le=120,
276
+ description="Default time limit per research loop (minutes)",
277
+ )
278
+ default_iterations_limit: int = Field(
279
+ default=10,
280
+ ge=1,
281
+ le=50,
282
+ description="Default iterations limit per research loop",
283
+ )
284
+ ```
285
+
286
+ ### RAG Service Configuration
287
+
288
+ Configure the Retrieval-Augmented Generation service:
289
+
290
+ ```bash
291
+ # ChromaDB collection name for RAG
292
+ RAG_COLLECTION_NAME=deepcritical_evidence
293
+
294
+ # Number of top results to retrieve from RAG (1-50, default: 5)
295
+ RAG_SIMILARITY_TOP_K=5
296
+
297
+ # Automatically ingest evidence into RAG
298
+ RAG_AUTO_INGEST=true
299
+ ```
300
+
301
+ The RAG configuration:
302
+
303
+ ```127:141:src/utils/config.py
304
+ # RAG Service Configuration
305
+ rag_collection_name: str = Field(
306
+ default="deepcritical_evidence",
307
+ description="ChromaDB collection name for RAG",
308
+ )
309
+ rag_similarity_top_k: int = Field(
310
+ default=5,
311
+ ge=1,
312
+ le=50,
313
+ description="Number of top results to retrieve from RAG",
314
+ )
315
+ rag_auto_ingest: bool = Field(
316
+ default=True,
317
+ description="Automatically ingest evidence into RAG",
318
+ )
319
+ ```
320
+
321
+ ### ChromaDB Configuration
322
+
323
+ Configure the vector database for embeddings and RAG:
324
+
325
+ ```bash
326
+ # ChromaDB storage path
327
+ CHROMA_DB_PATH=./chroma_db
328
+
329
+ # Whether to persist ChromaDB to disk
330
+ CHROMA_DB_PERSIST=true
331
+
332
+ # ChromaDB server host (for remote ChromaDB, optional)
333
+ CHROMA_DB_HOST=localhost
334
+
335
+ # ChromaDB server port (for remote ChromaDB, optional)
336
+ CHROMA_DB_PORT=8000
337
+ ```
338
+
339
+ The ChromaDB configuration:
340
+
341
+ ```113:125:src/utils/config.py
342
+ chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")
343
+ chroma_db_persist: bool = Field(
344
+ default=True,
345
+ description="Whether to persist ChromaDB to disk",
346
+ )
347
+ chroma_db_host: str | None = Field(
348
+ default=None,
349
+ description="ChromaDB server host (for remote ChromaDB)",
350
+ )
351
+ chroma_db_port: int | None = Field(
352
+ default=None,
353
+ description="ChromaDB server port (for remote ChromaDB)",
354
+ )
355
+ ```
356
+
357
+ ### External Services
358
+
359
+ #### Modal Configuration
360
+
361
+ Modal is used for secure sandbox execution of statistical analysis:
362
+
363
+ ```bash
364
+ # Modal Token ID (for Modal sandbox execution)
365
+ MODAL_TOKEN_ID=your_modal_token_id_here
366
+
367
+ # Modal Token Secret
368
+ MODAL_TOKEN_SECRET=your_modal_token_secret_here
369
+ ```
370
+
371
+ The Modal configuration:
372
+
373
+ ```110:112:src/utils/config.py
374
+ # External Services
375
+ modal_token_id: str | None = Field(default=None, description="Modal token ID")
376
+ modal_token_secret: str | None = Field(default=None, description="Modal token secret")
377
+ ```
378
+
379
+ ### Logging Configuration
380
+
381
+ Configure structured logging:
382
+
383
+ ```bash
384
+ # Log Level: "DEBUG", "INFO", "WARNING", or "ERROR"
385
+ LOG_LEVEL=INFO
386
+ ```
387
+
388
+ The logging configuration:
389
+
390
+ ```107:108:src/utils/config.py
391
+ # Logging
392
+ log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"
393
+ ```
394
+
395
+ Logging is configured via the `configure_logging()` function:
396
+
397
+ ```212:231:src/utils/config.py
398
+ def configure_logging(settings: Settings) -> None:
399
+ """Configure structured logging with the configured log level."""
400
+ # Set stdlib logging level from settings
401
+ logging.basicConfig(
402
+ level=getattr(logging, settings.log_level),
403
+ format="%(message)s",
404
+ )
405
+
406
+ structlog.configure(
407
+ processors=[
408
+ structlog.stdlib.filter_by_level,
409
+ structlog.stdlib.add_logger_name,
410
+ structlog.stdlib.add_log_level,
411
+ structlog.processors.TimeStamper(fmt="iso"),
412
+ structlog.processors.JSONRenderer(),
413
+ ],
414
+ wrapper_class=structlog.stdlib.BoundLogger,
415
+ context_class=dict,
416
+ logger_factory=structlog.stdlib.LoggerFactory(),
417
+ )
418
+ ```
419
+
420
+ ## Configuration Properties
421
+
422
+ The `Settings` class provides helpful properties for checking configuration state:
423
+
424
+ ### API Key Availability
425
+
426
+ Check which API keys are available:
427
+
428
+ ```171:189:src/utils/config.py
429
+ @property
430
+ def has_openai_key(self) -> bool:
431
+ """Check if OpenAI API key is available."""
432
+ return bool(self.openai_api_key)
433
+
434
+ @property
435
+ def has_anthropic_key(self) -> bool:
436
+ """Check if Anthropic API key is available."""
437
+ return bool(self.anthropic_api_key)
438
+
439
+ @property
440
+ def has_huggingface_key(self) -> bool:
441
+ """Check if HuggingFace API key is available."""
442
+ return bool(self.huggingface_api_key or self.hf_token)
443
+
444
+ @property
445
+ def has_any_llm_key(self) -> bool:
446
+ """Check if any LLM API key is available."""
447
+ return self.has_openai_key or self.has_anthropic_key or self.has_huggingface_key
448
+ ```
449
+
450
+ **Usage:**
451
+
452
+ ```python
453
+ from src.utils.config import settings
454
+
455
+ # Check API key availability
456
+ if settings.has_openai_key:
457
+ # Use OpenAI
458
+ pass
459
+
460
+ if settings.has_anthropic_key:
461
+ # Use Anthropic
462
+ pass
463
+
464
+ if settings.has_huggingface_key:
465
+ # Use HuggingFace
466
+ pass
467
+
468
+ if settings.has_any_llm_key:
469
+ # At least one LLM is available
470
+ pass
471
+ ```
472
+
473
+ ### Service Availability
474
+
475
+ Check if external services are configured:
476
+
477
+ ```143:146:src/utils/config.py
478
+ @property
479
+ def modal_available(self) -> bool:
480
+ """Check if Modal credentials are configured."""
481
+ return bool(self.modal_token_id and self.modal_token_secret)
482
+ ```
483
+
484
+ ```191:204:src/utils/config.py
485
+ @property
486
+ def web_search_available(self) -> bool:
487
+ """Check if web search is available (either no-key provider or API key present)."""
488
+ if self.web_search_provider == "duckduckgo":
489
+ return True # No API key required
490
+ if self.web_search_provider == "serper":
491
+ return bool(self.serper_api_key)
492
+ if self.web_search_provider == "searchxng":
493
+ return bool(self.searchxng_host)
494
+ if self.web_search_provider == "brave":
495
+ return bool(self.brave_api_key)
496
+ if self.web_search_provider == "tavily":
497
+ return bool(self.tavily_api_key)
498
+ return False
499
+ ```
500
+
501
+ **Usage:**
502
+
503
+ ```python
504
+ from src.utils.config import settings
505
+
506
+ # Check service availability
507
+ if settings.modal_available:
508
+ # Use Modal sandbox
509
+ pass
510
+
511
+ if settings.web_search_available:
512
+ # Web search is configured
513
+ pass
514
+ ```
515
+
516
+ ### API Key Retrieval
517
+
518
+ Get the API key for the configured provider:
519
+
520
+ ```148:160:src/utils/config.py
521
+ def get_api_key(self) -> str:
522
+ """Get the API key for the configured provider."""
523
+ if self.llm_provider == "openai":
524
+ if not self.openai_api_key:
525
+ raise ConfigurationError("OPENAI_API_KEY not set")
526
+ return self.openai_api_key
527
+
528
+ if self.llm_provider == "anthropic":
529
+ if not self.anthropic_api_key:
530
+ raise ConfigurationError("ANTHROPIC_API_KEY not set")
531
+ return self.anthropic_api_key
532
+
533
+ raise ConfigurationError(f"Unknown LLM provider: {self.llm_provider}")
534
+ ```
535
+
536
+ For OpenAI-specific operations (e.g., Magentic mode):
537
+
538
+ ```162:169:src/utils/config.py
539
+ def get_openai_api_key(self) -> str:
540
+ """Get OpenAI API key (required for Magentic function calling)."""
541
+ if not self.openai_api_key:
542
+ raise ConfigurationError(
543
+ "OPENAI_API_KEY not set. Magentic mode requires OpenAI for function calling. "
544
+ "Use mode='simple' for other providers."
545
+ )
546
+ return self.openai_api_key
547
+ ```
548
+
549
+ ## Configuration Usage in Codebase
550
+
551
+ The configuration system is used throughout the codebase:
552
+
553
+ ### LLM Factory
554
+
555
+ The LLM factory uses settings to create appropriate models:
556
+
557
+ ```129:144:src/utils/llm_factory.py
558
+ if settings.llm_provider == "huggingface":
559
+ model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
560
+ hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
561
+ return HuggingFaceModel(model_name, provider=hf_provider)
562
+
563
+ if settings.llm_provider == "openai":
564
+ if not settings.openai_api_key:
565
+ raise ConfigurationError("OPENAI_API_KEY not set for pydantic-ai")
566
+ provider = OpenAIProvider(api_key=settings.openai_api_key)
567
+ return OpenAIModel(settings.openai_model, provider=provider)
568
+
569
+ if settings.llm_provider == "anthropic":
570
+ if not settings.anthropic_api_key:
571
+ raise ConfigurationError("ANTHROPIC_API_KEY not set for pydantic-ai")
572
+ anthropic_provider = AnthropicProvider(api_key=settings.anthropic_api_key)
573
+ return AnthropicModel(settings.anthropic_model, provider=anthropic_provider)
574
+ ```
575
+
576
+ ### Embedding Service
577
+
578
+ The embedding service uses local embedding model configuration:
579
+
580
+ ```29:31:src/services/embeddings.py
581
+ def __init__(self, model_name: str | None = None):
582
+ self._model_name = model_name or settings.local_embedding_model
583
+ self._model = SentenceTransformer(self._model_name)
584
+ ```
585
+
586
+ ### Orchestrator Factory
587
+
588
+ The orchestrator factory uses settings to determine mode:
589
+
590
+ ```69:80:src/orchestrator_factory.py
591
+ def _determine_mode(explicit_mode: str | None) -> str:
592
+ """Determine which mode to use."""
593
+ if explicit_mode:
594
+ if explicit_mode in ("magentic", "advanced"):
595
+ return "advanced"
596
+ return "simple"
597
+
598
+ # Auto-detect: advanced if paid API key available
599
+ if settings.has_openai_key:
600
+ return "advanced"
601
+
602
+ return "simple"
603
+ ```
604
+
605
+ ## Environment Variables Reference
606
+
607
+ ### Required (at least one LLM)
608
+
609
+ - `OPENAI_API_KEY` - OpenAI API key (required for OpenAI provider)
610
+ - `ANTHROPIC_API_KEY` - Anthropic API key (required for Anthropic provider)
611
+ - `HF_TOKEN` or `HUGGINGFACE_API_KEY` - HuggingFace API token (optional, can work without for public models)
612
+
613
+ #### LLM Configuration Variables
614
+
615
+ - `LLM_PROVIDER` - Provider to use: `"openai"`, `"anthropic"`, or `"huggingface"` (default: `"openai"`)
616
+ - `OPENAI_MODEL` - OpenAI model name (default: `"gpt-5.1"`)
617
+ - `ANTHROPIC_MODEL` - Anthropic model name (default: `"claude-sonnet-4-5-20250929"`)
618
+ - `HUGGINGFACE_MODEL` - HuggingFace model ID (default: `"meta-llama/Llama-3.1-8B-Instruct"`)
619
+
620
+ #### Embedding Configuration Variables
621
+
622
+ - `EMBEDDING_PROVIDER` - Provider: `"openai"`, `"local"`, or `"huggingface"` (default: `"local"`)
623
+ - `OPENAI_EMBEDDING_MODEL` - OpenAI embedding model (default: `"text-embedding-3-small"`)
624
+ - `LOCAL_EMBEDDING_MODEL` - Local sentence-transformers model (default: `"all-MiniLM-L6-v2"`)
625
+ - `HUGGINGFACE_EMBEDDING_MODEL` - HuggingFace embedding model (default: `"sentence-transformers/all-MiniLM-L6-v2"`)
626
+
627
+ #### Web Search Configuration Variables
628
+
629
+ - `WEB_SEARCH_PROVIDER` - Provider: `"serper"`, `"searchxng"`, `"brave"`, `"tavily"`, or `"duckduckgo"` (default: `"duckduckgo"`)
630
+ - `SERPER_API_KEY` - Serper API key (required for Serper provider)
631
+ - `SEARCHXNG_HOST` - SearchXNG host URL (required for SearchXNG provider)
632
+ - `BRAVE_API_KEY` - Brave Search API key (required for Brave provider)
633
+ - `TAVILY_API_KEY` - Tavily API key (required for Tavily provider)
634
+
635
+ #### PubMed Configuration Variables
636
+
637
+ - `NCBI_API_KEY` - NCBI API key (optional, increases rate limit from 3 to 10 req/sec)
638
+
639
+ #### Agent Configuration Variables
640
+
641
+ - `MAX_ITERATIONS` - Maximum iterations per research loop (1-50, default: `10`)
642
+ - `SEARCH_TIMEOUT` - Search timeout in seconds (default: `30`)
643
+ - `USE_GRAPH_EXECUTION` - Use graph-based execution (default: `false`)
644
+
645
+ #### Budget Configuration Variables
646
+
647
+ - `DEFAULT_TOKEN_LIMIT` - Default token budget per research loop (1000-1000000, default: `100000`)
648
+ - `DEFAULT_TIME_LIMIT_MINUTES` - Default time limit in minutes (1-120, default: `10`)
649
+ - `DEFAULT_ITERATIONS_LIMIT` - Default iterations limit (1-50, default: `10`)
650
+
651
+ #### RAG Configuration Variables
652
+
653
+ - `RAG_COLLECTION_NAME` - ChromaDB collection name (default: `"deepcritical_evidence"`)
654
+ - `RAG_SIMILARITY_TOP_K` - Number of top results to retrieve (1-50, default: `5`)
655
+ - `RAG_AUTO_INGEST` - Automatically ingest evidence into RAG (default: `true`)
656
+
657
+ #### ChromaDB Configuration Variables
658
+
659
+ - `CHROMA_DB_PATH` - ChromaDB storage path (default: `"./chroma_db"`)
660
+ - `CHROMA_DB_PERSIST` - Whether to persist ChromaDB to disk (default: `true`)
661
+ - `CHROMA_DB_HOST` - ChromaDB server host (optional, for remote ChromaDB)
662
+ - `CHROMA_DB_PORT` - ChromaDB server port (optional, for remote ChromaDB)
663
+
664
+ #### External Services Variables
665
+
666
+ - `MODAL_TOKEN_ID` - Modal token ID (optional, for Modal sandbox execution)
667
+ - `MODAL_TOKEN_SECRET` - Modal token secret (optional, for Modal sandbox execution)
668
+
669
+ #### Logging Configuration Variables
670
+
671
+ - `LOG_LEVEL` - Log level: `"DEBUG"`, `"INFO"`, `"WARNING"`, or `"ERROR"` (default: `"INFO"`)
672
+
673
+ ## Validation
674
+
675
+ Settings are validated on load using Pydantic validation:
676
+
677
+ - **Type Checking**: All fields are strongly typed
678
+ - **Range Validation**: Numeric fields have min/max constraints (e.g., `ge=1, le=50` for `max_iterations`)
679
+ - **Literal Validation**: Enum fields only accept specific values (e.g., `Literal["openai", "anthropic", "huggingface"]`)
680
+ - **Required Fields**: API keys are checked when accessed via `get_api_key()` or `get_openai_api_key()`
681
+
682
+ ### Validation Examples
683
+
684
+ The `max_iterations` field has range validation:
685
+
686
+ ```81:81:src/utils/config.py
687
+ max_iterations: int = Field(default=10, ge=1, le=50)
688
+ ```
689
+
690
+ The `llm_provider` field has literal validation:
691
+
692
+ ```26:28:src/utils/config.py
693
+ llm_provider: Literal["openai", "anthropic", "huggingface"] = Field(
694
+ default="openai", description="Which LLM provider to use"
695
+ )
696
+ ```
697
+
698
+ ## Error Handling
699
+
700
+ Configuration errors raise `ConfigurationError` from `src/utils/exceptions.py`:
701
+
702
+ ```22:25:src/utils/exceptions.py
703
+ class ConfigurationError(DeepCriticalError):
704
+ """Raised when configuration is invalid."""
705
+
706
+ pass
707
+ ```
708
+
709
+ ### Error Handling Example
710
+
711
+ ```python
712
+ from src.utils.config import settings
713
+ from src.utils.exceptions import ConfigurationError
714
+
715
+ try:
716
+ api_key = settings.get_api_key()
717
+ except ConfigurationError as e:
718
+ print(f"Configuration error: {e}")
719
+ ```
720
+
721
+ ### Common Configuration Errors
722
+
723
+ 1. **Missing API Key**: When `get_api_key()` is called but the required API key is not set
724
+ 2. **Invalid Provider**: When `llm_provider` is set to an unsupported value
725
+ 3. **Out of Range**: When numeric values exceed their min/max constraints
726
+ 4. **Invalid Literal**: When enum fields receive unsupported values
727
+
728
+ ## Configuration Best Practices
729
+
730
+ 1. **Use `.env` File**: Store sensitive keys in `.env` file (add to `.gitignore`)
731
+ 2. **Check Availability**: Use properties like `has_openai_key` before accessing API keys
732
+ 3. **Handle Errors**: Always catch `ConfigurationError` when calling `get_api_key()`
733
+ 4. **Validate Early**: Configuration is validated on import, so errors surface immediately
734
+ 5. **Use Defaults**: Leverage sensible defaults for optional configuration
735
+
736
+ ## Future Enhancements
737
+
738
+ The following configurations are planned for future phases:
739
+
740
+ 1. **Additional LLM Providers**: DeepSeek, OpenRouter, Gemini, Perplexity, Azure OpenAI, Local models
741
+ 2. **Model Selection**: Reasoning/main/fast model configuration
742
+ 3. **Service Integration**: Additional service integrations and configurations
743
+
docs/configuration/index.md ADDED
@@ -0,0 +1,746 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration Guide
2
+
3
+ ## Overview
4
+
5
+ DeepCritical uses **Pydantic Settings** for centralized configuration management. All settings are defined in the `Settings` class in `src/utils/config.py` and can be configured via environment variables or a `.env` file.
6
+
7
+ The configuration system provides:
8
+
9
+ - **Type Safety**: Strongly-typed fields with Pydantic validation
10
+ - **Environment File Support**: Automatically loads from `.env` file (if present)
11
+ - **Case-Insensitive**: Environment variables are case-insensitive
12
+ - **Singleton Pattern**: Global `settings` instance for easy access throughout the codebase
13
+ - **Validation**: Automatic validation on load with helpful error messages
14
+
15
+ ## Quick Start
16
+
17
+ 1. Create a `.env` file in the project root
18
+ 2. Set at least one LLM API key (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `HF_TOKEN`)
19
+ 3. Optionally configure other services as needed
20
+ 4. The application will automatically load and validate your configuration
21
+
22
+ ## Configuration System Architecture
23
+
24
+ ### Settings Class
25
+
26
+ The [`Settings`][settings-class] class extends `BaseSettings` from `pydantic_settings` and defines all application configuration:
27
+
28
+ ```13:21:src/utils/config.py
29
+ class Settings(BaseSettings):
30
+ """Strongly-typed application settings."""
31
+
32
+ model_config = SettingsConfigDict(
33
+ env_file=".env",
34
+ env_file_encoding="utf-8",
35
+ case_sensitive=False,
36
+ extra="ignore",
37
+ )
38
+ ```
39
+
40
+ [View source](https://github.com/DeepCritical/GradioDemo/blob/main/src/utils/config.py#L13-L21)
41
+
42
+ ### Singleton Instance
43
+
44
+ A global `settings` instance is available for import:
45
+
46
+ ```234:235:src/utils/config.py
47
+ # Singleton for easy import
48
+ settings = get_settings()
49
+ ```
50
+
51
+ [View source](https://github.com/DeepCritical/GradioDemo/blob/main/src/utils/config.py#L234-L235)
52
+
53
+ ### Usage Pattern
54
+
55
+ Access configuration throughout the codebase:
56
+
57
+ ```python
58
+ from src.utils.config import settings
59
+
60
+ # Check if API keys are available
61
+ if settings.has_openai_key:
62
+ # Use OpenAI
63
+ pass
64
+
65
+ # Access configuration values
66
+ max_iterations = settings.max_iterations
67
+ web_search_provider = settings.web_search_provider
68
+ ```
69
+
70
+ ## Required Configuration
71
+
72
+ ### LLM Provider
73
+
74
+ You must configure at least one LLM provider. The system supports:
75
+
76
+ - **OpenAI**: Requires `OPENAI_API_KEY`
77
+ - **Anthropic**: Requires `ANTHROPIC_API_KEY`
78
+ - **HuggingFace**: Optional `HF_TOKEN` or `HUGGINGFACE_API_KEY` (can work without key for public models)
79
+
80
+ #### OpenAI Configuration
81
+
82
+ ```bash
83
+ LLM_PROVIDER=openai
84
+ OPENAI_API_KEY=your_openai_api_key_here
85
+ OPENAI_MODEL=gpt-5.1
86
+ ```
87
+
88
+ The default model is defined in the `Settings` class:
89
+
90
+ ```29:29:src/utils/config.py
91
+ openai_model: str = Field(default="gpt-5.1", description="OpenAI model name")
92
+ ```
93
+
94
+ #### Anthropic Configuration
95
+
96
+ ```bash
97
+ LLM_PROVIDER=anthropic
98
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
99
+ ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
100
+ ```
101
+
102
+ The default model is defined in the `Settings` class:
103
+
104
+ ```30:32:src/utils/config.py
105
+ anthropic_model: str = Field(
106
+ default="claude-sonnet-4-5-20250929", description="Anthropic model"
107
+ )
108
+ ```
109
+
110
+ #### HuggingFace Configuration
111
+
112
+ HuggingFace can work without an API key for public models, but an API key provides higher rate limits:
113
+
114
+ ```bash
115
+ # Option 1: Using HF_TOKEN (preferred)
116
+ HF_TOKEN=your_huggingface_token_here
117
+
118
+ # Option 2: Using HUGGINGFACE_API_KEY (alternative)
119
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
120
+
121
+ # Default model
122
+ HUGGINGFACE_MODEL=meta-llama/Llama-3.1-8B-Instruct
123
+ ```
124
+
125
+ The HuggingFace token can be set via either environment variable:
126
+
127
+ ```33:35:src/utils/config.py
128
+ hf_token: str | None = Field(
129
+ default=None, alias="HF_TOKEN", description="HuggingFace API token"
130
+ )
131
+ ```
132
+
133
+ ```57:59:src/utils/config.py
134
+ huggingface_api_key: str | None = Field(
135
+ default=None, description="HuggingFace API token (HF_TOKEN or HUGGINGFACE_API_KEY)"
136
+ )
137
+ ```
138
+
139
+ ## Optional Configuration
140
+
141
+ ### Embedding Configuration
142
+
143
+ DeepCritical supports multiple embedding providers for semantic search and RAG:
144
+
145
+ ```bash
146
+ # Embedding Provider: "openai", "local", or "huggingface"
147
+ EMBEDDING_PROVIDER=local
148
+
149
+ # OpenAI Embedding Model (used by LlamaIndex RAG)
150
+ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
151
+
152
+ # Local Embedding Model (sentence-transformers, used by EmbeddingService)
153
+ LOCAL_EMBEDDING_MODEL=all-MiniLM-L6-v2
154
+
155
+ # HuggingFace Embedding Model
156
+ HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
157
+ ```
158
+
159
+ The embedding provider configuration:
160
+
161
+ ```47:50:src/utils/config.py
162
+ embedding_provider: Literal["openai", "local", "huggingface"] = Field(
163
+ default="local",
164
+ description="Embedding provider to use",
165
+ )
166
+ ```
167
+
168
+ **Note**: OpenAI embeddings require `OPENAI_API_KEY`. The local provider (default) uses sentence-transformers and requires no API key.
169
+
170
+ ### Web Search Configuration
171
+
172
+ DeepCritical supports multiple web search providers:
173
+
174
+ ```bash
175
+ # Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
176
+ # Default: "duckduckgo" (no API key required)
177
+ WEB_SEARCH_PROVIDER=duckduckgo
178
+
179
+ # Serper API Key (for Google search via Serper)
180
+ SERPER_API_KEY=your_serper_api_key_here
181
+
182
+ # SearchXNG Host URL (for self-hosted search)
183
+ SEARCHXNG_HOST=http://localhost:8080
184
+
185
+ # Brave Search API Key
186
+ BRAVE_API_KEY=your_brave_api_key_here
187
+
188
+ # Tavily API Key
189
+ TAVILY_API_KEY=your_tavily_api_key_here
190
+ ```
191
+
192
+ The web search provider configuration:
193
+
194
+ ```71:74:src/utils/config.py
195
+ web_search_provider: Literal["serper", "searchxng", "brave", "tavily", "duckduckgo"] = Field(
196
+ default="duckduckgo",
197
+ description="Web search provider to use",
198
+ )
199
+ ```
200
+
201
+ **Note**: DuckDuckGo is the default and requires no API key, making it ideal for development and testing.
202
+
203
+ ### PubMed Configuration
204
+
205
+ PubMed search supports optional NCBI API key for higher rate limits:
206
+
207
+ ```bash
208
+ # NCBI API Key (optional, for higher rate limits: 10 req/sec vs 3 req/sec)
209
+ NCBI_API_KEY=your_ncbi_api_key_here
210
+ ```
211
+
212
+ The PubMed tool uses this configuration:
213
+
214
+ ```22:29:src/tools/pubmed.py
215
+ def __init__(self, api_key: str | None = None) -> None:
216
+ self.api_key = api_key or settings.ncbi_api_key
217
+ # Ignore placeholder values from .env.example
218
+ if self.api_key == "your-ncbi-key-here":
219
+ self.api_key = None
220
+
221
+ # Use shared rate limiter
222
+ self._limiter = get_pubmed_limiter(self.api_key)
223
+ ```
224
+
225
+ ### Agent Configuration
226
+
227
+ Control agent behavior and research loop execution:
228
+
229
+ ```bash
230
+ # Maximum iterations per research loop (1-50, default: 10)
231
+ MAX_ITERATIONS=10
232
+
233
+ # Search timeout in seconds
234
+ SEARCH_TIMEOUT=30
235
+
236
+ # Use graph-based execution for research flows
237
+ USE_GRAPH_EXECUTION=false
238
+ ```
239
+
240
+ The agent configuration fields:
241
+
242
+ ```80:85:src/utils/config.py
243
+ # Agent Configuration
244
+ max_iterations: int = Field(default=10, ge=1, le=50)
245
+ search_timeout: int = Field(default=30, description="Seconds to wait for search")
246
+ use_graph_execution: bool = Field(
247
+ default=False, description="Use graph-based execution for research flows"
248
+ )
249
+ ```
250
+
251
+ ### Budget & Rate Limiting Configuration
252
+
253
+ Control resource limits for research loops:
254
+
255
+ ```bash
256
+ # Default token budget per research loop (1000-1000000, default: 100000)
257
+ DEFAULT_TOKEN_LIMIT=100000
258
+
259
+ # Default time limit per research loop in minutes (1-120, default: 10)
260
+ DEFAULT_TIME_LIMIT_MINUTES=10
261
+
262
+ # Default iterations limit per research loop (1-50, default: 10)
263
+ DEFAULT_ITERATIONS_LIMIT=10
264
+ ```
265
+
266
+ The budget configuration with validation:
267
+
268
+ ```87:105:src/utils/config.py
269
+ # Budget & Rate Limiting Configuration
270
+ default_token_limit: int = Field(
271
+ default=100000,
272
+ ge=1000,
273
+ le=1000000,
274
+ description="Default token budget per research loop",
275
+ )
276
+ default_time_limit_minutes: int = Field(
277
+ default=10,
278
+ ge=1,
279
+ le=120,
280
+ description="Default time limit per research loop (minutes)",
281
+ )
282
+ default_iterations_limit: int = Field(
283
+ default=10,
284
+ ge=1,
285
+ le=50,
286
+ description="Default iterations limit per research loop",
287
+ )
288
+ ```
289
+
290
+ ### RAG Service Configuration
291
+
292
+ Configure the Retrieval-Augmented Generation service:
293
+
294
+ ```bash
295
+ # ChromaDB collection name for RAG
296
+ RAG_COLLECTION_NAME=deepcritical_evidence
297
+
298
+ # Number of top results to retrieve from RAG (1-50, default: 5)
299
+ RAG_SIMILARITY_TOP_K=5
300
+
301
+ # Automatically ingest evidence into RAG
302
+ RAG_AUTO_INGEST=true
303
+ ```
304
+
305
+ The RAG configuration:
306
+
307
+ ```127:141:src/utils/config.py
308
+ # RAG Service Configuration
309
+ rag_collection_name: str = Field(
310
+ default="deepcritical_evidence",
311
+ description="ChromaDB collection name for RAG",
312
+ )
313
+ rag_similarity_top_k: int = Field(
314
+ default=5,
315
+ ge=1,
316
+ le=50,
317
+ description="Number of top results to retrieve from RAG",
318
+ )
319
+ rag_auto_ingest: bool = Field(
320
+ default=True,
321
+ description="Automatically ingest evidence into RAG",
322
+ )
323
+ ```
324
+
325
+ ### ChromaDB Configuration
326
+
327
+ Configure the vector database for embeddings and RAG:
328
+
329
+ ```bash
330
+ # ChromaDB storage path
331
+ CHROMA_DB_PATH=./chroma_db
332
+
333
+ # Whether to persist ChromaDB to disk
334
+ CHROMA_DB_PERSIST=true
335
+
336
+ # ChromaDB server host (for remote ChromaDB, optional)
337
+ CHROMA_DB_HOST=localhost
338
+
339
+ # ChromaDB server port (for remote ChromaDB, optional)
340
+ CHROMA_DB_PORT=8000
341
+ ```
342
+
343
+ The ChromaDB configuration:
344
+
345
+ ```113:125:src/utils/config.py
346
+ chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")
347
+ chroma_db_persist: bool = Field(
348
+ default=True,
349
+ description="Whether to persist ChromaDB to disk",
350
+ )
351
+ chroma_db_host: str | None = Field(
352
+ default=None,
353
+ description="ChromaDB server host (for remote ChromaDB)",
354
+ )
355
+ chroma_db_port: int | None = Field(
356
+ default=None,
357
+ description="ChromaDB server port (for remote ChromaDB)",
358
+ )
359
+ ```
360
+
361
+ ### External Services
362
+
363
+ #### Modal Configuration
364
+
365
+ Modal is used for secure sandbox execution of statistical analysis:
366
+
367
+ ```bash
368
+ # Modal Token ID (for Modal sandbox execution)
369
+ MODAL_TOKEN_ID=your_modal_token_id_here
370
+
371
+ # Modal Token Secret
372
+ MODAL_TOKEN_SECRET=your_modal_token_secret_here
373
+ ```
374
+
375
+ The Modal configuration:
376
+
377
+ ```110:112:src/utils/config.py
378
+ # External Services
379
+ modal_token_id: str | None = Field(default=None, description="Modal token ID")
380
+ modal_token_secret: str | None = Field(default=None, description="Modal token secret")
381
+ ```
382
+
383
+ ### Logging Configuration
384
+
385
+ Configure structured logging:
386
+
387
+ ```bash
388
+ # Log Level: "DEBUG", "INFO", "WARNING", or "ERROR"
389
+ LOG_LEVEL=INFO
390
+ ```
391
+
392
+ The logging configuration:
393
+
394
+ ```107:108:src/utils/config.py
395
+ # Logging
396
+ log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"
397
+ ```
398
+
399
+ Logging is configured via the `configure_logging()` function:
400
+
401
+ ```212:231:src/utils/config.py
402
+ def configure_logging(settings: Settings) -> None:
403
+ """Configure structured logging with the configured log level."""
404
+ # Set stdlib logging level from settings
405
+ logging.basicConfig(
406
+ level=getattr(logging, settings.log_level),
407
+ format="%(message)s",
408
+ )
409
+
410
+ structlog.configure(
411
+ processors=[
412
+ structlog.stdlib.filter_by_level,
413
+ structlog.stdlib.add_logger_name,
414
+ structlog.stdlib.add_log_level,
415
+ structlog.processors.TimeStamper(fmt="iso"),
416
+ structlog.processors.JSONRenderer(),
417
+ ],
418
+ wrapper_class=structlog.stdlib.BoundLogger,
419
+ context_class=dict,
420
+ logger_factory=structlog.stdlib.LoggerFactory(),
421
+ )
422
+ ```
423
+
424
+ ## Configuration Properties
425
+
426
+ The `Settings` class provides helpful properties for checking configuration state:
427
+
428
+ ### API Key Availability
429
+
430
+ Check which API keys are available:
431
+
432
+ ```171:189:src/utils/config.py
433
+ @property
434
+ def has_openai_key(self) -> bool:
435
+ """Check if OpenAI API key is available."""
436
+ return bool(self.openai_api_key)
437
+
438
+ @property
439
+ def has_anthropic_key(self) -> bool:
440
+ """Check if Anthropic API key is available."""
441
+ return bool(self.anthropic_api_key)
442
+
443
+ @property
444
+ def has_huggingface_key(self) -> bool:
445
+ """Check if HuggingFace API key is available."""
446
+ return bool(self.huggingface_api_key or self.hf_token)
447
+
448
+ @property
449
+ def has_any_llm_key(self) -> bool:
450
+ """Check if any LLM API key is available."""
451
+ return self.has_openai_key or self.has_anthropic_key or self.has_huggingface_key
452
+ ```
453
+
454
+ **Usage:**
455
+
456
+ ```python
457
+ from src.utils.config import settings
458
+
459
+ # Check API key availability
460
+ if settings.has_openai_key:
461
+ # Use OpenAI
462
+ pass
463
+
464
+ if settings.has_anthropic_key:
465
+ # Use Anthropic
466
+ pass
467
+
468
+ if settings.has_huggingface_key:
469
+ # Use HuggingFace
470
+ pass
471
+
472
+ if settings.has_any_llm_key:
473
+ # At least one LLM is available
474
+ pass
475
+ ```
476
+
477
+ ### Service Availability
478
+
479
+ Check if external services are configured:
480
+
481
+ ```143:146:src/utils/config.py
482
+ @property
483
+ def modal_available(self) -> bool:
484
+ """Check if Modal credentials are configured."""
485
+ return bool(self.modal_token_id and self.modal_token_secret)
486
+ ```
487
+
488
+ ```191:204:src/utils/config.py
489
+ @property
490
+ def web_search_available(self) -> bool:
491
+ """Check if web search is available (either no-key provider or API key present)."""
492
+ if self.web_search_provider == "duckduckgo":
493
+ return True # No API key required
494
+ if self.web_search_provider == "serper":
495
+ return bool(self.serper_api_key)
496
+ if self.web_search_provider == "searchxng":
497
+ return bool(self.searchxng_host)
498
+ if self.web_search_provider == "brave":
499
+ return bool(self.brave_api_key)
500
+ if self.web_search_provider == "tavily":
501
+ return bool(self.tavily_api_key)
502
+ return False
503
+ ```
504
+
505
+ **Usage:**
506
+
507
+ ```python
508
+ from src.utils.config import settings
509
+
510
+ # Check service availability
511
+ if settings.modal_available:
512
+ # Use Modal sandbox
513
+ pass
514
+
515
+ if settings.web_search_available:
516
+ # Web search is configured
517
+ pass
518
+ ```
519
+
520
+ ### API Key Retrieval
521
+
522
+ Get the API key for the configured provider:
523
+
524
+ ```148:160:src/utils/config.py
525
+ def get_api_key(self) -> str:
526
+ """Get the API key for the configured provider."""
527
+ if self.llm_provider == "openai":
528
+ if not self.openai_api_key:
529
+ raise ConfigurationError("OPENAI_API_KEY not set")
530
+ return self.openai_api_key
531
+
532
+ if self.llm_provider == "anthropic":
533
+ if not self.anthropic_api_key:
534
+ raise ConfigurationError("ANTHROPIC_API_KEY not set")
535
+ return self.anthropic_api_key
536
+
537
+ raise ConfigurationError(f"Unknown LLM provider: {self.llm_provider}")
538
+ ```
539
+
540
+ For OpenAI-specific operations (e.g., Magentic mode):
541
+
542
+ ```162:169:src/utils/config.py
543
+ def get_openai_api_key(self) -> str:
544
+ """Get OpenAI API key (required for Magentic function calling)."""
545
+ if not self.openai_api_key:
546
+ raise ConfigurationError(
547
+ "OPENAI_API_KEY not set. Magentic mode requires OpenAI for function calling. "
548
+ "Use mode='simple' for other providers."
549
+ )
550
+ return self.openai_api_key
551
+ ```
552
+
553
+ ## Configuration Usage in Codebase
554
+
555
+ The configuration system is used throughout the codebase:
556
+
557
+ ### LLM Factory
558
+
559
+ The LLM factory uses settings to create appropriate models:
560
+
561
+ ```129:144:src/utils/llm_factory.py
562
+ if settings.llm_provider == "huggingface":
563
+ model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
564
+ hf_provider = HuggingFaceProvider(api_key=settings.hf_token)
565
+ return HuggingFaceModel(model_name, provider=hf_provider)
566
+
567
+ if settings.llm_provider == "openai":
568
+ if not settings.openai_api_key:
569
+ raise ConfigurationError("OPENAI_API_KEY not set for pydantic-ai")
570
+ provider = OpenAIProvider(api_key=settings.openai_api_key)
571
+ return OpenAIModel(settings.openai_model, provider=provider)
572
+
573
+ if settings.llm_provider == "anthropic":
574
+ if not settings.anthropic_api_key:
575
+ raise ConfigurationError("ANTHROPIC_API_KEY not set for pydantic-ai")
576
+ anthropic_provider = AnthropicProvider(api_key=settings.anthropic_api_key)
577
+ return AnthropicModel(settings.anthropic_model, provider=anthropic_provider)
578
+ ```
579
+
580
+ ### Embedding Service
581
+
582
+ The embedding service uses local embedding model configuration:
583
+
584
+ ```29:31:src/services/embeddings.py
585
+ def __init__(self, model_name: str | None = None):
586
+ self._model_name = model_name or settings.local_embedding_model
587
+ self._model = SentenceTransformer(self._model_name)
588
+ ```
589
+
590
+ ### Orchestrator Factory
591
+
592
+ The orchestrator factory uses settings to determine mode:
593
+
594
+ ```69:80:src/orchestrator_factory.py
595
+ def _determine_mode(explicit_mode: str | None) -> str:
596
+ """Determine which mode to use."""
597
+ if explicit_mode:
598
+ if explicit_mode in ("magentic", "advanced"):
599
+ return "advanced"
600
+ return "simple"
601
+
602
+ # Auto-detect: advanced if paid API key available
603
+ if settings.has_openai_key:
604
+ return "advanced"
605
+
606
+ return "simple"
607
+ ```
608
+
609
+ ## Environment Variables Reference
610
+
611
+ ### Required (at least one LLM)
612
+
613
+ - `OPENAI_API_KEY` - OpenAI API key (required for OpenAI provider)
614
+ - `ANTHROPIC_API_KEY` - Anthropic API key (required for Anthropic provider)
615
+ - `HF_TOKEN` or `HUGGINGFACE_API_KEY` - HuggingFace API token (optional, can work without for public models)
616
+
617
+ #### LLM Configuration Variables
618
+
619
+ - `LLM_PROVIDER` - Provider to use: `"openai"`, `"anthropic"`, or `"huggingface"` (default: `"openai"`)
620
+ - `OPENAI_MODEL` - OpenAI model name (default: `"gpt-5.1"`)
621
+ - `ANTHROPIC_MODEL` - Anthropic model name (default: `"claude-sonnet-4-5-20250929"`)
622
+ - `HUGGINGFACE_MODEL` - HuggingFace model ID (default: `"meta-llama/Llama-3.1-8B-Instruct"`)
623
+
624
+ #### Embedding Configuration Variables
625
+
626
+ - `EMBEDDING_PROVIDER` - Provider: `"openai"`, `"local"`, or `"huggingface"` (default: `"local"`)
627
+ - `OPENAI_EMBEDDING_MODEL` - OpenAI embedding model (default: `"text-embedding-3-small"`)
628
+ - `LOCAL_EMBEDDING_MODEL` - Local sentence-transformers model (default: `"all-MiniLM-L6-v2"`)
629
+ - `HUGGINGFACE_EMBEDDING_MODEL` - HuggingFace embedding model (default: `"sentence-transformers/all-MiniLM-L6-v2"`)
630
+
631
+ #### Web Search Configuration Variables
632
+
633
+ - `WEB_SEARCH_PROVIDER` - Provider: `"serper"`, `"searchxng"`, `"brave"`, `"tavily"`, or `"duckduckgo"` (default: `"duckduckgo"`)
634
+ - `SERPER_API_KEY` - Serper API key (required for Serper provider)
635
+ - `SEARCHXNG_HOST` - SearchXNG host URL (required for SearchXNG provider)
636
+ - `BRAVE_API_KEY` - Brave Search API key (required for Brave provider)
637
+ - `TAVILY_API_KEY` - Tavily API key (required for Tavily provider)
638
+
639
+ #### PubMed Configuration Variables
640
+
641
+ - `NCBI_API_KEY` - NCBI API key (optional, increases rate limit from 3 to 10 req/sec)
642
+
643
+ #### Agent Configuration Variables
644
+
645
+ - `MAX_ITERATIONS` - Maximum iterations per research loop (1-50, default: `10`)
646
+ - `SEARCH_TIMEOUT` - Search timeout in seconds (default: `30`)
647
+ - `USE_GRAPH_EXECUTION` - Use graph-based execution (default: `false`)
648
+
649
+ #### Budget Configuration Variables
650
+
651
+ - `DEFAULT_TOKEN_LIMIT` - Default token budget per research loop (1000-1000000, default: `100000`)
652
+ - `DEFAULT_TIME_LIMIT_MINUTES` - Default time limit in minutes (1-120, default: `10`)
653
+ - `DEFAULT_ITERATIONS_LIMIT` - Default iterations limit (1-50, default: `10`)
654
+
655
+ #### RAG Configuration Variables
656
+
657
+ - `RAG_COLLECTION_NAME` - ChromaDB collection name (default: `"deepcritical_evidence"`)
658
+ - `RAG_SIMILARITY_TOP_K` - Number of top results to retrieve (1-50, default: `5`)
659
+ - `RAG_AUTO_INGEST` - Automatically ingest evidence into RAG (default: `true`)
660
+
661
+ #### ChromaDB Configuration Variables
662
+
663
+ - `CHROMA_DB_PATH` - ChromaDB storage path (default: `"./chroma_db"`)
664
+ - `CHROMA_DB_PERSIST` - Whether to persist ChromaDB to disk (default: `true`)
665
+ - `CHROMA_DB_HOST` - ChromaDB server host (optional, for remote ChromaDB)
666
+ - `CHROMA_DB_PORT` - ChromaDB server port (optional, for remote ChromaDB)
667
+
668
+ #### External Services Variables
669
+
670
+ - `MODAL_TOKEN_ID` - Modal token ID (optional, for Modal sandbox execution)
671
+ - `MODAL_TOKEN_SECRET` - Modal token secret (optional, for Modal sandbox execution)
672
+
673
+ #### Logging Configuration Variables
674
+
675
+ - `LOG_LEVEL` - Log level: `"DEBUG"`, `"INFO"`, `"WARNING"`, or `"ERROR"` (default: `"INFO"`)
676
+
677
+ ## Validation
678
+
679
+ Settings are validated on load using Pydantic validation:
680
+
681
+ - **Type Checking**: All fields are strongly typed
682
+ - **Range Validation**: Numeric fields have min/max constraints (e.g., `ge=1, le=50` for `max_iterations`)
683
+ - **Literal Validation**: Enum fields only accept specific values (e.g., `Literal["openai", "anthropic", "huggingface"]`)
684
+ - **Required Fields**: API keys are checked when accessed via `get_api_key()` or `get_openai_api_key()`
685
+
686
+ ### Validation Examples
687
+
688
+ The `max_iterations` field has range validation:
689
+
690
+ ```81:81:src/utils/config.py
691
+ max_iterations: int = Field(default=10, ge=1, le=50)
692
+ ```
693
+
694
+ The `llm_provider` field has literal validation:
695
+
696
+ ```26:28:src/utils/config.py
697
+ llm_provider: Literal["openai", "anthropic", "huggingface"] = Field(
698
+ default="openai", description="Which LLM provider to use"
699
+ )
700
+ ```
701
+
702
+ ## Error Handling
703
+
704
+ Configuration errors raise `ConfigurationError` from `src/utils/exceptions.py`:
705
+
706
+ ```22:25:src/utils/exceptions.py
707
+ class ConfigurationError(DeepCriticalError):
708
+ """Raised when configuration is invalid."""
709
+
710
+ pass
711
+ ```
712
+
713
+ ### Error Handling Example
714
+
715
+ ```python
716
+ from src.utils.config import settings
717
+ from src.utils.exceptions import ConfigurationError
718
+
719
+ try:
720
+ api_key = settings.get_api_key()
721
+ except ConfigurationError as e:
722
+ print(f"Configuration error: {e}")
723
+ ```
724
+
725
+ ### Common Configuration Errors
726
+
727
+ 1. **Missing API Key**: When `get_api_key()` is called but the required API key is not set
728
+ 2. **Invalid Provider**: When `llm_provider` is set to an unsupported value
729
+ 3. **Out of Range**: When numeric values exceed their min/max constraints
730
+ 4. **Invalid Literal**: When enum fields receive unsupported values
731
+
732
+ ## Configuration Best Practices
733
+
734
+ 1. **Use `.env` File**: Store sensitive keys in `.env` file (add to `.gitignore`)
735
+ 2. **Check Availability**: Use properties like `has_openai_key` before accessing API keys
736
+ 3. **Handle Errors**: Always catch `ConfigurationError` when calling `get_api_key()`
737
+ 4. **Validate Early**: Configuration is validated on import, so errors surface immediately
738
+ 5. **Use Defaults**: Leverage sensible defaults for optional configuration
739
+
740
+ ## Future Enhancements
741
+
742
+ The following configurations are planned for future phases:
743
+
744
+ 1. **Additional LLM Providers**: DeepSeek, OpenRouter, Gemini, Perplexity, Azure OpenAI, Local models
745
+ 2. **Model Selection**: Reasoning/main/fast model configuration
746
+ 3. **Service Integration**: Additional service integrations and configurations
CONTRIBUTING.md → docs/contributing.md RENAMED
@@ -1,26 +1,24 @@
1
- # Contributing to The DETERMINATOR
2
 
3
- Thank you for your interest in contributing to The DETERMINATOR! This guide will help you get started.
4
 
5
  ## Table of Contents
6
 
7
  - [Git Workflow](#git-workflow)
8
  - [Getting Started](#getting-started)
9
  - [Development Commands](#development-commands)
 
 
 
 
 
 
 
10
  - [MCP Integration](#mcp-integration)
11
  - [Common Pitfalls](#common-pitfalls)
12
  - [Key Principles](#key-principles)
13
  - [Pull Request Process](#pull-request-process)
14
 
15
- > **Note**: Additional sections (Code Style, Error Handling, Testing, Implementation Patterns, Code Quality, and Prompt Engineering) are available as separate pages in the [documentation](https://deepcritical.github.io/GradioDemo/contributing/).
16
- > **Note on Project Names**: "The DETERMINATOR" is the product name, "DeepCritical" is the organization/project name, and "determinator" is the Python package name.
17
-
18
- ## Repository Information
19
-
20
- - **GitHub Repository**: [`DeepCritical/GradioDemo`](https://github.com/DeepCritical/GradioDemo) (source of truth, PRs, code review)
21
- - **HuggingFace Space**: [`DataQuests/DeepCritical`](https://huggingface.co/spaces/DataQuests/DeepCritical) (deployment/demo)
22
- - **Package Name**: `determinator` (Python package name in `pyproject.toml`)
23
-
24
  ## Git Workflow
25
 
26
  - `main`: Production-ready (GitHub)
@@ -29,31 +27,9 @@ Thank you for your interest in contributing to The DETERMINATOR! This guide will
29
  - **NEVER** push directly to `main` or `dev` on HuggingFace
30
  - GitHub is source of truth; HuggingFace is for deployment
31
 
32
- ### Dual Repository Setup
33
-
34
- This project uses a dual repository setup:
35
-
36
- - **GitHub (`DeepCritical/GradioDemo`)**: Source of truth for code, PRs, and code review
37
- - **HuggingFace (`DataQuests/DeepCritical`)**: Deployment target for the Gradio demo
38
-
39
- #### Remote Configuration
40
-
41
- When cloning, set up remotes as follows:
42
-
43
- ```bash
44
- # Clone from GitHub
45
- git clone https://github.com/DeepCritical/GradioDemo.git
46
- cd GradioDemo
47
-
48
- # Add HuggingFace remote (optional, for deployment)
49
- git remote add huggingface-upstream https://huggingface.co/spaces/DataQuests/DeepCritical
50
- ```
51
-
52
- **Important**: Never push directly to `main` or `dev` on HuggingFace. Always work through GitHub PRs. GitHub is the source of truth; HuggingFace is for deployment/demo only.
53
-
54
  ## Getting Started
55
 
56
- 1. **Fork the repository** on GitHub: [`DeepCritical/GradioDemo`](https://github.com/DeepCritical/GradioDemo)
57
  2. **Clone your fork**:
58
 
59
  ```bash
@@ -64,8 +40,7 @@ git remote add huggingface-upstream https://huggingface.co/spaces/DataQuests/Dee
64
  3. **Install dependencies**:
65
 
66
  ```bash
67
- uv sync --all-extras
68
- uv run pre-commit install
69
  ```
70
 
71
  4. **Create a feature branch**:
@@ -78,9 +53,7 @@ git remote add huggingface-upstream https://huggingface.co/spaces/DataQuests/Dee
78
  6. **Run checks**:
79
 
80
  ```bash
81
- uv run ruff check src tests
82
- uv run mypy src
83
- uv run pytest --cov=src --cov-report=term-missing tests/unit/ -v -m "not openai" -p no:logfire
84
  ```
85
 
86
  7. **Commit and push**:
@@ -89,72 +62,22 @@ git remote add huggingface-upstream https://huggingface.co/spaces/DataQuests/Dee
89
  git commit -m "Description of changes"
90
  git push origin yourname-feature-name
91
  ```
92
-
93
  8. **Create a pull request** on GitHub
94
 
95
- ## Package Manager
96
-
97
- This project uses [`uv`](https://github.com/astral-sh/uv) as the package manager. All commands should be prefixed with `uv run` to ensure they run in the correct environment.
98
-
99
- ### Installation
100
-
101
- ```bash
102
- # Install uv if you haven't already (recommended: standalone installer)
103
- # Unix/macOS/Linux:
104
- curl -LsSf https://astral.sh/uv/install.sh | sh
105
-
106
- # Windows (PowerShell):
107
- powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
108
-
109
- # Alternative: pipx install uv
110
- # Or: pip install uv
111
-
112
- # Sync all dependencies including dev extras
113
- uv sync --all-extras
114
-
115
- # Install pre-commit hooks
116
- uv run pre-commit install
117
- ```
118
-
119
  ## Development Commands
120
 
121
  ```bash
122
- # Installation
123
- uv sync --all-extras # Install all dependencies including dev
124
- uv run pre-commit install # Install pre-commit hooks
125
-
126
- # Code Quality Checks (run all before committing)
127
- uv run ruff check src tests # Lint with ruff
128
- uv run ruff format src tests # Format with ruff
129
- uv run mypy src # Type checking
130
- uv run pytest --cov=src --cov-report=term-missing tests/unit/ -v -m "not openai" -p no:logfire # Tests with coverage
131
-
132
- # Testing Commands
133
- uv run pytest tests/unit/ -v -m "not openai" -p no:logfire # Run unit tests (excludes OpenAI tests)
134
- uv run pytest tests/ -v -m "huggingface" -p no:logfire # Run HuggingFace tests
135
- uv run pytest tests/ -v -p no:logfire # Run all tests
136
- uv run pytest --cov=src --cov-report=term-missing tests/unit/ -v -m "not openai" -p no:logfire # Tests with terminal coverage
137
- uv run pytest --cov=src --cov-report=html -p no:logfire # Generate HTML coverage report (opens htmlcov/index.html)
138
-
139
- # Documentation Commands
140
- uv run mkdocs build # Build documentation
141
- uv run mkdocs serve # Serve documentation locally (http://127.0.0.1:8000)
142
  ```
143
 
144
- ### Test Markers
145
-
146
- The project uses pytest markers to categorize tests. See [Testing Guidelines](docs/contributing/testing.md) for details:
147
-
148
- - `unit`: Unit tests (mocked, fast)
149
- - `integration`: Integration tests (real APIs)
150
- - `slow`: Slow tests
151
- - `openai`: Tests requiring OpenAI API key
152
- - `huggingface`: Tests requiring HuggingFace API key
153
- - `embedding_provider`: Tests requiring API-based embedding providers
154
- - `local_embeddings`: Tests using local embeddings
155
-
156
- **Note**: The `-p no:logfire` flag disables the logfire plugin to avoid conflicts during testing.
157
-
158
  ## Code Style & Conventions
159
 
160
  ### Type Safety
@@ -163,9 +86,11 @@ The project uses pytest markers to categorize tests. See [Testing Guidelines](do
163
  - Use `mypy --strict` compliance (no `Any` unless absolutely necessary)
164
  - Use `TYPE_CHECKING` imports for circular dependencies:
165
 
166
- <!--codeinclude-->
167
- [TYPE_CHECKING Import Pattern](../src/utils/citation_validator.py) start_line:8 end_line:11
168
- <!--/codeinclude-->
 
 
169
 
170
  ### Pydantic Models
171
 
@@ -200,10 +125,10 @@ result = await loop.run_in_executor(None, cpu_bound_function, args)
200
 
201
  ### Pre-commit
202
 
203
- - Pre-commit hooks run automatically on commit
204
  - Must pass: lint + typecheck + test-cov
205
- - Install hooks with: `uv run pre-commit install`
206
- - Note: `uv sync --all-extras` installs the pre-commit package, but you must run `uv run pre-commit install` separately to set up the git hooks
207
 
208
  ## Error Handling & Logging
209
 
@@ -211,9 +136,10 @@ result = await loop.run_in_executor(None, cpu_bound_function, args)
211
 
212
  Use custom exception hierarchy (`src/utils/exceptions.py`):
213
 
214
- <!--codeinclude-->
215
- [Exception Hierarchy](../src/utils/exceptions.py) start_line:4 end_line:31
216
- <!--/codeinclude-->
 
217
 
218
  ### Error Handling Rules
219
 
@@ -273,7 +199,7 @@ except httpx.HTTPError as e:
273
  1. Write failing test in `tests/unit/`
274
  2. Implement in `src/`
275
  3. Ensure test passes
276
- 4. Run checks: `uv run ruff check src tests && uv run mypy src && uv run pytest --cov=src --cov-report=term-missing tests/unit/ -v -m "not openai" -p no:logfire`
277
 
278
  ### Test Examples
279
 
@@ -294,8 +220,7 @@ async def test_real_pubmed_search():
294
 
295
  ### Test Coverage
296
 
297
- - Run `uv run pytest --cov=src --cov-report=term-missing tests/unit/ -v -m "not openai" -p no:logfire` for coverage report
298
- - Run `uv run pytest --cov=src --cov-report=html -p no:logfire` for HTML coverage report (opens `htmlcov/index.html`)
299
  - Aim for >80% coverage on critical paths
300
  - Exclude: `__init__.py`, `TYPE_CHECKING` blocks
301
 
@@ -339,9 +264,11 @@ class MySearchTool:
339
  - Lazy initialization for optional dependencies (e.g., embeddings, Modal)
340
  - Check requirements before initialization:
341
 
342
- <!--codeinclude-->
343
- [Check Magentic Requirements](../src/utils/llm_factory.py) start_line:152 end_line:170
344
- <!--/codeinclude-->
 
 
345
 
346
  ### State Management
347
 
@@ -353,9 +280,11 @@ class MySearchTool:
353
 
354
  Use `@lru_cache(maxsize=1)` for singletons:
355
 
356
- <!--codeinclude-->
357
- [Singleton Pattern Example](../src/services/statistical_analyzer.py) start_line:252 end_line:255
358
- <!--/codeinclude-->
 
 
359
 
360
  - Lazy initialization to avoid requiring dependencies at import time
361
 
@@ -369,9 +298,22 @@ Use `@lru_cache(maxsize=1)` for singletons:
369
 
370
  Example:
371
 
372
- <!--codeinclude-->
373
- [Search Method Docstring Example](../src/tools/pubmed.py) start_line:51 end_line:58
374
- <!--/codeinclude-->
 
 
 
 
 
 
 
 
 
 
 
 
 
375
 
376
  ### Code Comments
377
 
@@ -468,7 +410,7 @@ Example:
468
 
469
  ## Pull Request Process
470
 
471
- 1. Ensure all checks pass: `uv run ruff check src tests && uv run mypy src && uv run pytest --cov=src --cov-report=term-missing tests/unit/ -v -m "not openai" -p no:logfire`
472
  2. Update documentation if needed
473
  3. Add tests for new features
474
  4. Update CHANGELOG if applicable
@@ -476,19 +418,11 @@ Example:
476
  6. Address review feedback
477
  7. Wait for approval before merging
478
 
479
- ## Project Structure
480
-
481
- - `src/`: Main source code
482
- - `tests/`: Test files (`unit/` and `integration/`)
483
- - `docs/`: Documentation source files (MkDocs)
484
- - `examples/`: Example usage scripts
485
- - `pyproject.toml`: Project configuration and dependencies
486
- - `.pre-commit-config.yaml`: Pre-commit hook configuration
487
-
488
  ## Questions?
489
 
490
- - Open an issue on [GitHub](https://github.com/DeepCritical/GradioDemo)
491
- - Check existing [documentation](https://deepcritical.github.io/GradioDemo/)
492
  - Review code examples in the codebase
493
 
494
- Thank you for contributing to The DETERMINATOR!
 
 
1
+ # Contributing to DeepCritical
2
 
3
+ Thank you for your interest in contributing to DeepCritical! This guide will help you get started.
4
 
5
  ## Table of Contents
6
 
7
  - [Git Workflow](#git-workflow)
8
  - [Getting Started](#getting-started)
9
  - [Development Commands](#development-commands)
10
+ - [Code Style & Conventions](#code-style--conventions)
11
+ - [Type Safety](#type-safety)
12
+ - [Error Handling & Logging](#error-handling--logging)
13
+ - [Testing Requirements](#testing-requirements)
14
+ - [Implementation Patterns](#implementation-patterns)
15
+ - [Code Quality & Documentation](#code-quality--documentation)
16
+ - [Prompt Engineering & Citation Validation](#prompt-engineering--citation-validation)
17
  - [MCP Integration](#mcp-integration)
18
  - [Common Pitfalls](#common-pitfalls)
19
  - [Key Principles](#key-principles)
20
  - [Pull Request Process](#pull-request-process)
21
 
 
 
 
 
 
 
 
 
 
22
  ## Git Workflow
23
 
24
  - `main`: Production-ready (GitHub)
 
27
  - **NEVER** push directly to `main` or `dev` on HuggingFace
28
  - GitHub is source of truth; HuggingFace is for deployment
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  ## Getting Started
31
 
32
+ 1. **Fork the repository** on GitHub
33
  2. **Clone your fork**:
34
 
35
  ```bash
 
40
  3. **Install dependencies**:
41
 
42
  ```bash
43
+ make install
 
44
  ```
45
 
46
  4. **Create a feature branch**:
 
53
  6. **Run checks**:
54
 
55
  ```bash
56
+ make check
 
 
57
  ```
58
 
59
  7. **Commit and push**:
 
62
  git commit -m "Description of changes"
63
  git push origin yourname-feature-name
64
  ```
 
65
  8. **Create a pull request** on GitHub
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  ## Development Commands
68
 
69
  ```bash
70
+ make install # Install dependencies + pre-commit
71
+ make check # Lint + typecheck + test (MUST PASS)
72
+ make test # Run unit tests
73
+ make lint # Run ruff
74
+ make format # Format with ruff
75
+ make typecheck # Run mypy
76
+ make test-cov # Test with coverage
77
+ make docs-build # Build documentation
78
+ make docs-serve # Serve documentation locally
 
 
 
 
 
 
 
 
 
 
 
79
  ```
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ## Code Style & Conventions
82
 
83
  ### Type Safety
 
86
  - Use `mypy --strict` compliance (no `Any` unless absolutely necessary)
87
  - Use `TYPE_CHECKING` imports for circular dependencies:
88
 
89
+ ```python
90
+ from typing import TYPE_CHECKING
91
+ if TYPE_CHECKING:
92
+ from src.services.embeddings import EmbeddingService
93
+ ```
94
 
95
  ### Pydantic Models
96
 
 
125
 
126
  ### Pre-commit
127
 
128
+ - Run `make check` before committing
129
  - Must pass: lint + typecheck + test-cov
130
+ - Pre-commit hooks installed via `make install`
131
+ - **CRITICAL**: Run the full pre-commit checks before opening a PR (not a draft); PRs that skip these checks will be sent back for rework
132
 
133
  ## Error Handling & Logging
134
 
 
136
 
137
  Use custom exception hierarchy (`src/utils/exceptions.py`):
138
 
139
+ - `DeepCriticalError` (base)
140
+ - `SearchError` → `RateLimitError`
141
+ - `JudgeError`
142
+ - `ConfigurationError`
143
 
144
  ### Error Handling Rules
145
 
 
199
  1. Write failing test in `tests/unit/`
200
  2. Implement in `src/`
201
  3. Ensure test passes
202
+ 4. Run `make check` (lint + typecheck + test)
203
 
204
  ### Test Examples
205
 
 
220
 
221
  ### Test Coverage
222
 
223
+ - Run `make test-cov` for coverage report
 
224
  - Aim for >80% coverage on critical paths
225
  - Exclude: `__init__.py`, `TYPE_CHECKING` blocks
226
 
 
264
  - Lazy initialization for optional dependencies (e.g., embeddings, Modal)
265
  - Check requirements before initialization:
266
 
267
+ ```python
268
+ def check_magentic_requirements() -> None:
269
+ if not settings.has_openai_key:
270
+ raise ConfigurationError("Magentic requires OpenAI")
271
+ ```
272
 
273
  ### State Management
274
 
 
280
 
281
  Use `@lru_cache(maxsize=1)` for singletons:
282
 
283
+ ```python
284
+ @lru_cache(maxsize=1)
285
+ def get_embedding_service() -> EmbeddingService:
286
+ return EmbeddingService()
287
+ ```
288
 
289
  - Lazy initialization to avoid requiring dependencies at import time
290
 
 
298
 
299
  Example:
300
 
301
+ ```python
302
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
303
+ """Search PubMed and return evidence.
304
+
305
+ Args:
306
+ query: The search query string
307
+ max_results: Maximum number of results to return
308
+
309
+ Returns:
310
+ List of Evidence objects
311
+
312
+ Raises:
313
+ SearchError: If the search fails
314
+ RateLimitError: If we hit rate limits
315
+ """
316
+ ```
317
 
318
  ### Code Comments
319
 
 
410
 
411
  ## Pull Request Process
412
 
413
+ 1. Ensure all checks pass: `make check`
414
  2. Update documentation if needed
415
  3. Add tests for new features
416
  4. Update CHANGELOG if applicable
 
418
  6. Address review feedback
419
  7. Wait for approval before merging
420
 
 
 
 
 
 
 
 
 
 
421
  ## Questions?
422
 
423
+ - Open an issue on GitHub
424
+ - Check existing documentation
425
  - Review code examples in the codebase
426
 
427
+ Thank you for contributing to DeepCritical!
428
+
docs/contributing/code-quality.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code Quality & Documentation
2
+
3
+ This document outlines code quality standards and documentation requirements.
4
+
5
+ ## Linting
6
+
7
+ - Ruff with 100-char line length
8
+ - Ignore rules documented in `pyproject.toml`:
9
+ - `PLR0913`: Too many arguments (agents need many params)
10
+ - `PLR0912`: Too many branches (complex orchestrator logic)
11
+ - `PLR0911`: Too many return statements (complex agent logic)
12
+ - `PLR2004`: Magic values (statistical constants)
13
+ - `PLW0603`: Global statement (singleton pattern)
14
+ - `PLC0415`: Lazy imports for optional dependencies
15
+
16
+ ## Type Checking
17
+
18
+ - `mypy --strict` compliance
19
+ - `ignore_missing_imports = true` (for optional dependencies)
20
+ - Exclude: `reference_repos/`, `examples/`
21
+ - All functions must have complete type annotations
22
+
23
+ ## Pre-commit
24
+
25
+ - Run `make check` before committing
26
+ - Must pass: lint + typecheck + test-cov
27
+ - Pre-commit hooks installed via `make install`
28
+
29
+ ## Documentation
30
+
31
+ ### Docstrings
32
+
33
+ - Google-style docstrings for all public functions
34
+ - Include Args, Returns, Raises sections
35
+ - Use type hints in docstrings only if needed for clarity
36
+
37
+ Example:
38
+
39
+ ```python
40
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
41
+ """Search PubMed and return evidence.
42
+
43
+ Args:
44
+ query: The search query string
45
+ max_results: Maximum number of results to return
46
+
47
+ Returns:
48
+ List of Evidence objects
49
+
50
+ Raises:
51
+ SearchError: If the search fails
52
+ RateLimitError: If we hit rate limits
53
+ """
54
+ ```
55
+
56
+ ### Code Comments
57
+
58
+ - Explain WHY, not WHAT
59
+ - Document non-obvious patterns (e.g., why `requests` not `httpx` for ClinicalTrials)
60
+ - Mark critical sections: `# CRITICAL: ...`
61
+ - Document rate limiting rationale
62
+ - Explain async patterns when non-obvious
63
+
64
+ ## See Also
65
+
66
+ - [Code Style](code-style.md) - Code style guidelines
67
+ - [Testing](testing.md) - Testing guidelines
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
docs/contributing/code-style.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code Style & Conventions
2
+
3
+ This document outlines the code style and conventions for DeepCritical.
4
+
5
+ ## Type Safety
6
+
7
+ - **ALWAYS** use type hints for all function parameters and return types
8
+ - Use `mypy --strict` compliance (no `Any` unless absolutely necessary)
9
+ - Use `TYPE_CHECKING` imports for circular dependencies:
10
+
11
+ ```python
12
+ from typing import TYPE_CHECKING
13
+ if TYPE_CHECKING:
14
+ from src.services.embeddings import EmbeddingService
15
+ ```
16
+
17
+ ## Pydantic Models
18
+
19
+ - All data exchange uses Pydantic models (`src/utils/models.py`)
20
+ - Models are frozen (`model_config = {"frozen": True}`) for immutability
21
+ - Use `Field()` with descriptions for all model fields
22
+ - Validate with `ge=`, `le=`, `min_length=`, `max_length=` constraints
23
+
24
+ ## Async Patterns
25
+
26
+ - **ALL** I/O operations must be async (`async def`, `await`)
27
+ - Use `asyncio.gather()` for parallel operations
28
+ - CPU-bound work (embeddings, parsing) must use `run_in_executor()`:
29
+
30
+ ```python
31
+ loop = asyncio.get_running_loop()
32
+ result = await loop.run_in_executor(None, cpu_bound_function, args)
33
+ ```
34
+
35
+ - Never block the event loop with synchronous I/O
36
+
37
+ ## Common Pitfalls
38
+
39
+ 1. **Blocking the event loop**: Never use sync I/O in async functions
40
+ 2. **Missing type hints**: All functions must have complete type annotations
41
+ 3. **Global mutable state**: Use ContextVar or pass via parameters
42
+ 4. **Import errors**: Lazy-load optional dependencies (magentic, modal, embeddings)
43
+
44
+ ## See Also
45
+
46
+ - [Error Handling](error-handling.md) - Error handling guidelines
47
+ - [Implementation Patterns](implementation-patterns.md) - Common patterns
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
docs/contributing/error-handling.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Error Handling & Logging
2
+
3
+ This document outlines error handling and logging conventions for DeepCritical.
4
+
5
+ ## Exception Hierarchy
6
+
7
+ Use custom exception hierarchy (`src/utils/exceptions.py`):
8
+
9
+ - `DeepCriticalError` (base)
10
+ - `SearchError` → `RateLimitError`
11
+ - `JudgeError`
12
+ - `ConfigurationError`
13
+
14
+ ## Error Handling Rules
15
+
16
+ - Always chain exceptions: `raise SearchError(...) from e`
17
+ - Log errors with context using `structlog`:
18
+
19
+ ```python
20
+ logger.error("Operation failed", error=str(e), context=value)
21
+ ```
22
+
23
+ - Never silently swallow exceptions
24
+ - Provide actionable error messages
25
+
26
+ ## Logging
27
+
28
+ - Use `structlog` for all logging (NOT `print` or `logging`)
29
+ - Import: `import structlog; logger = structlog.get_logger()`
30
+ - Log with structured data: `logger.info("event", key=value)`
31
+ - Use appropriate levels: DEBUG, INFO, WARNING, ERROR
32
+
33
+ ## Logging Examples
34
+
35
+ ```python
36
+ logger.info("Starting search", query=query, tools=[t.name for t in tools])
37
+ logger.warning("Search tool failed", tool=tool.name, error=str(result))
38
+ logger.error("Assessment failed", error=str(e))
39
+ ```
40
+
41
+ ## Error Chaining
42
+
43
+ Always preserve exception context:
44
+
45
+ ```python
46
+ try:
47
+ result = await api_call()
48
+ except httpx.HTTPError as e:
49
+ raise SearchError(f"API call failed: {e}") from e
50
+ ```
51
+
52
+ ## See Also
53
+
54
+ - [Code Style](code-style.md) - Code style guidelines
55
+ - [Testing](testing.md) - Testing guidelines
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
docs/contributing/implementation-patterns.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Implementation Patterns
2
+
3
+ This document outlines common implementation patterns used in DeepCritical.
4
+
5
+ ## Search Tools
6
+
7
+ All tools implement `SearchTool` protocol (`src/tools/base.py`):
8
+
9
+ - Must have `name` property
10
+ - Must implement `async def search(query, max_results) -> list[Evidence]`
11
+ - Use `@retry` decorator from tenacity for resilience
12
+ - Rate limiting: Implement `_rate_limit()` for APIs with limits (e.g., PubMed)
13
+ - Error handling: Raise `SearchError` or `RateLimitError` on failures
14
+
15
+ Example pattern:
16
+
17
+ ```python
18
+ class MySearchTool:
19
+ @property
20
+ def name(self) -> str:
21
+ return "mytool"
22
+
23
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(...))
24
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
25
+ # Implementation
26
+ return evidence_list
27
+ ```
28
+
29
+ ## Judge Handlers
30
+
31
+ - Implement `JudgeHandlerProtocol` (`async def assess(question, evidence) -> JudgeAssessment`)
32
+ - Use pydantic-ai `Agent` with `output_type=JudgeAssessment`
33
+ - System prompts in `src/prompts/judge.py`
34
+ - Support fallback handlers: `MockJudgeHandler`, `HFInferenceJudgeHandler`
35
+ - Always return valid `JudgeAssessment` (never raise exceptions)
36
+
37
+ ## Agent Factory Pattern
38
+
39
+ - Use factory functions for creating agents (`src/agent_factory/`)
40
+ - Lazy initialization for optional dependencies (e.g., embeddings, Modal)
41
+ - Check requirements before initialization:
42
+
43
+ ```python
44
+ def check_magentic_requirements() -> None:
45
+ if not settings.has_openai_key:
46
+ raise ConfigurationError("Magentic requires OpenAI")
47
+ ```
48
+
49
+ ## State Management
50
+
51
+ - **Magentic Mode**: Use `ContextVar` for thread-safe state (`src/agents/state.py`)
52
+ - **Simple Mode**: Pass state via function parameters
53
+ - Never use global mutable state (except singletons via `@lru_cache`)
54
+
55
+ ## Singleton Pattern
56
+
57
+ Use `@lru_cache(maxsize=1)` for singletons:
58
+
59
+ ```python
60
+ @lru_cache(maxsize=1)
61
+ def get_embedding_service() -> EmbeddingService:
62
+ return EmbeddingService()
63
+ ```
64
+
65
+ - Lazy initialization to avoid requiring dependencies at import time
66
+
67
+ ## See Also
68
+
69
+ - [Code Style](code-style.md) - Code style guidelines
70
+ - [Error Handling](error-handling.md) - Error handling guidelines
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
docs/contributing/index.md ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to DeepCritical
2
+
3
+ Thank you for your interest in contributing to DeepCritical! This guide will help you get started.
4
+
5
+ ## Git Workflow
6
+
7
+ - `main`: Production-ready (GitHub)
8
+ - `dev`: Development integration (GitHub)
9
+ - Use feature branches: `yourname-dev`
10
+ - **NEVER** push directly to `main` or `dev` on HuggingFace
11
+ - GitHub is source of truth; HuggingFace is for deployment
12
+
13
+ ## Development Commands
14
+
15
+ ```bash
16
+ make install # Install dependencies + pre-commit
17
+ make check # Lint + typecheck + test (MUST PASS)
18
+ make test # Run unit tests
19
+ make lint # Run ruff
20
+ make format # Format with ruff
21
+ make typecheck # Run mypy
22
+ make test-cov # Test with coverage
23
+ ```
24
+
25
+ ## Getting Started
26
+
27
+ 1. **Fork the repository** on GitHub
28
+ 2. **Clone your fork**:
29
+ ```bash
30
+ git clone https://github.com/yourusername/GradioDemo.git
31
+ cd GradioDemo
32
+ ```
33
+ 3. **Install dependencies**:
34
+ ```bash
35
+ make install
36
+ ```
37
+ 4. **Create a feature branch**:
38
+ ```bash
39
+ git checkout -b yourname-feature-name
40
+ ```
41
+ 5. **Make your changes** following the guidelines below
42
+ 6. **Run checks**:
43
+ ```bash
44
+ make check
45
+ ```
46
+ 7. **Commit and push**:
47
+ ```bash
48
+ git commit -m "Description of changes"
49
+ git push origin yourname-feature-name
50
+ ```
51
+ 8. **Create a pull request** on GitHub
52
+
53
+ ## Development Guidelines
54
+
55
+ ### Code Style
56
+
57
+ - Follow [Code Style Guidelines](code-style.md)
58
+ - All code must pass `mypy --strict`
59
+ - Use `ruff` for linting and formatting
60
+ - Line length: 100 characters
61
+
62
+ ### Error Handling
63
+
64
+ - Follow [Error Handling Guidelines](error-handling.md)
65
+ - Always chain exceptions: `raise SearchError(...) from e`
66
+ - Use structured logging with `structlog`
67
+ - Never silently swallow exceptions
68
+
69
+ ### Testing
70
+
71
+ - Follow [Testing Guidelines](testing.md)
72
+ - Write tests before implementation (TDD)
73
+ - Aim for >80% coverage on critical paths
74
+ - Use markers: `unit`, `integration`, `slow`
75
+
76
+ ### Implementation Patterns
77
+
78
+ - Follow [Implementation Patterns](implementation-patterns.md)
79
+ - Use factory functions for agent/tool creation
80
+ - Implement protocols for extensibility
81
+ - Use singleton pattern with `@lru_cache(maxsize=1)`
82
+
83
+ ### Prompt Engineering
84
+
85
+ - Follow [Prompt Engineering Guidelines](prompt-engineering.md)
86
+ - Always validate citations
87
+ - Use diverse evidence selection
88
+ - Never trust LLM-generated citations without validation
89
+
90
+ ### Code Quality
91
+
92
+ - Follow [Code Quality Guidelines](code-quality.md)
93
+ - Google-style docstrings for all public functions
94
+ - Explain WHY, not WHAT in comments
95
+ - Mark critical sections: `# CRITICAL: ...`
96
+
97
+ ## MCP Integration
98
+
99
+ ### MCP Tools
100
+
101
+ - Functions in `src/mcp_tools.py` for Claude Desktop
102
+ - Full type hints required
103
+ - Google-style docstrings with Args/Returns sections
104
+ - Formatted string returns (markdown)
105
+
106
+ ### Gradio MCP Server
107
+
108
+ - Enable with `mcp_server=True` in `demo.launch()`
109
+ - Endpoint: `/gradio_api/mcp/`
110
+ - Use `ssr_mode=False` to fix hydration issues in HF Spaces
111
+
112
+ ## Common Pitfalls
113
+
114
+ 1. **Blocking the event loop**: Never use sync I/O in async functions
115
+ 2. **Missing type hints**: All functions must have complete type annotations
116
+ 3. **Hallucinated citations**: Always validate references
117
+ 4. **Global mutable state**: Use ContextVar or pass via parameters
118
+ 5. **Import errors**: Lazy-load optional dependencies (magentic, modal, embeddings)
119
+ 6. **Rate limiting**: Always implement for external APIs
120
+ 7. **Error chaining**: Always use `from e` when raising exceptions
121
+
122
+ ## Key Principles
123
+
124
+ 1. **Type Safety First**: All code must pass `mypy --strict`
125
+ 2. **Async Everything**: All I/O must be async
126
+ 3. **Test-Driven**: Write tests before implementation
127
+ 4. **No Hallucinations**: Validate all citations
128
+ 5. **Graceful Degradation**: Support free tier (HF Inference) when no API keys
129
+ 6. **Lazy Loading**: Don't require optional dependencies at import time
130
+ 7. **Structured Logging**: Use structlog, never print()
131
+ 8. **Error Chaining**: Always preserve exception context
132
+
133
+ ## Pull Request Process
134
+
135
+ 1. Ensure all checks pass: `make check`
136
+ 2. Update documentation if needed
137
+ 3. Add tests for new features
138
+ 4. Update CHANGELOG if applicable
139
+ 5. Request review from maintainers
140
+ 6. Address review feedback
141
+ 7. Wait for approval before merging
142
+
143
+ ## Questions?
144
+
145
+ - Open an issue on GitHub
146
+ - Check existing documentation
147
+ - Review code examples in the codebase
148
+
149
+ Thank you for contributing to DeepCritical!
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
docs/contributing/prompt-engineering.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prompt Engineering & Citation Validation
2
+
3
+ This document outlines prompt engineering guidelines and citation validation rules.
4
+
5
+ ## Judge Prompts
6
+
7
+ - System prompt in `src/prompts/judge.py`
8
+ - Format evidence with truncation (1500 chars per item)
9
+ - Handle empty evidence case separately
10
+ - Always request structured JSON output
11
+ - Use `format_user_prompt()` and `format_empty_evidence_prompt()` helpers
12
+
13
+ ## Hypothesis Prompts
14
+
15
+ - Use diverse evidence selection (MMR algorithm)
16
+ - Sentence-aware truncation (`truncate_at_sentence()`)
17
+ - Format: Drug → Target → Pathway → Effect
18
+ - System prompt emphasizes mechanistic reasoning
19
+ - Use `format_hypothesis_prompt()` with embeddings for diversity
20
+
21
+ ## Report Prompts
22
+
23
+ - Include full citation details for validation
24
+ - Use diverse evidence selection (n=20)
25
+ - **CRITICAL**: Emphasize citation validation rules
26
+ - Format hypotheses with support/contradiction counts
27
+ - System prompt includes explicit JSON structure requirements
28
+
29
+ ## Citation Validation
30
+
31
+ - **ALWAYS** validate references before returning reports
32
+ - Use `validate_references()` from `src/utils/citation_validator.py`
33
+ - Remove hallucinated citations (URLs not in evidence)
34
+ - Log warnings for removed citations
35
+ - Never trust LLM-generated citations without validation
36
+
37
+ ## Citation Validation Rules
38
+
39
+ 1. Every reference URL must EXACTLY match a provided evidence URL
40
+ 2. Do NOT invent, fabricate, or hallucinate any references
41
+ 3. Do NOT modify paper titles, authors, dates, or URLs
42
+ 4. If unsure about a citation, OMIT it rather than guess
43
+ 5. Copy URLs exactly as provided - do not create similar-looking URLs
44
+
45
+ ## Evidence Selection
46
+
47
+ - Use `select_diverse_evidence()` for MMR-based selection
48
+ - Balance relevance vs diversity (lambda=0.7 default)
49
+ - Sentence-aware truncation preserves meaning
50
+ - Limit evidence per prompt to avoid context overflow
51
+
52
+ ## See Also
53
+
54
+ - [Code Quality](code-quality.md) - Code quality guidelines
55
+ - [Error Handling](error-handling.md) - Error handling guidelines
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
docs/contributing/testing.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Testing Requirements
2
+
3
+ This document outlines testing requirements and guidelines for DeepCritical.
4
+
5
+ ## Test Structure
6
+
7
+ - Unit tests in `tests/unit/` (mocked, fast)
8
+ - Integration tests in `tests/integration/` (real APIs, marked `@pytest.mark.integration`)
9
+ - Use markers: `unit`, `integration`, `slow`
10
+
11
+ ## Mocking
12
+
13
+ - Use `respx` for httpx mocking
14
+ - Use `pytest-mock` for general mocking
15
+ - Mock LLM calls in unit tests (use `MockJudgeHandler`)
16
+ - Fixtures in `tests/conftest.py`: `mock_httpx_client`, `mock_llm_response`
17
+
18
+ ## TDD Workflow
19
+
20
+ 1. Write failing test in `tests/unit/`
21
+ 2. Implement in `src/`
22
+ 3. Ensure test passes
23
+ 4. Run `make check` (lint + typecheck + test)
24
+
25
+ ## Test Examples
26
+
27
+ ```python
28
+ @pytest.mark.unit
29
+ async def test_pubmed_search(mock_httpx_client):
30
+ tool = PubMedTool()
31
+ results = await tool.search("metformin", max_results=5)
32
+ assert len(results) > 0
33
+ assert all(isinstance(r, Evidence) for r in results)
34
+
35
+ @pytest.mark.integration
36
+ async def test_real_pubmed_search():
37
+ tool = PubMedTool()
38
+ results = await tool.search("metformin", max_results=3)
39
+ assert len(results) <= 3
40
+ ```
41
+
42
+ ## Test Coverage
43
+
44
+ - Run `make test-cov` for coverage report
45
+ - Aim for >80% coverage on critical paths
46
+ - Exclude: `__init__.py`, `TYPE_CHECKING` blocks
47
+
48
+ ## See Also
49
+
50
+ - [Code Style](code-style.md) - Code style guidelines
51
+ - [Implementation Patterns](implementation-patterns.md) - Common patterns
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
docs/getting-started/examples.md ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Examples
2
+
3
+ This page provides examples of using DeepCritical for various research tasks.
4
+
5
+ ## Basic Research Query
6
+
7
+ ### Example 1: Drug Information
8
+
9
+ **Query**:
10
+ ```
11
+ What are the latest treatments for Alzheimer's disease?
12
+ ```
13
+
14
+ **What DeepCritical Does**:
15
+ 1. Searches PubMed for recent papers
16
+ 2. Searches ClinicalTrials.gov for active trials
17
+ 3. Evaluates evidence quality
18
+ 4. Synthesizes findings into a comprehensive report
19
+
20
+ ### Example 2: Clinical Trial Search
21
+
22
+ **Query**:
23
+ ```
24
+ What clinical trials are investigating metformin for cancer prevention?
25
+ ```
26
+
27
+ **What DeepCritical Does**:
28
+ 1. Searches ClinicalTrials.gov for relevant trials
29
+ 2. Searches PubMed for supporting literature
30
+ 3. Provides trial details and status
31
+ 4. Summarizes findings
32
+
33
+ ## Advanced Research Queries
34
+
35
+ ### Example 3: Comprehensive Review
36
+
37
+ **Query**:
38
+ ```
39
+ Review the evidence for using metformin as an anti-aging intervention,
40
+ including clinical trials, mechanisms of action, and safety profile.
41
+ ```
42
+
43
+ **What DeepCritical Does**:
44
+ 1. Uses deep research mode (multi-section)
45
+ 2. Searches multiple sources in parallel
46
+ 3. Generates sections on:
47
+ - Clinical trials
48
+ - Mechanisms of action
49
+ - Safety profile
50
+ 4. Synthesizes comprehensive report
51
+
52
+ ### Example 4: Hypothesis Testing
53
+
54
+ **Query**:
55
+ ```
56
+ Test the hypothesis that regular exercise reduces Alzheimer's disease risk.
57
+ ```
58
+
59
+ **What DeepCritical Does**:
60
+ 1. Generates testable hypotheses
61
+ 2. Searches for supporting/contradicting evidence
62
+ 3. Performs statistical analysis (if Modal configured)
63
+ 4. Provides verdict: SUPPORTED, REFUTED, or INCONCLUSIVE
64
+
65
+ ## MCP Tool Examples
66
+
67
+ ### Using search_pubmed
68
+
69
+ ```
70
+ Search PubMed for "CRISPR gene editing cancer therapy"
71
+ ```
72
+
73
+ ### Using search_clinical_trials
74
+
75
+ ```
76
+ Find active clinical trials for "diabetes type 2 treatment"
77
+ ```
78
+
79
+ ### Using search_all
80
+
81
+ ```
82
+ Search all sources for "COVID-19 vaccine side effects"
83
+ ```
84
+
85
+ ### Using analyze_hypothesis
86
+
87
+ ```
88
+ Analyze whether vitamin D supplementation reduces COVID-19 severity
89
+ ```
90
+
91
+ ## Code Examples
92
+
93
+ ### Python API Usage
94
+
95
+ ```python
96
+ from src.orchestrator_factory import create_orchestrator
97
+ from src.tools.search_handler import SearchHandler
98
+ from src.agent_factory.judges import create_judge_handler
99
+
100
+ # Create orchestrator
101
+ search_handler = SearchHandler()
102
+ judge_handler = create_judge_handler()
103
+ orchestrator = create_orchestrator(
104
+ search_handler=search_handler,
105
+ judge_handler=judge_handler,
106
+ config={},
107
+ mode="advanced"
108
+ )
109
+
110
+ # Run research query
111
+ query = "What are the latest treatments for Alzheimer's disease?"
112
+ async for event in orchestrator.run(query):
113
+ print(f"Event: {event.type} - {event.data}")
114
+ ```
115
+
116
+ ### Gradio UI Integration
117
+
118
+ ```python
119
+ import gradio as gr
120
+ from src.app import create_research_interface
121
+
122
+ # Create interface
123
+ interface = create_research_interface()
124
+
125
+ # Launch
126
+ interface.launch(server_name="0.0.0.0", server_port=7860)
127
+ ```
128
+
129
+ ## Research Patterns
130
+
131
+ ### Iterative Research
132
+
133
+ Single-loop research with search-judge-synthesize cycles:
134
+
135
+ ```python
136
+ from src.orchestrator.research_flow import IterativeResearchFlow
137
+
138
+ flow = IterativeResearchFlow(
139
+ search_handler=search_handler,
140
+ judge_handler=judge_handler,
141
+ use_graph=False
142
+ )
143
+
144
+ async for event in flow.run(query):
145
+ # Handle events
146
+ pass
147
+ ```
148
+
149
+ ### Deep Research
150
+
151
+ Multi-section parallel research:
152
+
153
+ ```python
154
+ from src.orchestrator.research_flow import DeepResearchFlow
155
+
156
+ flow = DeepResearchFlow(
157
+ search_handler=search_handler,
158
+ judge_handler=judge_handler,
159
+ use_graph=True
160
+ )
161
+
162
+ async for event in flow.run(query):
163
+ # Handle events
164
+ pass
165
+ ```
166
+
167
+ ## Configuration Examples
168
+
169
+ ### Basic Configuration
170
+
171
+ ```bash
172
+ # .env file
173
+ LLM_PROVIDER=openai
174
+ OPENAI_API_KEY=your_key_here
175
+ MAX_ITERATIONS=10
176
+ ```
177
+
178
+ ### Advanced Configuration
179
+
180
+ ```bash
181
+ # .env file
182
+ LLM_PROVIDER=anthropic
183
+ ANTHROPIC_API_KEY=your_key_here
184
+ EMBEDDING_PROVIDER=local
185
+ WEB_SEARCH_PROVIDER=duckduckgo
186
+ MAX_ITERATIONS=20
187
+ DEFAULT_TOKEN_LIMIT=200000
188
+ USE_GRAPH_EXECUTION=true
189
+ ```
190
+
191
+ ## Next Steps
192
+
193
+ - Read the [Configuration Guide](../configuration/index.md) for all options
194
+ - Explore the [Architecture Documentation](../architecture/graph-orchestration.md)
195
+ - Check out the [API Reference](../api/agents.md) for programmatic usage
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
docs/getting-started/installation.md ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Installation
2
+
3
+ This guide will help you install and set up DeepCritical on your system.
4
+
5
+ ## Prerequisites
6
+
7
+ - Python 3.11 or higher
8
+ - `uv` package manager (recommended) or `pip`
9
+ - At least one LLM API key (OpenAI, Anthropic, or HuggingFace)
10
+
11
+ ## Installation Steps
12
+
13
+ ### 1. Install uv (Recommended)
14
+
15
+ `uv` is a fast Python package installer and resolver. Install it with:
16
+
17
+ ```bash
18
+ pip install uv
19
+ ```
20
+
21
+ ### 2. Clone the Repository
22
+
23
+ ```bash
24
+ git clone https://github.com/DeepCritical/GradioDemo.git
25
+ cd GradioDemo
26
+ ```
27
+
28
+ ### 3. Install Dependencies
29
+
30
+ Using `uv` (recommended):
31
+
32
+ ```bash
33
+ uv sync
34
+ ```
35
+
36
+ Using `pip`:
37
+
38
+ ```bash
39
+ pip install -e .
40
+ ```
41
+
42
+ ### 4. Install Optional Dependencies
43
+
44
+ For embeddings support (local sentence-transformers):
45
+
46
+ ```bash
47
+ uv sync --extra embeddings
48
+ ```
49
+
50
+ For Modal sandbox execution:
51
+
52
+ ```bash
53
+ uv sync --extra modal
54
+ ```
55
+
56
+ For Magentic orchestration:
57
+
58
+ ```bash
59
+ uv sync --extra magentic
60
+ ```
61
+
62
+ Install all extras:
63
+
64
+ ```bash
65
+ uv sync --all-extras
66
+ ```
67
+
68
+ ### 5. Configure Environment Variables
69
+
70
+ Create a `.env` file in the project root:
71
+
72
+ ```bash
73
+ # Required: At least one LLM provider
74
+ LLM_PROVIDER=openai # or "anthropic" or "huggingface"
75
+ OPENAI_API_KEY=your_openai_api_key_here
76
+
77
+ # Optional: Other services
78
+ NCBI_API_KEY=your_ncbi_api_key_here # For higher PubMed rate limits
79
+ MODAL_TOKEN_ID=your_modal_token_id
80
+ MODAL_TOKEN_SECRET=your_modal_token_secret
81
+ ```
82
+
83
+ See the [Configuration Guide](../configuration/index.md) for all available options.
84
+
85
+ ### 6. Verify Installation
86
+
87
+ Run the application:
88
+
89
+ ```bash
90
+ uv run gradio src/app.py
91
+ ```
92
+
93
+ Open your browser to `http://localhost:7860` to verify the installation.
94
+
95
+ ## Development Setup
96
+
97
+ For development, install dev dependencies:
98
+
99
+ ```bash
100
+ uv sync --all-extras --dev
101
+ ```
102
+
103
+ Install pre-commit hooks:
104
+
105
+ ```bash
106
+ uv run pre-commit install
107
+ ```
108
+
109
+ ## Troubleshooting
110
+
111
+ ### Common Issues
112
+
113
+ **Import Errors**:
114
+ - Ensure you've installed all required dependencies
115
+ - Check that Python 3.11+ is being used
116
+
117
+ **API Key Errors**:
118
+ - Verify your `.env` file is in the project root
119
+ - Check that API keys are correctly formatted
120
+ - Ensure at least one LLM provider is configured
121
+
122
+ **Module Not Found**:
123
+ - Run `uv sync` or `pip install -e .` again
124
+ - Check that you're in the correct virtual environment
125
+
126
+ **Port Already in Use**:
127
+ - Change the port in `src/app.py` or set the `GRADIO_SERVER_PORT` environment variable
128
+ - Kill the process using port 7860
129
+
130
+ ## Next Steps
131
+
132
+ - Read the [Quick Start Guide](quick-start.md)
133
+ - Learn about [MCP Integration](mcp-integration.md)
134
+ - Explore [Examples](examples.md)
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
docs/getting-started/mcp-integration.md ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MCP Integration
2
+
3
+ DeepCritical exposes a Model Context Protocol (MCP) server, allowing you to use its search tools directly from Claude Desktop or other MCP clients.
4
+
5
+ ## What is MCP?
6
+
7
+ The Model Context Protocol (MCP) is a standard for connecting AI assistants to external tools and data sources. DeepCritical implements an MCP server that exposes its search capabilities as MCP tools.
8
+
9
+ ## MCP Server URL
10
+
11
+ When running locally:
12
+
13
+ ```
14
+ http://localhost:7860/gradio_api/mcp/
15
+ ```
16
+
17
+ ## Claude Desktop Configuration
18
+
19
+ ### 1. Locate Configuration File
20
+
21
+ **macOS**:
22
+ ```
23
+ ~/Library/Application Support/Claude/claude_desktop_config.json
24
+ ```
25
+
26
+ **Windows**:
27
+ ```
28
+ %APPDATA%\Claude\claude_desktop_config.json
29
+ ```
30
+
31
+ **Linux**:
32
+ ```
33
+ ~/.config/Claude/claude_desktop_config.json
34
+ ```
35
+
36
+ ### 2. Add DeepCritical Server
37
+
38
+ Edit `claude_desktop_config.json` and add:
39
+
40
+ ```json
41
+ {
42
+ "mcpServers": {
43
+ "deepcritical": {
44
+ "url": "http://localhost:7860/gradio_api/mcp/"
45
+ }
46
+ }
47
+ }
48
+ ```
49
+
50
+ ### 3. Restart Claude Desktop
51
+
52
+ Close and restart Claude Desktop for changes to take effect.
53
+
54
+ ### 4. Verify Connection
55
+
56
+ In Claude Desktop, you should see DeepCritical tools available:
57
+ - `search_pubmed`
58
+ - `search_clinical_trials`
59
+ - `search_biorxiv`
60
+ - `search_all`
61
+ - `analyze_hypothesis`
62
+
63
+ ## Available Tools
64
+
65
+ ### search_pubmed
66
+
67
+ Search peer-reviewed biomedical literature from PubMed.
68
+
69
+ **Parameters**:
70
+ - `query` (string): Search query
71
+ - `max_results` (integer, optional): Maximum number of results (default: 10)
72
+
73
+ **Example**:
74
+ ```
75
+ Search PubMed for "metformin diabetes"
76
+ ```
77
+
78
+ ### search_clinical_trials
79
+
80
+ Search ClinicalTrials.gov for interventional studies.
81
+
82
+ **Parameters**:
83
+ - `query` (string): Search query
84
+ - `max_results` (integer, optional): Maximum number of results (default: 10)
85
+
86
+ **Example**:
87
+ ```
88
+ Search clinical trials for "Alzheimer's disease treatment"
89
+ ```
90
+
91
+ ### search_biorxiv
92
+
93
+ Search bioRxiv/medRxiv preprints via Europe PMC.
94
+
95
+ **Parameters**:
96
+ - `query` (string): Search query
97
+ - `max_results` (integer, optional): Maximum number of results (default: 10)
98
+
99
+ **Example**:
100
+ ```
101
+ Search bioRxiv for "CRISPR gene editing"
102
+ ```
103
+
104
+ ### search_all
105
+
106
+ Search all sources simultaneously (PubMed, ClinicalTrials.gov, Europe PMC).
107
+
108
+ **Parameters**:
109
+ - `query` (string): Search query
110
+ - `max_results` (integer, optional): Maximum number of results per source (default: 10)
111
+
112
+ **Example**:
113
+ ```
114
+ Search all sources for "COVID-19 vaccine efficacy"
115
+ ```
116
+
117
+ ### analyze_hypothesis
118
+
119
+ Perform secure statistical analysis using Modal sandboxes.
120
+
121
+ **Parameters**:
122
+ - `hypothesis` (string): Hypothesis to analyze
123
+ - `data` (string, optional): Data description or code
124
+
125
+ **Example**:
126
+ ```
127
+ Analyze the hypothesis that metformin reduces cancer risk
128
+ ```
129
+
130
+ ## Using Tools in Claude Desktop
131
+
132
+ Once configured, you can ask Claude to use DeepCritical tools:
133
+
134
+ ```
135
+ Use DeepCritical to search PubMed for recent papers on Alzheimer's disease treatments.
136
+ ```
137
+
138
+ Claude will automatically:
139
+ 1. Call the appropriate DeepCritical tool
140
+ 2. Retrieve results
141
+ 3. Use the results in its response
142
+
143
+ ## Troubleshooting
144
+
145
+ ### Connection Issues
146
+
147
+ **Server Not Found**:
148
+ - Ensure DeepCritical is running (`uv run gradio src/app.py`)
149
+ - Verify the URL in `claude_desktop_config.json` is correct
150
+ - Check that port 7860 is not blocked by firewall
151
+
152
+ **Tools Not Appearing**:
153
+ - Restart Claude Desktop after configuration changes
154
+ - Check Claude Desktop logs for errors
155
+ - Verify MCP server is accessible at the configured URL
156
+
157
+ ### Authentication
158
+
159
+ If DeepCritical requires authentication:
160
+ - Configure API keys in DeepCritical settings
161
+ - Use HuggingFace OAuth login
162
+ - Ensure API keys are valid
163
+
164
+ ## Advanced Configuration
165
+
166
+ ### Custom Port
167
+
168
+ If running on a different port, update the URL:
169
+
170
+ ```json
171
+ {
172
+ "mcpServers": {
173
+ "deepcritical": {
174
+ "url": "http://localhost:8080/gradio_api/mcp/"
175
+ }
176
+ }
177
+ }
178
+ ```
179
+
180
+ ### Multiple Instances
181
+
182
+ You can configure multiple DeepCritical instances:
183
+
184
+ ```json
185
+ {
186
+ "mcpServers": {
187
+ "deepcritical-local": {
188
+ "url": "http://localhost:7860/gradio_api/mcp/"
189
+ },
190
+ "deepcritical-remote": {
191
+ "url": "https://your-server.com/gradio_api/mcp/"
192
+ }
193
+ }
194
+ }
195
+ ```
196
+
197
+ ## Next Steps
198
+
199
+ - Learn about [Configuration](../configuration/index.md) for advanced settings
200
+ - Explore [Examples](examples.md) for use cases
201
+ - Read the [Architecture Documentation](../architecture/graph-orchestration.md)
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
docs/getting-started/quick-start.md ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Quick Start Guide
2
+
3
+ Get up and running with DeepCritical in minutes.
4
+
5
+ ## Start the Application
6
+
7
+ ```bash
8
+ uv run gradio src/app.py
9
+ ```
10
+
11
+ Open your browser to `http://localhost:7860`.
12
+
13
+ ## First Research Query
14
+
15
+ 1. **Enter a Research Question**
16
+
17
+ Type your research question in the chat interface, for example:
18
+ - "What are the latest treatments for Alzheimer's disease?"
19
+ - "Review the evidence for metformin in cancer prevention"
20
+ - "What clinical trials are investigating COVID-19 vaccines?"
21
+
22
+ 2. **Submit the Query**
23
+
24
+ Click "Submit" or press Enter. The system will:
25
+ - Generate observations about your query
26
+ - Identify knowledge gaps
27
+ - Search multiple sources (PubMed, ClinicalTrials.gov, Europe PMC)
28
+ - Evaluate evidence quality
29
+ - Synthesize findings into a report
30
+
31
+ 3. **Review Results**
32
+
33
+ Watch the real-time progress in the chat interface:
34
+ - Search operations and results
35
+ - Evidence evaluation
36
+ - Report generation
37
+ - Final research report with citations
38
+
39
+ ## Authentication
40
+
41
+ ### HuggingFace OAuth (Recommended)
42
+
43
+ 1. Click "Sign in with HuggingFace" at the top of the app
44
+ 2. Authorize the application
45
+ 3. Your HuggingFace API token will be automatically used
46
+ 4. No need to manually enter API keys
47
+
48
+ ### Manual API Key
49
+
50
+ 1. Open the Settings accordion
51
+ 2. Enter your API key:
52
+ - OpenAI API key
53
+ - Anthropic API key
54
+ - HuggingFace API key
55
+ 3. Click "Save Settings"
56
+ 4. Manual keys take priority over OAuth tokens
57
+
58
+ ## Understanding the Interface
59
+
60
+ ### Chat Interface
61
+
62
+ - **Input**: Enter your research questions here
63
+ - **Messages**: View conversation history and research progress
64
+ - **Streaming**: Real-time updates as research progresses
65
+
66
+ ### Status Indicators
67
+
68
+ - **Searching**: Active search operations
69
+ - **Evaluating**: Evidence quality assessment
70
+ - **Synthesizing**: Report generation
71
+ - **Complete**: Research finished
72
+
73
+ ### Settings
74
+
75
+ - **API Keys**: Configure LLM providers
76
+ - **Research Mode**: Choose iterative or deep research
77
+ - **Budget Limits**: Set token, time, and iteration limits
78
+
79
+ ## Example Queries
80
+
81
+ ### Simple Query
82
+
83
+ ```
84
+ What are the side effects of metformin?
85
+ ```
86
+
87
+ ### Complex Query
88
+
89
+ ```
90
+ Review the evidence for using metformin as an anti-aging intervention,
91
+ including clinical trials, mechanisms of action, and safety profile.
92
+ ```
93
+
94
+ ### Clinical Trial Query
95
+
96
+ ```
97
+ What are the active clinical trials investigating Alzheimer's disease treatments?
98
+ ```
99
+
100
+ ## Next Steps
101
+
102
+ - Learn about [MCP Integration](mcp-integration.md) to use DeepCritical from Claude Desktop
103
+ - Explore [Examples](examples.md) for more use cases
104
+ - Read the [Configuration Guide](../configuration/index.md) for advanced settings
105
+ - Check out the [Architecture Documentation](../architecture/graph-orchestration.md) to understand how it works
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
docs/index.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepCritical
2
+
3
+ **AI-Native Drug Repurposing Research Agent**
4
+
5
+ DeepCritical is a deep research agent system that uses iterative search-and-judge loops to comprehensively answer research questions. The system supports multiple orchestration patterns, graph-based execution, parallel research workflows, and long-running task management with real-time streaming.
6
+
7
+ ## Features
8
+
9
+ - **Multi-Source Search**: PubMed, ClinicalTrials.gov, Europe PMC (includes bioRxiv/medRxiv)
10
+ - **MCP Integration**: Use our tools from Claude Desktop or any MCP client
11
+ - **HuggingFace OAuth**: Sign in with your HuggingFace account to automatically use your API token
12
+ - **Modal Sandbox**: Secure execution of AI-generated statistical code
13
+ - **LlamaIndex RAG**: Semantic search and evidence synthesis
14
+ - **HuggingFace Inference**: Free tier support with automatic fallback
15
+ - **Strongly Typed Composable Graphs**: Graph-based orchestration with Pydantic AI
16
+ - **Specialized Research Teams of Agents**: Multi-agent coordination for complex research tasks
17
+
18
+ ## Quick Start
19
+
20
+ ```bash
21
+ # Install uv if you haven't already
22
+ pip install uv
23
+
24
+ # Sync dependencies
25
+ uv sync
26
+
27
+ # Start the Gradio app
28
+ uv run gradio src/app.py
29
+ ```
30
+
31
+ Open your browser to `http://localhost:7860`.
32
+
33
+ For detailed installation and setup instructions, see the [Getting Started Guide](getting-started/installation.md).
34
+
35
+ ## Architecture
36
+
37
+ DeepCritical uses a Vertical Slice Architecture:
38
+
39
+ 1. **Search Slice**: Retrieving evidence from PubMed, ClinicalTrials.gov, and Europe PMC
40
+ 2. **Judge Slice**: Evaluating evidence quality using LLMs
41
+ 3. **Orchestrator Slice**: Managing the research loop and UI
42
+
43
+ The system supports three main research patterns:
44
+
45
+ - **Iterative Research**: Single research loop with search-judge-synthesize cycles
46
+ - **Deep Research**: Multi-section parallel research with planning and synthesis
47
+ - **Research Team**: Multi-agent coordination using Magentic framework
48
+
49
+ Learn more about the [Architecture](overview/architecture.md).
50
+
51
+ ## Documentation
52
+
53
+ - [Overview](overview/architecture.md) - System architecture and design
54
+ - [Getting Started](getting-started/installation.md) - Installation and setup
55
+ - [Configuration](configuration/index.md) - Configuration guide
56
+ - [API Reference](api/agents.md) - API documentation
57
+ - [Contributing](contributing.md) - Development guidelines
58
+
59
+ ## Links
60
+
61
+ - [GitHub Repository](https://github.com/DeepCritical/GradioDemo)
62
+ - [HuggingFace Space](https://huggingface.co/spaces/DataQuests/DeepCritical)
63
+
LICENSE.md → docs/license.md RENAMED
@@ -23,3 +23,13 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
  SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
23
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
  SOFTWARE.
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
docs/overview/architecture.md ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Architecture Overview
2
+
3
+ DeepCritical is a deep research agent system that uses iterative search-and-judge loops to comprehensively answer research questions. The system supports multiple orchestration patterns, graph-based execution, parallel research workflows, and long-running task management with real-time streaming.
4
+
5
+ ## Core Architecture
6
+
7
+ ### Orchestration Patterns
8
+
9
+ 1. **Graph Orchestrator** (`src/orchestrator/graph_orchestrator.py`):
10
+ - Graph-based execution using Pydantic AI agents as nodes
11
+ - Supports both iterative and deep research patterns
12
+ - Node types: Agent, State, Decision, Parallel
13
+ - Edge types: Sequential, Conditional, Parallel
14
+ - Conditional routing based on knowledge gaps, budget, and iterations
15
+ - Parallel execution for concurrent research loops
16
+ - Event streaming via `AsyncGenerator[AgentEvent]` for real-time UI updates
17
+ - Fallback to agent chains when graph execution is disabled
18
+
19
+ 2. **Deep Research Flow** (`src/orchestrator/research_flow.py`):
20
+ - **Pattern**: Planner → Parallel Iterative Loops (one per section) → Synthesis
21
+ - Uses `PlannerAgent` to break query into report sections
22
+ - Runs `IterativeResearchFlow` instances in parallel per section via `WorkflowManager`
23
+ - Synthesizes results using `LongWriterAgent` or `ProofreaderAgent`
24
+ - Supports both graph execution (`use_graph=True`) and agent chains (`use_graph=False`)
25
+ - Budget tracking per section and globally
26
+ - State synchronization across parallel loops
27
+
28
+ 3. **Iterative Research Flow** (`src/orchestrator/research_flow.py`):
29
+ - **Pattern**: Generate observations → Evaluate gaps → Select tools → Execute → Judge → Continue/Complete
30
+ - Uses `KnowledgeGapAgent`, `ToolSelectorAgent`, `ThinkingAgent`, `WriterAgent`
31
+ - `JudgeHandler` assesses evidence sufficiency
32
+ - Iterates until research complete or constraints met (iterations, time, tokens)
33
+ - Supports graph execution and agent chains
34
+
35
+ 4. **Magentic Orchestrator** (`src/orchestrator_magentic.py`):
36
+ - Multi-agent coordination using `agent-framework-core`
37
+ - ChatAgent pattern with internal LLMs per agent
38
+ - Uses `MagenticBuilder` with participants: searcher, hypothesizer, judge, reporter
39
+ - Manager orchestrates agents via `OpenAIChatClient`
40
+ - Requires OpenAI API key (function calling support)
41
+ - Event-driven: converts Magentic events to `AgentEvent` for UI streaming
42
+ - Supports long-running workflows with max rounds and stall/reset handling
43
+
44
+ 5. **Hierarchical Orchestrator** (`src/orchestrator_hierarchical.py`):
45
+ - Uses `SubIterationMiddleware` with `ResearchTeam` and `LLMSubIterationJudge`
46
+ - Adapts Magentic ChatAgent to `SubIterationTeam` protocol
47
+ - Event-driven via `asyncio.Queue` for coordination
48
+ - Supports sub-iteration patterns for complex research tasks
49
+
50
+ 6. **Legacy Simple Mode** (`src/legacy_orchestrator.py`):
51
+ - Linear search-judge-synthesize loop
52
+ - Uses `SearchHandlerProtocol` and `JudgeHandlerProtocol`
53
+ - Generator-based design yielding `AgentEvent` objects
54
+ - Backward compatibility for simple use cases
55
+
56
+ ## Long-Running Task Support
57
+
58
+ The system is designed for long-running research tasks with comprehensive state management and streaming:
59
+
60
+ 1. **Event Streaming**:
61
+ - All orchestrators yield `AgentEvent` objects via `AsyncGenerator`
62
+ - Real-time UI updates through Gradio chat interface
63
+ - Event types: `started`, `searching`, `search_complete`, `judging`, `judge_complete`, `looping`, `synthesizing`, `hypothesizing`, `complete`, `error`
64
+ - Metadata includes iteration numbers, tool names, result counts, durations
65
+
66
+ 2. **Budget Tracking** (`src/middleware/budget_tracker.py`):
67
+ - Per-loop and global budget management
68
+ - Tracks: tokens, time (seconds), iterations
69
+ - Budget enforcement at decision nodes
70
+ - Token estimation (~4 chars per token)
71
+ - Early termination when budgets exceeded
72
+ - Budget summaries for monitoring
73
+
74
+ 3. **Workflow Manager** (`src/middleware/workflow_manager.py`):
75
+ - Coordinates parallel research loops
76
+ - Tracks loop status: `pending`, `running`, `completed`, `failed`, `cancelled`
77
+ - Synchronizes evidence between loops and global state
78
+ - Handles errors per loop (doesn't fail all if one fails)
79
+ - Supports loop cancellation and timeout handling
80
+ - Evidence deduplication across parallel loops
81
+
82
+ 4. **State Management** (`src/middleware/state_machine.py`):
83
+ - Thread-safe isolation using `ContextVar` for concurrent requests
84
+ - `WorkflowState` tracks: evidence, conversation history, embedding service
85
+ - Evidence deduplication by URL
86
+ - Semantic search via embedding service
87
+ - State persistence across long-running workflows
88
+ - Supports both iterative and deep research patterns
89
+
90
+ 5. **Gradio UI** (`src/app.py`):
91
+ - Real-time streaming of research progress
92
+ - Accordion-based UI for pending/done operations
93
+ - OAuth integration (HuggingFace)
94
+ - Multiple backend support (API keys, free tier)
95
+ - Handles long-running tasks with progress indicators
96
+ - Event accumulation for pending operations
97
+
98
+ ## Graph Architecture
99
+
100
+ The graph orchestrator (`src/orchestrator/graph_orchestrator.py`) implements a flexible graph-based execution model:
101
+
102
+ **Node Types**:
103
+
104
+ - **Agent Nodes**: Execute Pydantic AI agents (e.g., `KnowledgeGapAgent`, `ToolSelectorAgent`)
105
+ - **State Nodes**: Update or read workflow state (evidence, conversation)
106
+ - **Decision Nodes**: Make routing decisions (research complete?, budget exceeded?)
107
+ - **Parallel Nodes**: Execute multiple nodes concurrently (parallel research loops)
108
+
109
+ **Edge Types**:
110
+
111
+ - **Sequential Edges**: Always traversed (no condition)
112
+ - **Conditional Edges**: Traversed based on condition (e.g., if research complete → writer, else → tool selector)
113
+ - **Parallel Edges**: Used for parallel execution branches
114
+
115
+ **Graph Patterns**:
116
+
117
+ - **Iterative Graph**: `[Input] → [Thinking] → [Knowledge Gap] → [Decision: Complete?] → [Tool Selector] or [Writer]`
118
+ - **Deep Research Graph**: `[Input] → [Planner] → [Parallel Iterative Loops] → [Synthesizer]`
119
+
120
+ **Execution Flow**:
121
+
122
+ 1. Graph construction from nodes and edges
123
+ 2. Graph validation (no cycles, all nodes reachable)
124
+ 3. Graph execution from entry node
125
+ 4. Node execution based on type
126
+ 5. Edge evaluation for next node(s)
127
+ 6. Parallel execution via `asyncio.gather()`
128
+ 7. State updates at state nodes
129
+ 8. Event streaming for UI
130
+
131
+ ## Key Components
132
+
133
+ - **Orchestrators**: Multiple orchestration patterns (`src/orchestrator/`, `src/orchestrator_*.py`)
134
+ - **Research Flows**: Iterative and deep research patterns (`src/orchestrator/research_flow.py`)
135
+ - **Graph Builder**: Graph construction utilities (`src/agent_factory/graph_builder.py`)
136
+ - **Agents**: Pydantic AI agents (`src/agents/`, `src/agent_factory/agents.py`)
137
+ - **Search Tools**: PubMed, ClinicalTrials.gov, Europe PMC, RAG (`src/tools/`)
138
+ - **Judge Handler**: LLM-based evidence assessment (`src/agent_factory/judges.py`)
139
+ - **Embeddings**: Semantic search & deduplication (`src/services/embeddings.py`)
140
+ - **Statistical Analyzer**: Modal sandbox execution (`src/services/statistical_analyzer.py`)
141
+ - **Middleware**: State management, budget tracking, workflow coordination (`src/middleware/`)
142
+ - **MCP Tools**: Claude Desktop integration (`src/mcp_tools.py`)
143
+ - **Gradio UI**: Web interface with MCP server and streaming (`src/app.py`)
144
+
145
+ ## Research Team & Parallel Execution
146
+
147
+ The system supports complex research workflows through:
148
+
149
+ 1. **WorkflowManager**: Coordinates multiple parallel research loops
150
+ - Creates and tracks `ResearchLoop` instances
151
+ - Runs loops in parallel via `asyncio.gather()`
152
+ - Synchronizes evidence to global state
153
+ - Handles loop failures gracefully
154
+
155
+ 2. **Deep Research Pattern**: Breaks complex queries into sections
156
+ - Planner creates report outline with sections
157
+ - Each section runs as independent iterative research loop
158
+ - Loops execute in parallel
159
+ - Evidence shared across loops via global state
160
+ - Final synthesis combines all section results
161
+
162
+ 3. **State Synchronization**: Thread-safe evidence sharing
163
+ - Evidence deduplication by URL
164
+ - Global state accessible to all loops
165
+ - Semantic search across all collected evidence
166
+ - Conversation history tracking per iteration
167
+
168
+ ## Configuration & Modes
169
+
170
+ - **Orchestrator Factory** (`src/orchestrator_factory.py`):
171
+ - Auto-detects mode: "advanced" if OpenAI key available, else "simple"
172
+ - Supports explicit mode selection: "simple", "magentic", "advanced"
173
+ - Lazy imports for optional dependencies
174
+
175
+ - **Research Modes**:
176
+ - `iterative`: Single research loop
177
+ - `deep`: Multi-section parallel research
178
+ - `auto`: Auto-detect based on query complexity
179
+
180
+ - **Execution Modes**:
181
+ - `use_graph=True`: Graph-based execution (parallel, conditional routing)
182
+ - `use_graph=False`: Agent chains (sequential, backward compatible)
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
docs/overview/features.md ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Features
2
+
3
+ DeepCritical provides a comprehensive set of features for AI-assisted research:
4
+
5
+ ## Core Features
6
+
7
+ ### Multi-Source Search
8
+
9
+ - **PubMed**: Search peer-reviewed biomedical literature via NCBI E-utilities
10
+ - **ClinicalTrials.gov**: Search interventional clinical trials
11
+ - **Europe PMC**: Search preprints and peer-reviewed articles (includes bioRxiv/medRxiv)
12
+ - **RAG**: Semantic search within collected evidence using LlamaIndex
13
+
14
+ ### MCP Integration
15
+
16
+ - **Model Context Protocol**: Expose search tools via MCP server
17
+ - **Claude Desktop**: Use DeepCritical tools directly from Claude Desktop
18
+ - **MCP Clients**: Compatible with any MCP-compatible client
19
+
20
+ ### Authentication
21
+
22
+ - **HuggingFace OAuth**: Sign in with HuggingFace account for automatic API token usage
23
+ - **Manual API Keys**: Support for OpenAI, Anthropic, and HuggingFace API keys
24
+ - **Free Tier Support**: Automatic fallback to HuggingFace Inference API
25
+
26
+ ### Secure Code Execution
27
+
28
+ - **Modal Sandbox**: Secure execution of AI-generated statistical code
29
+ - **Isolated Environment**: Network isolation and package version pinning
30
+ - **Safe Execution**: Prevents malicious code execution
31
+
32
+ ### Semantic Search & RAG
33
+
34
+ - **LlamaIndex Integration**: Advanced RAG capabilities
35
+ - **Vector Storage**: ChromaDB for embedding storage
36
+ - **Semantic Deduplication**: Automatic detection of similar evidence
37
+ - **Embedding Service**: Local sentence-transformers (no API key required)
38
+
39
+ ### Orchestration Patterns
40
+
41
+ - **Graph-Based Execution**: Flexible graph orchestration with conditional routing
42
+ - **Parallel Research Loops**: Run multiple research tasks concurrently
43
+ - **Iterative Research**: Single-loop research with search-judge-synthesize cycles
44
+ - **Deep Research**: Multi-section parallel research with planning and synthesis
45
+ - **Magentic Orchestration**: Multi-agent coordination using Microsoft Agent Framework
46
+
47
+ ### Real-Time Streaming
48
+
49
+ - **Event Streaming**: Real-time updates via `AsyncGenerator[AgentEvent]`
50
+ - **Progress Tracking**: Monitor research progress with detailed event metadata
51
+ - **UI Integration**: Seamless integration with Gradio chat interface
52
+
53
+ ### Budget Management
54
+
55
+ - **Token Budget**: Track and limit LLM token usage
56
+ - **Time Budget**: Enforce time limits per research loop
57
+ - **Iteration Budget**: Limit maximum iterations
58
+ - **Per-Loop Budgets**: Independent budgets for parallel research loops
59
+
60
+ ### State Management
61
+
62
+ - **Thread-Safe Isolation**: ContextVar-based state management
63
+ - **Evidence Deduplication**: Automatic URL-based deduplication
64
+ - **Conversation History**: Track iteration history and agent interactions
65
+ - **State Synchronization**: Share evidence across parallel loops
66
+
67
+ ## Advanced Features
68
+
69
+ ### Agent System
70
+
71
+ - **Pydantic AI Agents**: Type-safe agent implementation
72
+ - **Structured Output**: Pydantic models for agent responses
73
+ - **Agent Factory**: Centralized agent creation with fallback support
74
+ - **Specialized Agents**: Knowledge gap, tool selector, writer, proofreader, and more
75
+
76
+ ### Search Tools
77
+
78
+ - **Rate Limiting**: Built-in rate limiting for external APIs
79
+ - **Retry Logic**: Automatic retry with exponential backoff
80
+ - **Query Preprocessing**: Automatic query enhancement and synonym expansion
81
+ - **Evidence Conversion**: Automatic conversion to structured Evidence objects
82
+
83
+ ### Error Handling
84
+
85
+ - **Custom Exceptions**: Hierarchical exception system
86
+ - **Error Chaining**: Preserve exception context
87
+ - **Structured Logging**: Comprehensive logging with structlog
88
+ - **Graceful Degradation**: Fallback handlers for missing dependencies
89
+
90
+ ### Configuration
91
+
92
+ - **Pydantic Settings**: Type-safe configuration management
93
+ - **Environment Variables**: Support for `.env` files
94
+ - **Validation**: Automatic configuration validation
95
+ - **Flexible Providers**: Support for multiple LLM and embedding providers
96
+
97
+ ### Testing
98
+
99
+ - **Unit Tests**: Comprehensive unit test coverage
100
+ - **Integration Tests**: Real API integration tests
101
+ - **Mock Support**: Extensive mocking utilities
102
+ - **Coverage Reports**: Code coverage tracking
103
+
104
+ ## UI Features
105
+
106
+ ### Gradio Interface
107
+
108
+ - **Real-Time Chat**: Interactive chat interface
109
+ - **Streaming Updates**: Live progress updates
110
+ - **Accordion UI**: Organized display of pending/done operations
111
+ - **OAuth Integration**: Seamless HuggingFace authentication
112
+
113
+ ### MCP Server
114
+
115
+ - **RESTful API**: HTTP-based MCP server
116
+ - **Tool Discovery**: Automatic tool registration
117
+ - **Request Handling**: Async request processing
118
+ - **Error Responses**: Structured error responses
119
+
120
+ ## Development Features
121
+
122
+ ### Code Quality
123
+
124
+ - **Type Safety**: Full type hints with mypy strict mode
125
+ - **Linting**: Ruff for code quality
126
+ - **Formatting**: Automatic code formatting
127
+ - **Pre-commit Hooks**: Automated quality checks
128
+
129
+ ### Documentation
130
+
131
+ - **Comprehensive Docs**: Detailed documentation for all components
132
+ - **Code Examples**: Extensive code examples
133
+ - **Architecture Diagrams**: Visual architecture documentation
134
+ - **API Reference**: Complete API documentation
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
docs/overview/quick-start.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Quick Start
2
+
3
+ Get started with DeepCritical in minutes.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # Install uv if you haven't already
9
+ pip install uv
10
+
11
+ # Sync dependencies
12
+ uv sync
13
+ ```
14
+
15
+ ## Run the UI
16
+
17
+ ```bash
18
+ # Start the Gradio app
19
+ uv run python src/app.py
20
+ ```
21
+
22
+ Open your browser to `http://localhost:7860`.
23
+
24
+ ## Basic Usage
25
+
26
+ ### 1. Authentication (Optional)
27
+
28
+ **HuggingFace OAuth Login**:
29
+ - Click the "Sign in with HuggingFace" button at the top of the app
30
+ - Your HuggingFace API token will be automatically used for AI inference
31
+ - No need to manually enter API keys when logged in
32
+
33
+ **Manual API Key (BYOK)**:
34
+ - Provide your own API key in the Settings accordion
35
+ - Supports HuggingFace, OpenAI, or Anthropic API keys
36
+ - Manual keys take priority over OAuth tokens
37
+
38
+ ### 2. Start a Research Query
39
+
40
+ 1. Enter your research question in the chat interface
41
+ 2. Click "Submit" or press Enter
42
+ 3. Watch the real-time progress as the system:
43
+ - Generates observations
44
+ - Identifies knowledge gaps
45
+ - Searches multiple sources
46
+ - Evaluates evidence
47
+ - Synthesizes findings
48
+ 4. Review the final research report
49
+
50
+ ### 3. MCP Integration (Optional)
51
+
52
+ Connect DeepCritical to Claude Desktop:
53
+
54
+ 1. Add to your `claude_desktop_config.json`:
55
+ ```json
56
+ {
57
+ "mcpServers": {
58
+ "deepcritical": {
59
+ "url": "http://localhost:7860/gradio_api/mcp/"
60
+ }
61
+ }
62
+ }
63
+ ```
64
+
65
+ 2. Restart Claude Desktop
66
+ 3. Use DeepCritical tools directly from Claude Desktop
67
+
68
+ ## Available Tools
69
+
70
+ - `search_pubmed`: Search peer-reviewed biomedical literature
71
+ - `search_clinical_trials`: Search ClinicalTrials.gov
72
+ - `search_biorxiv`: Search bioRxiv/medRxiv preprints
73
+ - `search_all`: Search all sources simultaneously
74
+ - `analyze_hypothesis`: Secure statistical analysis using Modal sandboxes
75
+
76
+ ## Next Steps
77
+
78
+ - Read the [Installation Guide](../getting-started/installation.md) for detailed setup
79
+ - Learn about [Configuration](../configuration/index.md)
80
+ - Explore the [Architecture](../architecture/graph-orchestration.md)
81
+ - Check out [Examples](../getting-started/examples.md)
82
+
docs/team.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Team
2
+
3
+ DeepCritical is developed by a team of researchers and developers working on AI-assisted research.
4
+
5
+ ## Team Members
6
+
7
+ ### The-Obstacle-Is-The-Way
8
+
9
+ - GitHub: [The-Obstacle-Is-The-Way](https://github.com/The-Obstacle-Is-The-Way)
10
+
11
+ ### MarioAderman
12
+
13
+ - GitHub: [MarioAderman](https://github.com/MarioAderman)
14
+
15
+ ### Josephrp
16
+
17
+ - GitHub: [Josephrp](https://github.com/Josephrp)
18
+
19
+ ## About
20
+
21
+ The DeepCritical team met online in the Alzheimer's Critical Literature Review Group, part of the Hugging Science initiative. We're building the agent framework we want to use for AI-assisted research, turning vast amounts of clinical data into cures.
22
+
23
+ ## Contributing
24
+
25
+ We welcome contributions! See the [Contributing Guide](contributing/index.md) for details.
26
+
27
+ ## Links
28
+
29
+ - [GitHub Repository](https://github.com/DeepCritical/GradioDemo)
30
+ - [HuggingFace Space](https://huggingface.co/spaces/DataQuests/DeepCritical)
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
examples/README.md ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepCritical Examples
2
+
3
+ **NO MOCKS. NO FAKE DATA. REAL SCIENCE.**
4
+
5
+ These demos run the REAL drug repurposing research pipeline with actual API calls.
6
+
7
+ ---
8
+
9
+ ## Prerequisites
10
+
11
+ You MUST have API keys configured:
12
+
13
+ ```bash
14
+ # Copy the example and add your keys
15
+ cp .env.example .env
16
+
17
+ # Required (pick one):
18
+ OPENAI_API_KEY=sk-...
19
+ ANTHROPIC_API_KEY=sk-ant-...
20
+
21
+ # Optional (higher PubMed rate limits):
22
+ NCBI_API_KEY=your-key
23
+ ```
24
+
25
+ ---
26
+
27
+ ## Examples
28
+
29
+ ### 1. Search Demo (No LLM Required)
30
+
31
+ Demonstrates REAL parallel search across PubMed, ClinicalTrials.gov, and Europe PMC.
32
+
33
+ ```bash
34
+ uv run python examples/search_demo/run_search.py "metformin cancer"
35
+ ```
36
+
37
+ **What's REAL:**
38
+ - Actual NCBI E-utilities API calls (PubMed)
39
+ - Actual ClinicalTrials.gov API calls
40
+ - Actual Europe PMC API calls (includes preprints)
41
+ - Real papers, real trials, real preprints
42
+
43
+ ---
44
+
45
+ ### 2. Embeddings Demo (No LLM Required)
46
+
47
+ Demonstrates REAL semantic search and deduplication.
48
+
49
+ ```bash
50
+ uv run python examples/embeddings_demo/run_embeddings.py
51
+ ```
52
+
53
+ **What's REAL:**
54
+ - Actual sentence-transformers model (all-MiniLM-L6-v2)
55
+ - Actual ChromaDB vector storage
56
+ - Real cosine similarity computations
57
+ - Real semantic deduplication
58
+
59
+ ---
60
+
61
+ ### 3. Orchestrator Demo (LLM Required)
62
+
63
+ Demonstrates the REAL search-judge-synthesize loop.
64
+
65
+ ```bash
66
+ uv run python examples/orchestrator_demo/run_agent.py "metformin cancer"
67
+ uv run python examples/orchestrator_demo/run_agent.py "aspirin alzheimer" --iterations 5
68
+ ```
69
+
70
+ **What's REAL:**
71
+ - Real PubMed + ClinicalTrials + Europe PMC searches
72
+ - Real LLM judge evaluating evidence quality
73
+ - Real iterative refinement based on LLM decisions
74
+ - Real research synthesis
75
+
76
+ ---
77
+
78
+ ### 4. Magentic Demo (OpenAI Required)
79
+
80
+ Demonstrates REAL multi-agent coordination using Microsoft Agent Framework.
81
+
82
+ ```bash
83
+ # Requires OPENAI_API_KEY specifically
84
+ uv run python examples/orchestrator_demo/run_magentic.py "metformin cancer"
85
+ ```
86
+
87
+ **What's REAL:**
88
+ - Real MagenticBuilder orchestration
89
+ - Real SearchAgent, JudgeAgent, HypothesisAgent, ReportAgent
90
+ - Real manager-based coordination
91
+
92
+ ---
93
+
94
+ ### 5. Hypothesis Demo (LLM Required)
95
+
96
+ Demonstrates REAL mechanistic hypothesis generation.
97
+
98
+ ```bash
99
+ uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
100
+ uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
101
+ ```
102
+
103
+ **What's REAL:**
104
+ - Real PubMed + Web search first
105
+ - Real embedding-based deduplication
106
+ - Real LLM generating Drug -> Target -> Pathway -> Effect chains
107
+ - Real knowledge gap identification
108
+
109
+ ---
110
+
111
+ ### 6. Full-Stack Demo (LLM Required)
112
+
113
+ **THE COMPLETE PIPELINE** - All phases working together.
114
+
115
+ ```bash
116
+ uv run python examples/full_stack_demo/run_full.py "metformin Alzheimer's"
117
+ uv run python examples/full_stack_demo/run_full.py "sildenafil heart failure" -i 3
118
+ ```
119
+
120
+ **What's REAL:**
121
+ 1. Real PubMed + ClinicalTrials + Europe PMC evidence collection
122
+ 2. Real embedding-based semantic deduplication
123
+ 3. Real LLM mechanistic hypothesis generation
124
+ 4. Real LLM evidence quality assessment
125
+ 5. Real LLM structured scientific report generation
126
+
127
+ Output: Publication-quality research report with validated citations.
128
+
129
+ ---
130
+
131
+ ## API Key Requirements
132
+
133
+ | Example | LLM Required | Keys |
134
+ |---------|--------------|------|
135
+ | search_demo | No | Optional: `NCBI_API_KEY` |
136
+ | embeddings_demo | No | None |
137
+ | orchestrator_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
138
+ | run_magentic | Yes | `OPENAI_API_KEY` (Magentic requires OpenAI) |
139
+ | hypothesis_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
140
+ | full_stack_demo | Yes | `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` |
141
+
142
+ ---
143
+
144
+ ## Architecture
145
+
146
+ ```text
147
+ User Query
148
+ |
149
+ v
150
+ [REAL Search] --> PubMed + ClinicalTrials + Europe PMC APIs
151
+ |
152
+ v
153
+ [REAL Embeddings] --> Actual sentence-transformers
154
+ |
155
+ v
156
+ [REAL Hypothesis] --> Actual LLM reasoning
157
+ |
158
+ v
159
+ [REAL Judge] --> Actual LLM assessment
160
+ |
161
+ +---> Need more? --> Loop back to Search
162
+ |
163
+ +---> Sufficient --> Continue
164
+ |
165
+ v
166
+ [REAL Report] --> Actual LLM synthesis
167
+ |
168
+ v
169
+ Publication-Quality Research Report
170
+ ```
171
+
172
+ ---
173
+
174
+ ## Why No Mocks?
175
+
176
+ > "Authenticity is the feature."
177
+
178
+ Mocks belong in `tests/unit/`, not in demos. When you run these examples, you see:
179
+ - Real papers from real databases
180
+ - Real AI reasoning about real evidence
181
+ - Real scientific hypotheses
182
+ - Real research reports
183
+
184
+ This is what DeepCritical actually does. No fake data. No canned responses.
examples/embeddings_demo/run_embeddings.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demo: Semantic Search & Deduplication (Phase 6).
4
+
5
+ This script demonstrates embedding-based capabilities using REAL data:
6
+ - Fetches REAL abstracts from PubMed
7
+ - Embeds text with sentence-transformers
8
+ - Performs semantic deduplication on LIVE research data
9
+
10
+ Usage:
11
+ uv run python examples/embeddings_demo/run_embeddings.py
12
+ """
13
+
14
+ import asyncio
15
+
16
+ from src.services.embeddings import EmbeddingService
17
+ from src.tools.pubmed import PubMedTool
18
+
19
+
20
def create_fresh_service(name_suffix: str = "") -> EmbeddingService:
    """Create a fresh EmbeddingService backed by a unique ChromaDB collection.

    Demo-only construction: bypasses ``EmbeddingService.__init__`` via
    ``__new__`` and wires the private attributes (``_model``, ``_client``,
    ``_collection``) by hand, so each demo step gets its own isolated
    collection instead of the service's default one.

    Args:
        name_suffix: Human-readable tag embedded in the collection name.

    Returns:
        An ``EmbeddingService`` whose collection name is unique per call.
    """
    import uuid

    # Plain deferred imports instead of the previous __import__() calls:
    # same lazy-loading effect, but readable and tool-friendly.
    import chromadb
    from sentence_transformers import SentenceTransformer

    # Skip __init__ so we can attach a throwaway collection by hand.
    service = EmbeddingService.__new__(EmbeddingService)
    service._model = SentenceTransformer("all-MiniLM-L6-v2")
    service._client = chromadb.Client()
    # uuid suffix guarantees uniqueness across repeated calls in one process.
    collection_name = f"demo_{name_suffix}_{uuid.uuid4().hex[:8]}"
    service._collection = service._client.create_collection(
        name=collection_name, metadata={"hnsw:space": "cosine"}
    )
    return service
33
+
34
+
35
async def demo_real_pipeline() -> None:
    """Run the end-to-end embeddings demo against live PubMed data.

    Steps:
        1. Fetch real abstracts from PubMed (NCBI E-utilities).
        2. Embed and index them via a fresh EmbeddingService.
        3. Run a semantic (concept-level, not keyword) search.
        4. Deduplicate an artificially inflated evidence pool.

    Requires network access for PubMed; no LLM API key is needed.
    """
    print("\n" + "=" * 60)
    print("DeepCritical Embeddings Demo (REAL DATA)")
    print("=" * 60)

    # 1. Fetch Real Data -- live PubMed API call, no mocks.
    query = "metformin mechanism of action"
    print(f"\n[1] Fetching real papers for: '{query}'...")
    pubmed = PubMedTool()
    # Fetch enough results to likely get some overlap/redundancy
    evidence = await pubmed.search(query, max_results=10)

    print(f" Found {len(evidence)} papers.")
    print("\n Sample Titles:")
    for i, e in enumerate(evidence[:3], 1):
        print(f" {i}. {e.citation.title[:80]}...")

    # 2. Embed Data -- collection is created fresh so reruns don't collide.
    print("\n[2] Embedding abstracts (sentence-transformers)...")
    service = create_fresh_service("real_demo")

    # 3. Semantic Search
    print("\n[3] Semantic Search Demo")
    print(" Indexing evidence...")
    for e in evidence:
        # Use URL as ID for uniqueness
        await service.add_evidence(
            evidence_id=e.citation.url,
            content=e.content,
            metadata={
                "source": e.citation.source,
                "title": e.citation.title,
                "date": e.citation.date,
            },
        )

    semantic_query = "activation of AMPK pathway"
    print(f" Searching for concept: '{semantic_query}'")
    results = await service.search_similar(semantic_query, n_results=2)

    print(" Top matches:")
    for i, r in enumerate(results, 1):
        # Convert distance to a similarity score; presumably cosine distance
        # given the collection's "hnsw:space" metadata -- confirm in
        # EmbeddingService.search_similar.
        similarity = 1 - r["distance"]
        print(f" {i}. [{similarity:.1%} match] {r['metadata']['title'][:70]}...")

    # 4. Semantic Deduplication
    print("\n[4] Semantic Deduplication Demo")
    # Create a FRESH service for deduplication so we don't clash with Step 3's index
    dedup_service = create_fresh_service("dedup_demo")

    print(" Checking for redundant papers (threshold=0.85)...")

    # To force a duplicate for demo purposes, let's double the evidence list
    # simulating finding the same papers again or very similar ones
    duplicated_evidence = evidence + evidence[:2]
    print(f" Input pool: {len(duplicated_evidence)} items (with artificial duplicates added)")

    unique = await dedup_service.deduplicate(duplicated_evidence, threshold=0.85)

    print(f" Output pool: {len(unique)} unique items")
    print(f" Removed {len(duplicated_evidence) - len(unique)} duplicates.")

    print("\n" + "=" * 60)
    print("Demo complete! Verified with REAL PubMed data.")
    print("=" * 60 + "\n")
101
+
102
+
103
if __name__ == "__main__":
    # Script entry point: drive the async demo to completion.
    asyncio.run(demo_real_pipeline())