""" Nuthatch MCP Server Wraps Nuthatch API v2.3.1 as reusable MCP tools Runs locally with FastMCP and supports STDIO transport Features: - 7 core tools for bird species reference data - Rich media: images (Unsplash) + audio (xeno-canto.org) - Taxonomic search and conservation status filtering - JSON responses for easy integration - Rate limiting and error handling Difference from eBird: - eBird: Real-time sightings, location-based hotspots - Nuthatch: Species reference, images, audio, taxonomy - Focus: "what is this bird?" vs. "Where can I see it?" """ import os import sys import requests import json import time from typing import Optional, Dict, List, Any from fastmcp import FastMCP from dotenv import load_dotenv # ============================================================================ # CONFIGURATION & SETUP # ============================================================================ load_dotenv() NUTHATCH_API_KEY = os.getenv("NUTHATCH_API_KEY") NUTHATCH_BASE_URL = os.getenv("NUTHATCH_BASE_URL", "https://nuthatch.lastelm.software/v2") DEFAULT_TIMEOUT = 15 # Rate limiting: 500 requests/hour = 7.2s safe, but 1s acceptable for demos # Demo sessions are bursty (5-10 requests in 30 seconds, then idle) # 1 second = 60 requests/minute max = 360/hour in worst case (still under 500) RATE_LIMIT_DELAY = 1.0 # Balance between responsiveness and API limits if not NUTHATCH_API_KEY: # Print to stderr to avoid corrupting STDIO MCP protocol (stdout must be JSON-RPC only) print("[WARNING]: NUTHATCH_API_KEY not found in .env", file=sys.stderr) print(" Get one from: https://nuthatch.lastelm.software/", file=sys.stderr) # Tool configuration - enable/disable as needed ENABLED_TOOLS = { "search_birds": True, "get_bird_info": True, "get_bird_images": True, "get_bird_audio": True, "search_by_family": True, "filter_by_status": True, "get_all_families": True, } # Authentication configuration for HTTP mode NUTHATCH_MCP_AUTH_KEY = os.getenv("NUTHATCH_MCP_AUTH_KEY") # Initialize FastMCP server with optional auth if NUTHATCH_MCP_AUTH_KEY: # HTTP mode with authentication from fastmcp.server.auth.providers.debug import DebugTokenVerifier auth = DebugTokenVerifier( validate=lambda token: token == NUTHATCH_MCP_AUTH_KEY, client_id="nuthatch-mcp-client" ) mcp = FastMCP("Nuthatch Bird Reference", auth=auth) else: # Development: No authentication mcp = FastMCP("Nuthatch Bird Reference") # Rate limiting tracker _last_request_time = 0 # ============================================================================ # HELPER FUNCTIONS # ============================================================================ def _rate_limit(): """Enforce rate limiting to avoid exceeding Nuthatch's API limits (500/hour)""" global _last_request_time elapsed = time.time() - _last_request_time if elapsed < RATE_LIMIT_DELAY: time.sleep(RATE_LIMIT_DELAY - elapsed) _last_request_time = time.time() def _make_request(endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]: """ Centralized request handler with error handling and rate limiting. IMPORTANT: Header name is case-sensitive! "API-Key" Args: endpoint: API endpoint path (e.g., "/birds") params: Query parameters dictionary Returns: JSON response data or None on error """ _rate_limit() try: headers = {"API-Key": NUTHATCH_API_KEY} # Case-sensitive! url = f"{NUTHATCH_BASE_URL}{endpoint}" response = requests.get( url, headers=headers, params=params or {}, timeout=DEFAULT_TIMEOUT, ) if response.status_code == 200: return response.json() elif response.status_code == 400: print(f" Bad Request ({url}): {response.text[:400]}", flush=True) return None elif response.status_code == 401: print(f" Unauthorized ({url}): Check your NUTHATCH_API_KEY - body={response.text[:400]}", flush=True) return None elif response.status_code == 404: print(f" Not found ({url}): Invalid endpoint or resource - body={response.text[:400]}", flush=True) return None else: print( f" HTTP {response.status_code} for {url} " f"params={params or {}} body={response.text[:400]}", flush=True, ) return None except requests.Timeout: print(f" Request timeout after {DEFAULT_TIMEOUT}s for {endpoint}", flush=True) return None except requests.ConnectionError: print(f" Connection error calling {endpoint} - check network", flush=True) return None except Exception as e: print(f" Unexpected error calling {endpoint}: {str(e)}", flush=True) return None def _format_success_response(data: Any, **kwargs) -> str: """Format a successful response as JSON""" response = {"status": "success", "data": data} response.update(kwargs) return json.dumps(response) def _format_error_response(error: str) -> str: """Format an error response as JSON""" return json.dumps({"status": "error", "error": error}) # ============================================================================ # TOOL 1: search_birds # ============================================================================ # Use case: User asks "What cardinals exist?" or classifier returns "Northern Cardinal" # This tool provides multi-filter search across the species database def search_birds( name: str = "", family: str = "", region: str = "", status: str = "", has_images: bool = True, page_size: int = 10 ) -> str: """ Search for bird species using multiple filters. Comprehensive search tool that combines name, taxonomy, geography, and media filters. Great for exploratory queries like "show me all cardinals" or "endangered birds". Can accept: - User input: "cardinals", "eagles", "finches" - Classifier output: "Northern Cardinal" -> search for similar species - Taxonomic queries: family="Cardinalidae" Args: name: Common or scientific name (partial match) family: Scientific family name (e.g., "Cardinalidae", "Anatidae") region: Geographic region ("North America", "Western Europe") status: Conservation status ("Low Concern", "Endangered", etc.) has_images: Only returns birds with images (default: True) page_size: Maximum results to return (max: 100) Returns: JSON with matching birds and their basic info Example: search_birds(name="cardinal", has_images=True) -> Returns all cardinal species with images """ if not name and not family and not region and not status: return _format_error_response("At least one search filter required") if page_size > 100: page_size = 100 try: params = {"pageSize": page_size} # Nuthatch API expects "true"/"false" strings, not Python booleans if has_images is not None: params["hasImg"] = "true" if has_images else "false" if name: # API is case-sensitive - convert to lowercase for reliable matching params["name"] = name.lower() if family: # API is case-sensitive - convert to lowercase for reliable matching params["family"] = family.lower() if region: params["region"] = region if status: params["status"] = status data = _make_request("/birds", params) if data is None: return _format_error_response("Failed to fetch birds") if not data.get('entities'): return _format_success_response( [], count=0, total_count=0, filters={"name": name, "family": family, "region": region, "status": status} ) # Format results with essential info birds = [ { "name": bird['name'], "scientific_name": bird['sciName'], "family": bird.get('family', 'Unknown'), "order": bird.get('order', 'Unknown'), "status": bird.get('status', 'Unknown'), "region": bird.get('region', []), "image_count": len(bird.get('images', [])), "has_audio": len(bird.get('recordings', [])) > 0 } for bird in data['entities'] ] return _format_success_response( birds, count=len(birds), total_count=data.get('totalCount', 0), filters={"name": name, "family": family, "region": region, "status": status} ) except Exception as e: return _format_error_response(f"Search failed: {str(e)}") # Register as MCP tool mcp.tool()(search_birds) # ============================================================================ # TOOL 2: get_bird_info # ============================================================================ # Use case: After classifier identifies a bird, get complete species details def get_bird_info(name: str) -> str: """ Get comprehensive information about a specific bird species. Returns all available data: taxonomy, size, conservation status, image/audio counts, and geographic range. Can accept: - User input: "Northern Cardinal" - Classifier output: Species name from image classification Args: name: Common or scientific name of the bird Returns: JSON with complete species information Example: get_bird_info("Northern Cardinal") -> Returns full details: family, size, status, media counts, etc. """ if not name or len(name.strip()) < 2: return _format_error_response("Bird name required (minimum 2 characters)") try: # Search for exact or closest match # API is case-sensitive - convert to lowercase for reliable matching params = {"name": name.lower(), "pageSize": 1} data = _make_request("/birds", params) if data is None or not data.get('entities'): return _format_error_response(f"Bird '{name}' not found in database") bird = data['entities'][0] # Compile comprehensive info info = { "name": bird['name'], "scientific_name": bird['sciName'], "family": bird.get('family', 'Unknown'), "order": bird.get('order', 'Unknown'), "status": bird.get('status', 'Unknown'), "region": bird.get('region', []), "length_cm": { "min": bird.get('lengthMin'), "max": bird.get('lengthMax') } if bird.get('lengthMin') else None, "wingspan_cm": { "min": bird.get('wingspanMin'), "max": bird.get('wingspanMax') } if bird.get('wingspanMin') else None, "image_count": len(bird.get('images', [])), "audio_count": len(bird.get('recordings', [])), "has_images": len(bird.get('images', [])) > 0, "has_audio": len(bird.get('recordings', [])) > 0 } return _format_success_response(info, bird_name=name) except Exception as e: return _format_error_response(f"Lookup failed: {str(e)}") # Register as MCP tool mcp.tool()(get_bird_info) # ============================================================================ # TOOL 3: get_bird_images # ============================================================================ # Use case: Show reference images to compare with user's uploaded photo def get_bird_images(name: str, max_images: int = 5) -> str: """ Get image URLs for a bird species. Returns high-quality reference images from Unsplash and curator photos. Perfect for visual comparison with user's uploaded photo. Can accept: - User input: "Show me pictures of a cardinal" - Classifier output: Species name -> fetch reference images Args: name: Common or scientific name of the bird max_images: Maximum number of image URLs to return (default: 5) Returns: JSON with image URLs and bird identification Example: get_bird_images("Northern Cardinal", max_images=3) -> Returns 3 image URLs for visual comparison """ if not name or len(name.strip()) < 2: return _format_error_response("Bird name required (minimum 2 characters)") try: # API is case-sensitive - convert to lowercase for reliable matching params = {"name": name.lower(), "pageSize": 1, "hasImg": "true"} data = _make_request("/birds", params) if data is None or not data.get('entities'): return _format_error_response(f"Bird '{name}' not found or has no images") bird = data['entities'][0] images = bird.get('images', []) if not images: return _format_error_response(f"No images available for '{bird['name']}'") return _format_success_response( images[:max_images], bird_name=bird['name'], scientific_name=bird['sciName'], total_images=len(images), returned_count=min(len(images), max_images) ) except Exception as e: return _format_error_response(f"Image lookup failed: {str(e)}") # Register as MCP tool mcp.tool()(get_bird_images) # ============================================================================ # TOOL 4: get_bird_audio # ============================================================================ # Use case: Provide audio recordings so user can learn bird's call/song def get_bird_audio(name: str, max_recordings: int = 5) -> str: """ Get audio recordings for a bird species. Returns recordings from xeno-canto.org with location, date, and type info. Great for learning bird calls and songs. Can accept: - User input: "What does a cardinal sound like?" - Classifier output: Species name -> fetch audio examples Args: name: Common or scientific name of the bird max_recordings: Maximum number of recordings to return (default: 5) Returns: JSON with recording metadata and download URLs Example: get_bird_audio("Northern Cardinal", max_recordings=3) -> Returns 3 audio recordings with metadata """ if not name or len(name.strip()) < 2: return _format_error_response("Bird name required (minimum 2 characters)") try: # API is case-sensitive - convert to lowercase for reliable matching params = {"name": name.lower(), "pageSize": 1} data = _make_request("/birds", params) if data is None or not data.get('entities'): return _format_error_response(f"Bird '{name}' not found") bird = data['entities'][0] recordings = bird.get('recordings', []) if not recordings: return _format_error_response(f"No audio recordings available for '{bird['name']}'") # Format recording info (keep essential fields only) formatted_recordings = [ { "type": rec.get('type', 'Unknown'), "location": rec.get('loc', 'Unknown'), "country": rec.get('cnt', 'Unknown'), "date": rec.get('date', 'Unknown'), "recordist": rec.get('rec', 'Unknown'), "file_url": rec.get('file', ''), "xeno_canto_url": rec.get('url', ''), "quality": rec.get('q', ''), "length": rec.get('length', 'Unknown') } for rec in recordings[:max_recordings] ] return _format_success_response( formatted_recordings, bird_name=bird['name'], scientific_name=bird['sciName'], total_recordings=len(recordings), returned_count=min(len(recordings), max_recordings) ) except Exception as e: return _format_error_response(f"Audio lookup failed: {str(e)}") # Register as MCP tool mcp.tool()(get_bird_audio) # ============================================================================ # TOOL 5: search_by_family # ============================================================================ # Use case: "Show me all birds in the same family as this cardinal" def search_by_family(family_name: str, max_results: int = 20) -> str: """ Get all bird species in a taxonomic family. Great for exploring related species after identifying a bird. Example: "This is a cardinal. What other cardinals exist?" Can accept: - User input: "Show me all finches" - Derived from classification: After identifying a Cardinalidae member Args: family_name: Scientific family name (e.g., "Cardinalidae", "Fringillidae") max_results: Maximum species to return (default: 20) Returns: JSON with all species in that family Example: search_by_family("Cardinalidae") -> Returns Northern Cardinal, Pyrrhuloxia, Rose-breasted Grosbeak, etc. """ if not family_name or len(family_name.strip()) < 2: return _format_error_response("Family name required (minimum 2 characters)") try: # API is case-sensitive - convert to lowercase for reliable matching params = {"family": family_name.lower(), "pageSize": min(max_results, 100)} data = _make_request("/birds", params) if data is None: return _format_error_response("Failed to fetch family data") if not data.get('entities'): return _format_error_response(f"No birds found in family '{family_name}'") # Format family members birds = [ { "name": bird['name'], "scientific_name": bird['sciName'], "status": bird.get('status', 'Unknown'), "region": bird.get('region', []), "has_images": len(bird.get('images', [])) > 0, "has_audio": len(bird.get('recordings', [])) > 0 } for bird in data['entities'] ] return _format_success_response( birds, family=family_name, count=len(birds), total_count=data.get('totalCount', 0) ) except Exception as e: return _format_error_response(f"Family search failed: {str(e)}") # Register as MCP tool mcp.tool()(search_by_family) # ============================================================================ # TOOL 6: filter_by_status # ============================================================================ # Use case: "Show me endangered birds" or conservation awareness def filter_by_status(status: str, region: str = "", max_results: int = 20) -> str: """ Find birds by conservation status. Great for conservation awareness and educational purposes. Common statuses: "Low Concern", "Endangered", "Threatened", "Vulnerable" Can accept: - User input: "Show me endangered birds" - Educational queries: "What birds are threatened?" Args: status: Conservation status to filter by region: Optional geographic filter ("North America", "Western Europe") max_results: Maximum birds to return (default: 20) Returns: JSON with birds matching the conservation status Example: filter_by_status("Endangered", region="North America") -> Returns endangered birds in North America """ if not status or len(status.strip()) < 2: return _format_error_response("Conservation status required") try: params = {"status": status, "pageSize": min(max_results, 100)} if region: params["region"] = region data = _make_request("/birds", params) if data is None: return _format_error_response("Failed to fetch status data") if not data.get('entities'): return _format_error_response( f"No birds found with status '{status}'" + (f" in region '{region}'" if region else "") ) # Format results birds = [ { "name": bird['name'], "scientific_name": bird['sciName'], "family": bird.get('family', 'Unknown'), "status": bird.get('status', 'Unknown'), "region": bird.get('region', []), "has_images": len(bird.get('images', [])) > 0 } for bird in data['entities'] ] return _format_success_response( birds, status=status, region=region or "All regions", count=len(birds), total_count=data.get('totalCount', 0) ) except Exception as e: return _format_error_response(f"Status filter failed: {str(e)}") # Register as MCP tool mcp.tool()(filter_by_status) # ============================================================================ # TOOL 7: get_all_families # ============================================================================ # Use case: "What bird families are in the database?" def get_all_families(region: str = "") -> str: """ Get list of all unique bird families in the database. Useful for taxonomic exploration and understanding database coverage. Can accept: - User input: "What families are covered?" - Educational queries: "Show me all bird families" Args: region: Optional geographic filter ("North America", "Western Europe") Returns: JSON with unique family names Example: get_all_families(region="North America") -> Returns ["Anatidae", "Cardinalidae", "Fringillidae", ...] """ try: # Fetch large sample to get comprehensive family list params = {"pageSize": 100} if region: params["region"] = region data = _make_request("/birds", params) if data is None: return _format_error_response("Failed to fetch family data") if not data.get('entities'): return _format_error_response("No birds found") # Extract unique families families = list(set( bird.get('family', 'Unknown') for bird in data['entities'] if bird.get('family') )) families.sort() return _format_success_response( families, region=region or "All regions", count=len(families), note="This is a sample - database may contain more families" ) except Exception as e: return _format_error_response(f"Family listing failed: {str(e)}") # Register as MCP tool mcp.tool()(get_all_families) # ============================================================================ # SERVER STARTUP WITH STDIO TRANSPORT # ============================================================================ def main(): """Start the MCP server with dual transport support (STDIO or HTTP).""" # Determine transport mode from command line args is_http_mode = "--http" in sys.argv or "--streamable-http" in sys.argv # For STDIO mode, all informational output must go to stderr (stdout is for JSON-RPC only) # For HTTP mode, can use stdout output = sys.stdout if is_http_mode else sys.stderr print("\n"+"="*70, file=output) print("🐦 [NUTHATCH MCP SERVER] - Starting...", file=output) print("="*70, file=output) print(f"[API KEY]: {'✅ Configured' if NUTHATCH_API_KEY else '❌ Missing'}", file=output) print("\n[AVAILABLE TOOLS]:", file=output) tools_list = [ "1. search_birds - Multi-filter bird search", "2. get_bird_info - Complete species information", "3. get_bird_images - Reference image URLs", "4. get_bird_audio - Audio recordings from xeno-canto", "5. search_by_family - All species in taxonomic family", "6. filter_by_status - Birds by conservation status", "7. get_all_families - List all bird families" ] for tool in tools_list: print(f" ✓ {tool}", file=output) print("\n[DATA SOURCE]:", file=output) print(" • Images: Unsplash + curator photos", file=output) print(" • Audio: xeno-canto.org recordings", file=output) print(" • Coverage: 1000+ species (North America, Western Europe)", file=output) print("\n"+"="*70, file=output) if is_http_mode: # Extract port and host from command line args port = 8001 # Default to 8001 to avoid conflict with other MCP servers host = "127.0.0.1" for i, arg in enumerate(sys.argv): if arg == "--port" and i + 1 < len(sys.argv): port = int(sys.argv[i + 1]) elif arg == "--host" and i + 1 < len(sys.argv): host = sys.argv[i + 1] # Auth status based on initialization auth_status = "🔒 Protected (auth required)" if NUTHATCH_MCP_AUTH_KEY else "🔓 No authentication (development mode)" print("[TRANSPORT]: Starting streamable-http MCP server", file=output) print(f"[HOST]: {host}", file=output) print(f"[PORT]: {port}", file=output) print(f"[URL]: http://{host}:{port}", file=output) print(f"[AUTH]: {auth_status}", file=output) print("[NOTE]: This is MCP over HTTP for web clients", file=output) print("="*70+"\n", file=output) # Run with streamable-http transport (auth configured at FastMCP init) mcp.run(transport="streamable-http", host=host, port=port) else: print("[TRANSPORT]: Running as stdio MCP server", file=output) print("[NOTE]: For HTTP transport, use: python nuthatch_tools.py --http", file=output) print("="*70+"\n", file=output) # Run as stdio MCP server (default for agent integration) mcp.run(transport="stdio") if __name__ == "__main__": main()