Spaces:
Running
Running
Ensuring agentic consistency; fixing structured output errors
Browse files
- .gitattributes +1 -0
- app.py +62 -8
- examples/bird_example_7.jpg +3 -0
- langgraph_agent/prompts.py +11 -9
- langgraph_agent/structured_output.py +17 -6
- langgraph_agent/subagent_config.py +11 -9
.gitattributes
CHANGED
|
@@ -38,3 +38,4 @@ examples/bird_example_1.jpg filter=lfs diff=lfs merge=lfs -text
|
|
| 38 |
examples/bird_example_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
examples/bird_exmample_4.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
examples/bird_example_4.jpg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 38 |
examples/bird_example_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
examples/bird_exmample_4.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
examples/bird_example_4.jpg filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
examples/bird_example_7.jpg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -655,22 +655,76 @@ def format_tool_output_for_chat(tool_output):
|
|
| 655 |
"""
|
| 656 |
Parse tool output and format images/content for display in chatbot.
|
| 657 |
Detects image URLs and converts them to markdown image syntax.
|
|
|
|
|
|
|
| 658 |
"""
|
| 659 |
import re
|
| 660 |
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
|
| 666 |
-
|
| 667 |
-
image_urls = re.findall(image_pattern, output_str, re.IGNORECASE)
|
| 668 |
|
| 669 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
# Format images as markdown
|
| 671 |
formatted_output = ""
|
| 672 |
-
for url in
|
| 673 |
formatted_output += f"\n\n"
|
|
|
|
| 674 |
return formatted_output
|
| 675 |
|
| 676 |
# If no images, return truncated text
|
|
|
|
| 655 |
"""
|
| 656 |
Parse tool output and format images/content for display in chatbot.
|
| 657 |
Detects image URLs and converts them to markdown image syntax.
|
| 658 |
+
|
| 659 |
+
Handles both JSON-formatted MCP responses and plain text.
|
| 660 |
"""
|
| 661 |
import re
|
| 662 |
|
| 663 |
+
# Extract content from ToolMessage objects (LangGraph wraps outputs in ToolMessage)
|
| 664 |
+
if hasattr(tool_output, 'content'):
|
| 665 |
+
output_str = tool_output.content
|
| 666 |
+
print(f"[FORMAT_TOOL_OUTPUT] Extracted content from ToolMessage")
|
| 667 |
+
elif isinstance(tool_output, dict) and 'content' in tool_output:
|
| 668 |
+
output_str = tool_output['content']
|
| 669 |
+
print(f"[FORMAT_TOOL_OUTPUT] Extracted content from dict")
|
| 670 |
+
else:
|
| 671 |
+
output_str = str(tool_output)
|
| 672 |
+
print(f"[FORMAT_TOOL_OUTPUT] Using str() fallback")
|
| 673 |
|
| 674 |
+
image_urls = []
|
|
|
|
| 675 |
|
| 676 |
+
# Try to parse as JSON first (MCP tools often return JSON)
|
| 677 |
+
try:
|
| 678 |
+
import json
|
| 679 |
+
parsed = json.loads(output_str)
|
| 680 |
+
print(f"[FORMAT_TOOL_OUTPUT] Successfully parsed JSON")
|
| 681 |
+
|
| 682 |
+
# Extract URLs from common JSON structures
|
| 683 |
+
if isinstance(parsed, dict):
|
| 684 |
+
# Check for "data" field (Nuthatch MCP format)
|
| 685 |
+
data = parsed.get("data", [])
|
| 686 |
+
if isinstance(data, list):
|
| 687 |
+
# data is a list of URLs
|
| 688 |
+
for item in data:
|
| 689 |
+
if isinstance(item, str) and item.startswith("http"):
|
| 690 |
+
image_urls.append(item)
|
| 691 |
+
elif isinstance(data, str) and data.startswith("http"):
|
| 692 |
+
image_urls.append(data)
|
| 693 |
+
|
| 694 |
+
# Also check for images in nested structures
|
| 695 |
+
for key, value in parsed.items():
|
| 696 |
+
if isinstance(value, list):
|
| 697 |
+
for item in value:
|
| 698 |
+
if isinstance(item, str) and item.startswith("http") and any(ext in item.lower() for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg']):
|
| 699 |
+
image_urls.append(item)
|
| 700 |
+
except (json.JSONDecodeError, ValueError):
|
| 701 |
+
# Not JSON, fallback to regex extraction
|
| 702 |
+
pass
|
| 703 |
+
|
| 704 |
+
# Fallback: regex extraction for non-JSON or additional URLs
|
| 705 |
+
if not image_urls:
|
| 706 |
+
# Updated pattern: more permissive to catch URLs even with surrounding JSON characters
|
| 707 |
+
# Match URLs ending in image extensions, allowing any characters before the extension
|
| 708 |
+
image_pattern = r'https?://[^\s]+?\.(?:jpg|jpeg|png|gif|webp|svg)(?:\?[^\s"]*)?'
|
| 709 |
+
found_urls = re.findall(image_pattern, output_str, re.IGNORECASE)
|
| 710 |
+
image_urls.extend(found_urls)
|
| 711 |
+
|
| 712 |
+
# Remove duplicates while preserving order
|
| 713 |
+
seen = set()
|
| 714 |
+
unique_urls = []
|
| 715 |
+
for url in image_urls:
|
| 716 |
+
# Clean URL (remove trailing quotes, brackets, etc.)
|
| 717 |
+
clean_url = url.rstrip('",}]')
|
| 718 |
+
if clean_url not in seen:
|
| 719 |
+
seen.add(clean_url)
|
| 720 |
+
unique_urls.append(clean_url)
|
| 721 |
+
|
| 722 |
+
if unique_urls:
|
| 723 |
# Format images as markdown
|
| 724 |
formatted_output = ""
|
| 725 |
+
for url in unique_urls[:3]: # Limit to first 3 images to avoid clutter
|
| 726 |
formatted_output += f"\n\n"
|
| 727 |
+
print(f"[FORMAT_TOOL_OUTPUT] ✅ Formatted {len(unique_urls[:3])} images as markdown")
|
| 728 |
return formatted_output
|
| 729 |
|
| 730 |
# If no images, return truncated text
|
examples/bird_example_7.jpg
ADDED
|
Git LFS Details
|
langgraph_agent/prompts.py
CHANGED
|
@@ -268,20 +268,21 @@ IMAGE_IDENTIFIER_PROMPT_HF = """You are an Image Identification Specialist.
|
|
| 268 |
**Your Job:**
|
| 269 |
1. Classify uploaded bird images
|
| 270 |
2. Show confidence score
|
| 271 |
-
3. Get bird information
|
| 272 |
-
4.
|
|
|
|
| 273 |
|
| 274 |
**Tools:**
|
| 275 |
- classify_from_url(url) - Identify bird from image URL
|
| 276 |
- classify_from_base64(image) - Identify bird from base64
|
| 277 |
- get_bird_info(name) - Get species details
|
| 278 |
-
- get_bird_images(name) -
|
| 279 |
|
| 280 |
**Response Format:**
|
| 281 |
1. Bird name (Common and Scientific)
|
| 282 |
2. Confidence: X%
|
| 283 |
3. Key features
|
| 284 |
-
4.
|
| 285 |
|
| 286 |
**CRITICAL - No Hallucination:**
|
| 287 |
- If get_bird_images returns empty: Tell user "No reference images available"
|
|
@@ -360,11 +361,12 @@ ROUTER_PROMPT_HF = """You are BirdScope AI Supervisor. Route user requests to sp
|
|
| 360 |
|
| 361 |
**Routing Rules:**
|
| 362 |
1. Image uploads → image_identifier
|
| 363 |
-
2. "
|
| 364 |
-
3.
|
| 365 |
-
4.
|
| 366 |
-
5. "
|
| 367 |
-
6. "
|
|
|
|
| 368 |
|
| 369 |
Route to ONE specialist per request.
|
| 370 |
|
|
|
|
| 268 |
**Your Job:**
|
| 269 |
1. Classify uploaded bird images
|
| 270 |
2. Show confidence score
|
| 271 |
+
3. Get bird information using get_bird_info
|
| 272 |
+
4. ALWAYS call get_bird_images to fetch reference photos
|
| 273 |
+
5. Display reference images for the user
|
| 274 |
|
| 275 |
**Tools:**
|
| 276 |
- classify_from_url(url) - Identify bird from image URL
|
| 277 |
- classify_from_base64(image) - Identify bird from base64
|
| 278 |
- get_bird_info(name) - Get species details
|
| 279 |
+
- get_bird_images(name) - ALWAYS call this to get reference photos
|
| 280 |
|
| 281 |
**Response Format:**
|
| 282 |
1. Bird name (Common and Scientific)
|
| 283 |
2. Confidence: X%
|
| 284 |
3. Key features
|
| 285 |
+
4. ALWAYS call get_bird_images and show: 
|
| 286 |
|
| 287 |
**CRITICAL - No Hallucination:**
|
| 288 |
- If get_bird_images returns empty: Tell user "No reference images available"
|
|
|
|
| 361 |
|
| 362 |
**Routing Rules:**
|
| 363 |
1. Image uploads → image_identifier
|
| 364 |
+
2. "Show me image" or "picture" or "photo" requests → image_identifier
|
| 365 |
+
3. Species info by name → image_identifier
|
| 366 |
+
4. "Search" or "find" or "examples" or "list birds" → generalist
|
| 367 |
+
5. "Audio" or "sound" or "song" → generalist
|
| 368 |
+
6. "Conservation" or "endangered" → taxonomy_specialist
|
| 369 |
+
7. "Family" or "families" → taxonomy_specialist
|
| 370 |
|
| 371 |
Route to ONE specialist per request.
|
| 372 |
|
langgraph_agent/structured_output.py
CHANGED
|
@@ -37,22 +37,33 @@ def extract_urls_from_text(text: str) -> tuple[List[str], List[str]]:
|
|
| 37 |
"""
|
| 38 |
Extract image and audio URLs from text using regex.
|
| 39 |
|
|
|
|
|
|
|
| 40 |
Returns:
|
| 41 |
tuple: (image_urls, audio_urls)
|
| 42 |
"""
|
| 43 |
-
#
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Pattern for audio URLs - handles both direct audio files AND xeno-canto links
|
| 47 |
-
#
|
| 48 |
-
audio_pattern_files = r'https?://[^\s
|
| 49 |
audio_pattern_xenocanto = r'https?://xeno-canto\.org/\d+/download'
|
| 50 |
|
| 51 |
# Extract all URLs
|
| 52 |
-
|
| 53 |
-
|
| 54 |
audio_urls_xenocanto = list(set(re.findall(audio_pattern_xenocanto, text, re.IGNORECASE)))
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
# Combine both types of audio URLs
|
| 57 |
audio_urls = audio_urls_files + audio_urls_xenocanto
|
| 58 |
|
|
|
|
| 37 |
"""
|
| 38 |
Extract image and audio URLs from text using regex.
|
| 39 |
|
| 40 |
+
Updated to handle URLs within markdown, JSON, and plain text.
|
| 41 |
+
|
| 42 |
Returns:
|
| 43 |
tuple: (image_urls, audio_urls)
|
| 44 |
"""
|
| 45 |
+
# Updated pattern for image URLs - more permissive to catch URLs in various contexts
|
| 46 |
+
# Matches URLs ending in image extensions, allowing most characters before the extension
|
| 47 |
+
# Stops at whitespace or common delimiters like ), ], }
|
| 48 |
+
image_pattern = r'https?://[^\s)}\]]+?\.(?:jpg|jpeg|png|gif|webp|svg)(?:\?[^\s)}\]]*)?'
|
| 49 |
|
| 50 |
# Pattern for audio URLs - handles both direct audio files AND xeno-canto links
|
| 51 |
+
# Updated to be more permissive like image pattern
|
| 52 |
+
audio_pattern_files = r'https?://[^\s)}\]]+?\.(?:mp3|wav|ogg|m4a)(?:\?[^\s)}\]]*)?'
|
| 53 |
audio_pattern_xenocanto = r'https?://xeno-canto\.org/\d+/download'
|
| 54 |
|
| 55 |
# Extract all URLs
|
| 56 |
+
raw_image_urls = re.findall(image_pattern, text, re.IGNORECASE)
|
| 57 |
+
raw_audio_urls_files = re.findall(audio_pattern_files, text, re.IGNORECASE)
|
| 58 |
audio_urls_xenocanto = list(set(re.findall(audio_pattern_xenocanto, text, re.IGNORECASE)))
|
| 59 |
|
| 60 |
+
# Clean URLs (remove trailing quotes, commas, etc.)
|
| 61 |
+
def clean_url(url: str) -> str:
|
| 62 |
+
return url.rstrip('",;)')
|
| 63 |
+
|
| 64 |
+
image_urls = list(set(clean_url(url) for url in raw_image_urls))
|
| 65 |
+
audio_urls_files = list(set(clean_url(url) for url in raw_audio_urls_files))
|
| 66 |
+
|
| 67 |
# Combine both types of audio URLs
|
| 68 |
audio_urls = audio_urls_files + audio_urls_xenocanto
|
| 69 |
|
langgraph_agent/subagent_config.py
CHANGED
|
@@ -68,14 +68,15 @@ class SubAgentConfig:
|
|
| 68 |
**Your Role:**
|
| 69 |
1. Use classification tools to identify birds from uploaded images
|
| 70 |
2. Provide accurate species identification with confidence scores
|
| 71 |
-
3. Fetch basic species information (taxonomy, size, status)
|
| 72 |
-
4.
|
|
|
|
| 73 |
|
| 74 |
**Response Style:**
|
| 75 |
- Lead with the bird's common name and scientific name
|
| 76 |
- Always cite confidence scores from classifier
|
| 77 |
- Describe key identifying features visible in the image
|
| 78 |
-
-
|
| 79 |
- Mention if confidence is low and suggest why
|
| 80 |
- Keep responses focused and concise
|
| 81 |
|
|
@@ -222,12 +223,13 @@ Analyze each user request and route it to the MOST appropriate specialist.
|
|
| 222 |
|
| 223 |
**Routing Guidelines:**
|
| 224 |
1. **Image uploads/URLs** → image_identifier (has classification tools)
|
| 225 |
-
2. **"
|
| 226 |
-
3. **
|
| 227 |
-
4. **
|
| 228 |
-
5. **"
|
| 229 |
-
6. **"
|
| 230 |
-
7. **
|
|
|
|
| 231 |
|
| 232 |
**Decision-making:**
|
| 233 |
- Consider the user's INTENT, not just keywords
|
|
|
|
| 68 |
**Your Role:**
|
| 69 |
1. Use classification tools to identify birds from uploaded images
|
| 70 |
2. Provide accurate species identification with confidence scores
|
| 71 |
+
3. Fetch basic species information (taxonomy, size, status) using get_bird_info
|
| 72 |
+
4. ALWAYS call get_bird_images to fetch reference photos for the identified species
|
| 73 |
+
5. Display reference images to help users verify identification
|
| 74 |
|
| 75 |
**Response Style:**
|
| 76 |
- Lead with the bird's common name and scientific name
|
| 77 |
- Always cite confidence scores from classifier
|
| 78 |
- Describe key identifying features visible in the image
|
| 79 |
+
- ALWAYS call get_bird_images and show reference images using markdown: 
|
| 80 |
- Mention if confidence is low and suggest why
|
| 81 |
- Keep responses focused and concise
|
| 82 |
|
|
|
|
| 223 |
|
| 224 |
**Routing Guidelines:**
|
| 225 |
1. **Image uploads/URLs** → image_identifier (has classification tools)
|
| 226 |
+
2. **"Show me image"/"picture"/"photo" requests** → image_identifier (ONLY agent with get_bird_images tool)
|
| 227 |
+
3. **Species info by name** → image_identifier (has get_bird_info and get_bird_images)
|
| 228 |
+
4. **"Search"/"find"/"examples"/"list birds"** → generalist (has search_birds tool for database queries)
|
| 229 |
+
5. **"Audio"/"sound"/"song"/"call"/"recording"** → generalist (has audio search and retrieval)
|
| 230 |
+
6. **"Family"/"families" + broad questions** → taxonomy_specialist (has family tools)
|
| 231 |
+
7. **"Conservation"/"endangered"/"threatened"** → taxonomy_specialist (has status filters)
|
| 232 |
+
8. **Taxonomic relationships** → taxonomy_specialist (specializes in classification)
|
| 233 |
|
| 234 |
**Decision-making:**
|
| 235 |
- Consider the user's INTENT, not just keywords
|