"""Extract JSON objects and arrays embedded in free-form text."""

import json
from typing import Any, List

from logging_config import get_logger

logger = get_logger("utils.json_extractor")


def _find_balanced_closing_index(text: str, start_index: int) -> int:
    """Return the index of the bracket closing the one at ``start_index``.

    The opener is ``text[start_index]``. When it is ``"{"`` the matching
    closer is ``"}"``; for any other opener the closer is ``"]"``. Only
    brackets of that one kind are counted, so an array nested inside an
    object (or vice versa) does not affect the depth.

    Brackets are ignored while the scan is inside a double-quoted,
    single-quoted or backtick-quoted span, a ``//`` line comment, or a
    ``/* ... */`` block comment. Outside comments, a backslash causes the
    character that follows it to be skipped.

    Args:
        text: The full text being scanned.
        start_index: Position of the opening bracket within ``text``.

    Returns:
        The index of the balancing closing bracket, or ``-1`` if the end
        of ``text`` is reached before the depth returns to zero.
    """
    start_char = text[start_index]
    end_char = "}" if start_char == "{" else "]"

    depth = 0
    in_double_quote = False
    in_single_quote = False
    in_backtick = False
    in_line_comment = False
    in_block_comment = False
    is_escaped = False

    length = len(text)
    i = start_index
    while i < length:
        char = text[i]
        next_char = text[i + 1] if i + 1 < length else ""

        # The previous character was a backslash: skip this one verbatim.
        if is_escaped:
            is_escaped = False
            i += 1
            continue

        # Backslashes are only treated as escapes outside comments.
        if char == "\\" and not in_line_comment and not in_block_comment:
            is_escaped = True
            i += 1
            continue

        # A line comment runs until the next newline.
        if in_line_comment:
            if char == "\n":
                in_line_comment = False
            i += 1
            continue

        # A block comment runs until the two-character terminator "*/".
        if in_block_comment:
            if char == "*" and next_char == "/":
                in_block_comment = False
                i += 2
                continue
            i += 1
            continue

        # Comment openers are only recognised outside quoted spans, so a
        # value such as "http://host" does not start a comment.
        if not in_double_quote and not in_single_quote and not in_backtick:
            if char == "/" and next_char == "/":
                in_line_comment = True
                i += 2
                continue
            if char == "/" and next_char == "*":
                in_block_comment = True
                i += 2
                continue

        # Inside a quoted span only the matching quote character matters.
        if in_double_quote:
            if char == '"':
                in_double_quote = False
            i += 1
            continue
        if in_single_quote:
            if char == "'":
                in_single_quote = False
            i += 1
            continue
        if in_backtick:
            if char == "`":
                in_backtick = False
            i += 1
            continue

        # Not inside any quoted span: a quote character opens one.
        if char == '"':
            in_double_quote = True
            i += 1
            continue
        if char == "'":
            in_single_quote = True
            i += 1
            continue
        if char == "`":
            in_backtick = True
            i += 1
            continue

        # Structural character: track nesting of the opener's bracket kind.
        if char == start_char:
            depth += 1
        elif char == end_char:
            depth -= 1
            if depth == 0:
                return i
        i += 1

    return -1


def extract_json_from_content(content: str) -> List[Any]:
    """Collect every JSON object or array that can be parsed out of ``content``.

    The text is scanned left to right. At each ``{`` or ``[`` the balancing
    closing bracket is located and the enclosed slice is handed to
    ``json.loads``. A slice that parses is appended to the result and the
    scan resumes after it; a slice that does not parse (or has no balancing
    bracket) is abandoned and the scan resumes one character later, so
    valid JSON nested inside an invalid outer candidate is still found.

    Args:
        content: Text that may contain embedded JSON.

    Returns:
        The parsed values in order of appearance. An empty list is returned
        when ``content`` is empty or is not a ``str``.
    """
    # Validate before calling len(): logging arguments are evaluated eagerly,
    # so logging the length first would raise TypeError for None (or any
    # other unsized value) and the guard would never run. The isinstance
    # check goes first so truthiness is only evaluated on real strings.
    if not isinstance(content, str) or not content:
        logger.warning("Received empty or non-string content for JSON extraction")
        return []

    length = len(content)
    logger.debug("Starting JSON extraction from content of length %d", length)

    found_blocks: List[Any] = []
    cursor = 0
    while cursor < length:
        if content[cursor] not in {"{", "["}:
            cursor += 1
            continue

        end_index = _find_balanced_closing_index(content, cursor)
        if end_index == -1:
            logger.debug("No balanced closing bracket found at cursor=%d", cursor)
            cursor += 1
            continue

        raw_candidate = content[cursor:end_index + 1]
        try:
            parsed = json.loads(raw_candidate)
        except json.JSONDecodeError:
            # Not valid JSON from this opener; retry from the next character.
            cursor += 1
            continue

        logger.debug("Successfully parsed JSON block at cursor=%d", cursor)
        found_blocks.append(parsed)
        # Resume after the parsed block so its nested values are not re-emitted.
        cursor = end_index + 1

    logger.info("JSON extraction complete: found %d block(s)", len(found_blocks))
    return found_blocks