Spaces:
Running
Running
| import json | |
| from typing import Any, List | |
| from logging_config import get_logger | |
# Module-wide logger, obtained from the project's logging_config.get_logger
# helper under the name "utils.json_extractor".
logger = get_logger("utils.json_extractor")
| def _find_balanced_closing_index(text: str, start_index: int) -> int: | |
| start_char = text[start_index] | |
| end_char = "}" if start_char == "{" else "]" | |
| depth = 0 | |
| in_double_quote = False | |
| in_single_quote = False | |
| in_backtick = False | |
| in_line_comment = False | |
| in_block_comment = False | |
| is_escaped = False | |
| length = len(text) | |
| i = start_index | |
| while i < length: | |
| char = text[i] | |
| next_char = text[i + 1] if i + 1 < length else "" | |
| if is_escaped: | |
| is_escaped = False | |
| i += 1 | |
| continue | |
| if char == "\\" and not in_line_comment and not in_block_comment: | |
| is_escaped = True | |
| i += 1 | |
| continue | |
| if in_line_comment: | |
| if char == "\n": | |
| in_line_comment = False | |
| i += 1 | |
| continue | |
| if in_block_comment: | |
| if char == "*" and next_char == "/": | |
| in_block_comment = False | |
| i += 2 | |
| continue | |
| i += 1 | |
| continue | |
| if not in_double_quote and not in_single_quote and not in_backtick: | |
| if char == "/" and next_char == "/": | |
| in_line_comment = True | |
| i += 2 | |
| continue | |
| if char == "/" and next_char == "*": | |
| in_block_comment = True | |
| i += 2 | |
| continue | |
| if in_double_quote: | |
| if char == '"': | |
| in_double_quote = False | |
| i += 1 | |
| continue | |
| if in_single_quote: | |
| if char == "'": | |
| in_single_quote = False | |
| i += 1 | |
| continue | |
| if in_backtick: | |
| if char == "`": | |
| in_backtick = False | |
| i += 1 | |
| continue | |
| if char == '"': | |
| in_double_quote = True | |
| i += 1 | |
| continue | |
| if char == "'": | |
| in_single_quote = True | |
| i += 1 | |
| continue | |
| if char == "`": | |
| in_backtick = True | |
| i += 1 | |
| continue | |
| if char == start_char: | |
| depth += 1 | |
| elif char == end_char: | |
| depth -= 1 | |
| if depth == 0: | |
| return i | |
| i += 1 | |
| return -1 | |
def extract_json_from_content(content: str) -> List[Any]:
    """Extract every parseable JSON object or array embedded in ``content``.

    The text is scanned left to right.  At each ``{`` or ``[`` the matching
    closing bracket is located with ``_find_balanced_closing_index`` and the
    enclosed slice is handed to ``json.loads``.  When parsing succeeds the
    value is collected and scanning resumes after the closing bracket, so
    structures nested inside an already-extracted block are not reported
    separately.  When no closing bracket is found or parsing fails, scanning
    resumes one character later, which lets inner candidates be tried.

    Args:
        content: Arbitrary text that may contain JSON objects or arrays.

    Returns:
        The parsed values in order of appearance.  An empty list is returned
        when ``content`` is empty or is not a string.
    """
    # Validate before calling len(): None or any other unsized value must
    # reach the warning path below instead of raising TypeError.
    if not isinstance(content, str) or not content:
        logger.warning("Received empty or non-string content for JSON extraction")
        return []

    length = len(content)
    logger.debug("Starting JSON extraction from content of length %d", length)

    found_blocks: List[Any] = []
    cursor = 0

    while cursor < length:
        if content[cursor] not in {"{", "["}:
            cursor += 1
            continue

        end_index = _find_balanced_closing_index(content, cursor)
        if end_index == -1:
            logger.debug("No balanced closing bracket found at cursor=%d", cursor)
            cursor += 1
            continue

        raw_candidate = content[cursor:end_index + 1]
        try:
            parsed = json.loads(raw_candidate)
        except json.JSONDecodeError:
            # Balanced but not valid JSON: step forward one character so
            # that any bracket nested inside this span is still examined.
            cursor += 1
            continue

        logger.debug("Successfully parsed JSON block at cursor=%d", cursor)
        found_blocks.append(parsed)
        cursor = end_index + 1

    logger.info("JSON extraction complete: found %d block(s)", len(found_blocks))
    return found_blocks