ai_chat_api / utils /json_extractor.py
Soumik Bose
ok
a243ca8
import json
from typing import Any, List
from logging_config import get_logger
# Module-level logger shared by the helpers in this file; it is obtained from
# the project's logging_config.get_logger under the name "utils.json_extractor".
logger = get_logger("utils.json_extractor")
def _find_balanced_closing_index(text: str, start_index: int) -> int:
start_char = text[start_index]
end_char = "}" if start_char == "{" else "]"
depth = 0
in_double_quote = False
in_single_quote = False
in_backtick = False
in_line_comment = False
in_block_comment = False
is_escaped = False
length = len(text)
i = start_index
while i < length:
char = text[i]
next_char = text[i + 1] if i + 1 < length else ""
if is_escaped:
is_escaped = False
i += 1
continue
if char == "\\" and not in_line_comment and not in_block_comment:
is_escaped = True
i += 1
continue
if in_line_comment:
if char == "\n":
in_line_comment = False
i += 1
continue
if in_block_comment:
if char == "*" and next_char == "/":
in_block_comment = False
i += 2
continue
i += 1
continue
if not in_double_quote and not in_single_quote and not in_backtick:
if char == "/" and next_char == "/":
in_line_comment = True
i += 2
continue
if char == "/" and next_char == "*":
in_block_comment = True
i += 2
continue
if in_double_quote:
if char == '"':
in_double_quote = False
i += 1
continue
if in_single_quote:
if char == "'":
in_single_quote = False
i += 1
continue
if in_backtick:
if char == "`":
in_backtick = False
i += 1
continue
if char == '"':
in_double_quote = True
i += 1
continue
if char == "'":
in_single_quote = True
i += 1
continue
if char == "`":
in_backtick = True
i += 1
continue
if char == start_char:
depth += 1
elif char == end_char:
depth -= 1
if depth == 0:
return i
i += 1
return -1
def extract_json_from_content(content: str) -> List[Any]:
    """Extract every parseable JSON object or array embedded in ``content``.

    The text is scanned left to right. At each ``{`` or ``[`` the matching
    closing bracket is located with ``_find_balanced_closing_index`` and the
    enclosed span is handed to ``json.loads``. A span that parses is collected
    and scanning resumes after it; a span that does not parse (or has no
    balanced closing bracket) is abandoned and scanning resumes at the next
    character, so valid JSON nested inside an invalid outer span can still be
    found.

    Args:
        content: Text that may contain JSON values mixed with other text.

    Returns:
        The parsed values in the order they appear in ``content``. An empty
        list is returned when ``content`` is empty or is not a string.
    """
    # Validate before touching the value: calling len() first would raise
    # TypeError for None and other unsized inputs and make this guard
    # unreachable for them.
    if not isinstance(content, str) or not content:
        logger.warning("Received empty or non-string content for JSON extraction")
        return []

    length = len(content)
    logger.debug("Starting JSON extraction from content of length %d", length)

    found_blocks: List[Any] = []
    cursor = 0
    while cursor < length:
        if content[cursor] not in ("{", "["):
            cursor += 1
            continue

        end_index = _find_balanced_closing_index(content, cursor)
        if end_index == -1:
            logger.debug("No balanced closing bracket found at cursor=%d", cursor)
            cursor += 1
            continue

        raw_candidate = content[cursor:end_index + 1]
        try:
            parsed = json.loads(raw_candidate)
        except json.JSONDecodeError:
            # Balanced but not valid JSON; retry from the next character.
            cursor += 1
            continue

        logger.debug("Successfully parsed JSON block at cursor=%d", cursor)
        found_blocks.append(parsed)
        # Skip past the whole parsed span so nested values are not reported
        # a second time.
        cursor = end_index + 1

    logger.info("JSON extraction complete: found %d block(s)", len(found_blocks))
    return found_blocks