mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-01 17:13:22 +00:00
core: Speed up json parse for large strings (#24036)
Benchmark on a large string: old 4.657918874989264, new 0.023724667000351474 (roughly 196x faster).
This commit is contained in:
parent
160fc7f246
commit
859e434932
@ -58,7 +58,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
||||
pass
|
||||
|
||||
# Initialize variables.
|
||||
new_s = ""
|
||||
new_chars = []
|
||||
stack = []
|
||||
is_inside_string = False
|
||||
escaped = False
|
||||
@ -90,29 +90,27 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
||||
return None
|
||||
|
||||
# Append the processed character to the new string.
|
||||
new_s += char
|
||||
new_chars.append(char)
|
||||
|
||||
# If we're still inside a string at the end of processing,
|
||||
# we need to close the string.
|
||||
if is_inside_string:
|
||||
new_s += '"'
|
||||
new_chars.append('"')
|
||||
|
||||
# Reverse the stack to get the closing characters.
|
||||
stack.reverse()
|
||||
|
||||
# Try to parse progressively shorter versions of the string until we succeed or run out of characters.
|
||||
while new_s:
|
||||
final_s = new_s
|
||||
|
||||
while new_chars:
|
||||
# Close any remaining open structures in the reverse
|
||||
# order that they were opened.
|
||||
for closing_char in reversed(stack):
|
||||
final_s += closing_char
|
||||
|
||||
# Attempt to parse the modified string as JSON.
|
||||
try:
|
||||
return json.loads(final_s, strict=strict)
|
||||
return json.loads("".join(new_chars + stack), strict=strict)
|
||||
except json.JSONDecodeError:
|
||||
# If we still can't parse the string as JSON,
|
||||
# try removing the last character
|
||||
new_s = new_s[:-1]
|
||||
new_chars.pop()
|
||||
|
||||
# If we got here, we ran out of characters to remove
|
||||
# and still couldn't parse the string as JSON, so return the parse error
|
||||
@ -120,6 +118,9 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
||||
return json.loads(s, strict=strict)
|
||||
|
||||
|
||||
_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)
|
||||
|
||||
|
||||
def parse_json_markdown(
|
||||
json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
|
||||
) -> dict:
|
||||
@ -136,7 +137,7 @@ def parse_json_markdown(
|
||||
return _parse_json(json_string, parser=parser)
|
||||
except json.JSONDecodeError:
|
||||
# Try to find JSON string within triple backticks
|
||||
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
|
||||
match = _json_markdown_re.search(json_string)
|
||||
|
||||
# If no match found, assume the entire string is a JSON string
|
||||
if match is None:
|
||||
@ -147,11 +148,14 @@ def parse_json_markdown(
|
||||
return _parse_json(json_str, parser=parser)
|
||||
|
||||
|
||||
_json_strip_chars = " \n\r\t`"
|
||||
|
||||
|
||||
def _parse_json(
|
||||
json_str: str, *, parser: Callable[[str], Any] = parse_partial_json
|
||||
) -> dict:
|
||||
# Strip whitespace and newlines from the start and end
|
||||
json_str = json_str.strip().strip("`")
|
||||
# Strip whitespace, newlines, and backticks from the start and end
|
||||
json_str = json_str.strip(_json_strip_chars)
|
||||
|
||||
# handle newlines and other special characters inside the returned value
|
||||
json_str = _custom_parser(json_str)
|
||||
|
Loading…
Reference in New Issue
Block a user