mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-03 18:24:10 +00:00
core: Speed up json parse for large strings (#24036)
Benchmark for a large string: old 4.657918874989264, new 0.023724667000351474.
This commit is contained in:
parent
160fc7f246
commit
859e434932
@ -58,7 +58,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
# Initialize variables.
|
# Initialize variables.
|
||||||
new_s = ""
|
new_chars = []
|
||||||
stack = []
|
stack = []
|
||||||
is_inside_string = False
|
is_inside_string = False
|
||||||
escaped = False
|
escaped = False
|
||||||
@ -90,29 +90,27 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# Append the processed character to the new string.
|
# Append the processed character to the new string.
|
||||||
new_s += char
|
new_chars.append(char)
|
||||||
|
|
||||||
# If we're still inside a string at the end of processing,
|
# If we're still inside a string at the end of processing,
|
||||||
# we need to close the string.
|
# we need to close the string.
|
||||||
if is_inside_string:
|
if is_inside_string:
|
||||||
new_s += '"'
|
new_chars.append('"')
|
||||||
|
|
||||||
|
# Reverse the stack to get the closing characters.
|
||||||
|
stack.reverse()
|
||||||
|
|
||||||
# Try to parse mods of string until we succeed or run out of characters.
|
# Try to parse mods of string until we succeed or run out of characters.
|
||||||
while new_s:
|
while new_chars:
|
||||||
final_s = new_s
|
|
||||||
|
|
||||||
# Close any remaining open structures in the reverse
|
# Close any remaining open structures in the reverse
|
||||||
# order that they were opened.
|
# order that they were opened.
|
||||||
for closing_char in reversed(stack):
|
|
||||||
final_s += closing_char
|
|
||||||
|
|
||||||
# Attempt to parse the modified string as JSON.
|
# Attempt to parse the modified string as JSON.
|
||||||
try:
|
try:
|
||||||
return json.loads(final_s, strict=strict)
|
return json.loads("".join(new_chars + stack), strict=strict)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# If we still can't parse the string as JSON,
|
# If we still can't parse the string as JSON,
|
||||||
# try removing the last character
|
# try removing the last character
|
||||||
new_s = new_s[:-1]
|
new_chars.pop()
|
||||||
|
|
||||||
# If we got here, we ran out of characters to remove
|
# If we got here, we ran out of characters to remove
|
||||||
# and still couldn't parse the string as JSON, so return the parse error
|
# and still couldn't parse the string as JSON, so return the parse error
|
||||||
@ -120,6 +118,9 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any:
|
|||||||
return json.loads(s, strict=strict)
|
return json.loads(s, strict=strict)
|
||||||
|
|
||||||
|
|
||||||
|
_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def parse_json_markdown(
|
def parse_json_markdown(
|
||||||
json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
|
json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
|
||||||
) -> dict:
|
) -> dict:
|
||||||
@ -136,7 +137,7 @@ def parse_json_markdown(
|
|||||||
return _parse_json(json_string, parser=parser)
|
return _parse_json(json_string, parser=parser)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# Try to find JSON string within triple backticks
|
# Try to find JSON string within triple backticks
|
||||||
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
|
match = _json_markdown_re.search(json_string)
|
||||||
|
|
||||||
# If no match found, assume the entire string is a JSON string
|
# If no match found, assume the entire string is a JSON string
|
||||||
if match is None:
|
if match is None:
|
||||||
@ -147,11 +148,14 @@ def parse_json_markdown(
|
|||||||
return _parse_json(json_str, parser=parser)
|
return _parse_json(json_str, parser=parser)
|
||||||
|
|
||||||
|
|
||||||
|
_json_strip_chars = " \n\r\t`"
|
||||||
|
|
||||||
|
|
||||||
def _parse_json(
|
def _parse_json(
|
||||||
json_str: str, *, parser: Callable[[str], Any] = parse_partial_json
|
json_str: str, *, parser: Callable[[str], Any] = parse_partial_json
|
||||||
) -> dict:
|
) -> dict:
|
||||||
# Strip whitespace and newlines from the start and end
|
# Strip whitespace, newlines, and backticks from the start and end
|
||||||
json_str = json_str.strip().strip("`")
|
json_str = json_str.strip(_json_strip_chars)
|
||||||
|
|
||||||
# handle newlines and other special characters inside the returned value
|
# handle newlines and other special characters inside the returned value
|
||||||
json_str = _custom_parser(json_str)
|
json_str = _custom_parser(json_str)
|
||||||
|
Loading…
Reference in New Issue
Block a user