core[patch]: Fix JsonOutputParser failing when a JSON value contains ``` inside it. (#19717)

- **Issue:** fix #19646 
- @baskaryan, @eyurtsev PTAL
This commit is contained in:
Guangdong Liu 2024-03-29 03:01:09 +08:00 committed by GitHub
parent f7042321f1
commit 0571f886d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 11 deletions

View File

@ -137,6 +137,9 @@ def parse_json_markdown(
Returns:
The parsed JSON object as a Python dictionary.
"""
try:
return _parse_json(json_string, parser=parser)
except json.JSONDecodeError:
# Try to find JSON string within triple backticks
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
@ -146,7 +149,12 @@ def parse_json_markdown(
else:
# If match found, use the content within the backticks
json_str = match.group(2)
return _parse_json(json_str, parser=parser)
def _parse_json(
json_str: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
# Strip whitespace and newlines, then any leading/trailing backticks
json_str = json_str.strip().strip("`")
@ -154,9 +162,7 @@ def parse_json_markdown(
json_str = _custom_parser(json_str)
# Parse the JSON string into a Python dictionary
parsed = parser(json_str)
return parsed
return parser(json_str)
def parse_and_check_json_markdown(text: str, expected_keys: List[str]) -> dict:

View File

@ -69,6 +69,10 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
}
```"""
JSON_WITH_PART_MARKDOWN_CODE_BLOCK = """
{\"valid_json\": "hey ```print(hello world!)``` hey"}
"""
JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
{
"action": "Final Answer",
@ -191,6 +195,11 @@ def test_parse_json_with_code_blocks() -> None:
assert parsed == {"foo": "```bar```"}
def test_parse_json_with_part_code_blocks() -> None:
# The input is plain JSON whose string value contains triple backticks;
# the parsed value is expected to keep those backticks verbatim.
parsed = parse_json_markdown(JSON_WITH_PART_MARKDOWN_CODE_BLOCK)
assert parsed == {"valid_json": "hey ```print(hello world!)``` hey"}
def test_parse_json_with_code_blocks_and_newlines() -> None:
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)