langchain[minor], core[minor]: update json, pydantic parser. add openai-json structured output runnable (#16914)

This commit is contained in:
Bagatur
2024-02-08 11:59:06 -08:00
committed by GitHub
parent e22c4d4eb0
commit 852973d616
8 changed files with 370 additions and 270 deletions

View File

@@ -35,7 +35,7 @@ def _custom_parser(multiline_string: str) -> str:
multiline_string = multiline_string.decode()
multiline_string = re.sub(
r'("action_input"\:\s*")(.*)(")',
r'("action_input"\:\s*")(.*?)(")',
_replace_new_line,
multiline_string,
flags=re.DOTALL,
@@ -138,7 +138,7 @@ def parse_json_markdown(
The parsed JSON object as a Python dictionary.
"""
# Try to find JSON string within triple backticks
match = re.search(r"```(json)?(.*)(```)?", json_string, re.DOTALL)
match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)
# If no match found, assume the entire string is a JSON string
if match is None:
@@ -148,7 +148,7 @@ def parse_json_markdown(
json_str = match.group(2)
# Strip whitespace and newlines from the start and end
json_str = json_str.strip()
json_str = json_str.strip().strip("`")
# handle newlines and other special characters inside the returned value
json_str = _custom_parser(json_str)
@@ -211,7 +211,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
try:
return parse_json_markdown(text)
except JSONDecodeError as e:
raise OutputParserException(f"Invalid json output: {text}") from e
msg = f"Invalid json output: {text}"
raise OutputParserException(msg, llm_output=text) from e
def parse(self, text: str) -> Any:
    """Parse a raw text completion.

    The string is wrapped in a single ``Generation`` and handed to
    ``parse_result``, so plain-text input goes through the same code path
    as generation-based input.

    Args:
        text: The raw text to parse.

    Returns:
        Whatever ``parse_result`` returns for the wrapped text.
    """
    generations = [Generation(text=text)]
    return self.parse_result(generations)

View File

@@ -70,21 +70,7 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
{
"action": "Final Answer",
"action_input": "```bar\n<div id="1" class=\"value\">\n\ttext\n</div>```"
}
```"""
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{"foo": "bar", "bar": "foo"}"
}
```"""
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
{
"action": "Final Answer",
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
"action_input": "```bar\n<div id=\\"1\\" class=\\"value\\">\n\ttext\n</div>```"
}
```"""
@@ -202,6 +188,8 @@ def test_parse_json_with_code_blocks() -> None:
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK)
assert parsed == {"foo": "```bar```"}
def test_parse_json_with_code_blocks_and_newlines() -> None:
parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)
assert parsed == {
@@ -211,8 +199,6 @@ def test_parse_json_with_code_blocks() -> None:
TEST_CASES_ESCAPED_QUOTES = [
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
]