Add strict flag to the JSON parser (#9471)

This makes non-strict parsing (`strict=False`) the parser's default configuration,
since I think it's almost always what we want to happen. But we should evaluate
whether there are any issues.
This commit is contained in:
Eugene Yurtsev 2023-08-19 22:02:12 -04:00 committed by GitHub
parent 09a92bb9bf
commit e51bccdb28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 101 additions and 26 deletions

View File

@ -37,16 +37,32 @@ class OutputFunctionsParser(BaseGenerationOutputParser[Any]):
class JsonOutputFunctionsParser(OutputFunctionsParser):
    """Parse an output as the Json object."""

    strict: bool = False
    """Whether to decode the function call arguments in strict JSON mode.

    The value is forwarded to ``json.loads(..., strict=...)``. When False (the
    default here), control characters such as literal new lines are accepted
    inside JSON strings; when True they are rejected with a parse error.

    See: https://docs.python.org/3/library/json.html#encoders-and-decoders

    Leave this off when the parsed output may include raw new lines, e.g.
    generated code.
    """

    def parse_result(self, result: List[Generation]) -> Any:
        """Decode the JSON-encoded arguments of a function call.

        Args:
            result: Generations handed to the parent parser, whose return
                value is then JSON-decoded here.

        Returns:
            When ``args_only`` is set, the decoded form of the parent's result.
            Otherwise the parent's result with its ``"arguments"`` entry
            replaced by the decoded value.

        Raises:
            OutputParserException: If the payload is not valid JSON under the
                configured ``strict`` setting, or is not a type that
                ``json.loads`` accepts.
        """
        function_call_info = super().parse_result(result)
        if self.args_only:
            # The parent result is decoded directly in this mode.
            try:
                return json.loads(function_call_info, strict=self.strict)
            except (json.JSONDecodeError, TypeError) as exc:
                raise OutputParserException(
                    f"Could not parse function call data: {exc}"
                ) from exc

        # Full output: decode the arguments exactly once. Decoding them a
        # second time would pass a dict to json.loads and raise TypeError.
        try:
            function_call_info["arguments"] = json.loads(
                function_call_info["arguments"], strict=self.strict
            )
        except (json.JSONDecodeError, TypeError) as exc:
            raise OutputParserException(
                f"Could not parse function call data: {exc}"
            ) from exc
        return function_call_info

View File

@ -1,4 +1,4 @@
import json
from typing import Any, Dict
import pytest
@ -9,40 +9,99 @@ from langchain.schema import BaseMessage, ChatGeneration, OutputParserException
from langchain.schema.messages import AIMessage, HumanMessage
@pytest.fixture
def ai_message() -> AIMessage:
    """Return a simple AIMessage."""
    # NOTE(review): no test in this section requests this fixture; drop it if
    # nothing else in the module does.
    content = "This is a test message"
    args = json.dumps(
        {
            "arg1": "value1",
        }
    )
    function_call = {"name": "function_name", "arguments": args}
    additional_kwargs = {"function_call": function_call}
    return AIMessage(content=content, additional_kwargs=additional_kwargs)


def test_json_output_function_parser() -> None:
    """Test the JSON output function parser is configured with robust defaults."""
    # The "\n" below is a real newline character inside the JSON string, which
    # strict JSON decoding rejects; parsing it exercises the default setting.
    message = AIMessage(
        content="This is a test message",
        additional_kwargs={
            "function_call": {
                "name": "function_name",
                "arguments": '{"arg1": "code\ncode"}',
            }
        },
    )
    chat_generation = ChatGeneration(message=message)

    # Full output
    # Test that the parsers defaults are configured to parse in non-strict mode
    parser = JsonOutputFunctionsParser(args_only=False)
    result = parser.parse_result([chat_generation])
    assert result == {"arguments": {"arg1": "code\ncode"}, "name": "function_name"}

    # Args only
    parser = JsonOutputFunctionsParser(args_only=True)
    result = parser.parse_result([chat_generation])
    assert result == {"arg1": "code\ncode"}

    # Verify that the original message is not modified
    assert message.additional_kwargs == {
        "function_call": {
            "name": "function_name",
            "arguments": '{"arg1": "code\ncode"}',
        }
    }
@pytest.mark.parametrize(
    "config",
    [
        {
            "args_only": False,
            "strict": False,
            "args": '{"arg1": "value1"}',
            "result": {"arguments": {"arg1": "value1"}, "name": "function_name"},
            "exception": None,
        },
        {
            "args_only": True,
            "strict": False,
            "args": '{"arg1": "value1"}',
            "result": {"arg1": "value1"},
            "exception": None,
        },
        {
            "args_only": True,
            "strict": False,
            "args": '{"code": "print(2+\n2)"}',
            "result": {"code": "print(2+\n2)"},
            "exception": None,
        },
        {
            "args_only": True,
            "strict": False,
            "args": '{"code": "你好)"}',
            "result": {"code": "你好)"},
            "exception": None,
        },
        {
            "args_only": True,
            "strict": True,
            "args": '{"code": "print(2+\n2)"}',
            "exception": OutputParserException,
        },
    ],
)
def test_json_output_function_parser_strictness(config: Dict[str, Any]) -> None:
    """Check parser output, or the raised error, for each strictness case.

    Each ``config`` supplies the raw ``args`` string, the ``strict`` and
    ``args_only`` parser settings, and either the expected ``result`` or the
    ``exception`` type that parsing must raise.
    """
    function_call = {"name": "function_name", "arguments": config["args"]}
    message = AIMessage(
        content="This is a test message",
        additional_kwargs={"function_call": function_call},
    )
    generations = [ChatGeneration(message=message)]

    parser = JsonOutputFunctionsParser(
        strict=config["strict"], args_only=config["args_only"]
    )

    expected_exception = config["exception"]
    if expected_exception is None:
        # Success case: the parsed value must match the configured result.
        assert parser.parse_result(generations) == config["result"]
        return

    # Failure case: parsing must raise the configured exception type.
    with pytest.raises(expected_exception):
        parser.parse_result(generations)
@pytest.mark.parametrize(