Update chat agents' output parser to extract action by regex (#7511)

Currently `ChatOutputParser` extracts actions by splitting the text on
"```", and then load the second part as a json string.

But sometimes the LLM will wrap the action in markdown code block like:

````markdown
```json
{
  "action": "foo",
  "action_input": "bar"
}
```
````

Splitting text on "```" will cause `OutputParserException` in such case.

This PR changes the behaviour to extract the `$JSON_BLOB` by regex, so
that it can handle both ` ``` ``` ` and ` ```json ``` `

@hinthornw

---------

Co-authored-by: Junlin Zhou <jlzhou@zjuici.com>
This commit is contained in:
Junlin Zhou 2023-07-12 15:12:02 +08:00 committed by GitHub
parent ebcb144342
commit 5f17c57174
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 1 deletions

View File

@ -1,4 +1,5 @@
import json
import re
from typing import Union
from langchain.agents.agent import AgentOutputParser
@ -9,13 +10,19 @@ FINAL_ANSWER_ACTION = "Final Answer:"
class ChatOutputParser(AgentOutputParser):
pattern = re.compile(r"^.*?`{3}(?:json)?\n(.*?)`{3}.*?$", re.DOTALL)
def get_format_instructions(self) -> str:
return FORMAT_INSTRUCTIONS
def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
includes_answer = FINAL_ANSWER_ACTION in text
try:
action = text.split("```")[1]
found = self.pattern.search(text)
if not found:
# Fast fail to parse Final Answer.
raise ValueError("action not found")
action = found.group(1)
response = json.loads(action.strip())
includes_action = "action" in response
if includes_answer and includes_action:

View File

@ -0,0 +1,47 @@
"""Unittests for langchain.agents.chat package."""
from typing import Tuple
from langchain.agents.chat.output_parser import ChatOutputParser
from langchain.schema import AgentAction
output_parser = ChatOutputParser()
def get_action_and_input(text: str) -> Tuple[str, str]:
output = output_parser.parse(text)
if isinstance(output, AgentAction):
return output.tool, str(output.tool_input)
else:
return "Final Answer", output.return_values["output"]
def test_parse_with_language() -> None:
llm_output = """I can use the `foo` tool to achieve the goal.
Action:
```json
{
"action": "foo",
"action_input": "bar"
}
```
"""
action, action_input = get_action_and_input(llm_output)
assert action == "foo"
assert action_input == "bar"
def test_parse_without_language() -> None:
llm_output = """I can use the `foo` tool to achieve the goal.
Action:
```
{
"action": "foo",
"action_input": "bar"
}
```
"""
action, action_input = get_action_and_input(llm_output)
assert action == "foo"
assert action_input == "bar"