Enhancement on feature/yaml output parser (#14674)

Building on my previously merged PR, I made some further
improvements:

* Added documentation to the existing Pydantic Parser notebook, with an
example using LCEL and `with_retry()` on `OutputParserException`.
* Added an additional output example to the prompt
* Made the parser more lenient about the LLM output format: output that is not enclosed in backticks is now parsed as YAML as a whole
* Amended the unit test to also cover output without backticks

FYI @hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Thomas B
2024-01-02 01:49:58 +01:00
committed by GitHub
parent ff10f30149
commit 9d8468a576
5 changed files with 153 additions and 8 deletions

View File

@@ -28,11 +28,12 @@ Here is the output schema:
YAML_FORMAT_INSTRUCTIONS = """The output should be formatted as a YAML instance that conforms to the given JSON schema below.
As an example, for the schema
# Examples
## Schema
```
{{'title': 'Players', 'description': 'A list of players', 'type': 'array', 'items': {{'$ref': '#/definitions/Player'}}, 'definitions': {{'Player': {{'title': 'Player', 'type': 'object', 'properties': {{'name': {{'title': 'Name', 'description': 'Player name', 'type': 'string'}}, 'avg': {{'title': 'Avg', 'description': 'Batting average', 'type': 'number'}}}}, 'required': ['name', 'avg']}}}}}}
{{"title": "Players", "description": "A list of players", "type": "array", "items": {{"$ref": "#/definitions/Player"}}, "definitions": {{"Player": {{"title": "Player", "type": "object", "properties": {{"name": {{"title": "Name", "description": "Player name", "type": "string"}}, "avg": {{"title": "Avg", "description": "Batting average", "type": "number"}}}}, "required": ["name", "avg"]}}}}}}
```
a well formatted instance would be:
## Well formatted instance
```
- name: John Doe
avg: 0.3
@@ -40,12 +41,22 @@ a well formatted instance would be:
avg: 1.4
```
## Schema
```
{{"properties": {{"habit": {{ "description": "A common daily habit", "type": "string" }}, "sustainable_alternative": {{ "description": "An environmentally friendly alternative to the habit", "type": "string"}}}}, "required": ["habit", "sustainable_alternative"]}}
```
## Well formatted instance
```
habit: Using disposable water bottles for daily hydration.
sustainable_alternative: Switch to a reusable water bottle to reduce plastic waste and decrease your environmental footprint.
```
Please follow the standard YAML formatting conventions with an indent of 2 spaces and make sure that the data types adhere strictly to the following JSON schema:
```
{schema}
```
Make sure to always enclose the YAML output in triple backticks (```)"""
Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!"""
PANDAS_DATAFRAME_FORMAT_INSTRUCTIONS = """The output should be formatted as a string as the operation, followed by a colon, followed by the column or row to be queried on, followed by optional array parameters.

View File

@@ -30,6 +30,9 @@ class YamlOutputParser(BaseOutputParser[T]):
yaml_str = ""
if match:
yaml_str = match.group("yaml")
else:
# If no backticks were present, try to parse the entire output as yaml.
yaml_str = text
json_object = yaml.safe_load(yaml_str)
return self.pydantic_object.parse_obj(json_object)
@@ -37,7 +40,7 @@ class YamlOutputParser(BaseOutputParser[T]):
except (yaml.YAMLError, ValidationError) as e:
name = self.pydantic_object.__name__
msg = f"Failed to parse {name} from completion {text}. Got: {e}"
raise OutputParserException(msg, llm_output=text)
raise OutputParserException(msg, llm_output=text) from e
def get_format_instructions(self) -> str:
schema = self.pydantic_object.schema()

View File

@@ -2,6 +2,7 @@
from enum import Enum
from typing import Optional
import pytest
from langchain_core.exceptions import OutputParserException
from langchain_core.pydantic_v1 import BaseModel, Field
@@ -39,6 +40,15 @@ for_new_lines: |
escape_newline:
```"""
DEF_RESULT_NO_BACKTICKS = """
action: Update
action_input: The yamlOutputParser class is powerful
additional_fields: null
for_new_lines: |
not_escape_newline:
escape_newline:
"""
# action 'update' with a lowercase 'u' to test schema validation failure.
DEF_RESULT_FAIL = """```yaml
@@ -55,16 +65,17 @@ DEF_EXPECTED_RESULT = TestModel(
)
def test_yaml_output_parser() -> None:
@pytest.mark.parametrize("result", [DEF_RESULT, DEF_RESULT_NO_BACKTICKS])
def test_yaml_output_parser(result: str) -> None:
"""Test yamlOutputParser."""
yaml_parser: YamlOutputParser[TestModel] = YamlOutputParser(
pydantic_object=TestModel
)
result = yaml_parser.parse(DEF_RESULT)
model = yaml_parser.parse(result)
print("parse_result:", result)
assert DEF_EXPECTED_RESULT == result
assert DEF_EXPECTED_RESULT == model
def test_yaml_output_parser_fail() -> None: