mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-22 11:00:37 +00:00
Enhancement on feature/yaml output parser (#14674)
Building on my previously merged PR, I made some further improvements:

* Added documentation to the existing Pydantic Parser notebook, with an example using LCEL and `with_retry()` on `OutputParserException`.
* Added an additional output example to the prompt.
* Made the parser more lenient in terms of LLM output format.
* Amended the unit test.

FYI @hwchase17

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
@@ -28,11 +28,12 @@ Here is the output schema:
|
||||
|
||||
YAML_FORMAT_INSTRUCTIONS = """The output should be formatted as a YAML instance that conforms to the given JSON schema below.
|
||||
|
||||
As an example, for the schema
|
||||
# Examples
|
||||
## Schema
|
||||
```
|
||||
{{'title': 'Players', 'description': 'A list of players', 'type': 'array', 'items': {{'$ref': '#/definitions/Player'}}, 'definitions': {{'Player': {{'title': 'Player', 'type': 'object', 'properties': {{'name': {{'title': 'Name', 'description': 'Player name', 'type': 'string'}}, 'avg': {{'title': 'Avg', 'description': 'Batting average', 'type': 'number'}}}}, 'required': ['name', 'avg']}}}}}}
|
||||
{{"title": "Players", "description": "A list of players", "type": "array", "items": {{"$ref": "#/definitions/Player"}}, "definitions": {{"Player": {{"title": "Player", "type": "object", "properties": {{"name": {{"title": "Name", "description": "Player name", "type": "string"}}, "avg": {{"title": "Avg", "description": "Batting average", "type": "number"}}}}, "required": ["name", "avg"]}}}}}}
|
||||
```
|
||||
a well formatted instance would be:
|
||||
## Well formatted instance
|
||||
```
|
||||
- name: John Doe
|
||||
avg: 0.3
|
||||
@@ -40,12 +41,22 @@ a well formatted instance would be:
|
||||
avg: 1.4
|
||||
```
|
||||
|
||||
## Schema
|
||||
```
|
||||
{{"properties": {{"habit": {{ "description": "A common daily habit", "type": "string" }}, "sustainable_alternative": {{ "description": "An environmentally friendly alternative to the habit", "type": "string"}}}}, "required": ["habit", "sustainable_alternative"]}}
|
||||
```
|
||||
## Well formatted instance
|
||||
```
|
||||
habit: Using disposable water bottles for daily hydration.
|
||||
sustainable_alternative: Switch to a reusable water bottle to reduce plastic waste and decrease your environmental footprint.
|
||||
```
|
||||
|
||||
Please follow the standard YAML formatting conventions with an indent of 2 spaces and make sure that the data types adhere strictly to the following JSON schema:
|
||||
```
|
||||
{schema}
|
||||
```
|
||||
|
||||
Make sure to always enclose the YAML output in triple backticks (```)"""
|
||||
Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!"""
|
||||
|
||||
|
||||
PANDAS_DATAFRAME_FORMAT_INSTRUCTIONS = """The output should be formatted as a string as the operation, followed by a colon, followed by the column or row to be queried on, followed by optional array parameters.
|
||||
|
@@ -30,6 +30,9 @@ class YamlOutputParser(BaseOutputParser[T]):
|
||||
yaml_str = ""
|
||||
if match:
|
||||
yaml_str = match.group("yaml")
|
||||
else:
|
||||
# If no backticks were present, try to parse the entire output as yaml.
|
||||
yaml_str = text
|
||||
|
||||
json_object = yaml.safe_load(yaml_str)
|
||||
return self.pydantic_object.parse_obj(json_object)
|
||||
@@ -37,7 +40,7 @@ class YamlOutputParser(BaseOutputParser[T]):
|
||||
except (yaml.YAMLError, ValidationError) as e:
|
||||
name = self.pydantic_object.__name__
|
||||
msg = f"Failed to parse {name} from completion {text}. Got: {e}"
|
||||
raise OutputParserException(msg, llm_output=text)
|
||||
raise OutputParserException(msg, llm_output=text) from e
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
schema = self.pydantic_object.schema()
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
|
||||
@@ -39,6 +40,15 @@ for_new_lines: |
|
||||
escape_newline:
|
||||
|
||||
```"""
|
||||
DEF_RESULT_NO_BACKTICKS = """
|
||||
action: Update
|
||||
action_input: The yamlOutputParser class is powerful
|
||||
additional_fields: null
|
||||
for_new_lines: |
|
||||
not_escape_newline:
|
||||
escape_newline:
|
||||
|
||||
"""
|
||||
|
||||
# action 'update' with a lowercase 'u' to test schema validation failure.
|
||||
DEF_RESULT_FAIL = """```yaml
|
||||
@@ -55,16 +65,17 @@ DEF_EXPECTED_RESULT = TestModel(
|
||||
)
|
||||
|
||||
|
||||
def test_yaml_output_parser() -> None:
|
||||
@pytest.mark.parametrize("result", [DEF_RESULT, DEF_RESULT_NO_BACKTICKS])
|
||||
def test_yaml_output_parser(result: str) -> None:
|
||||
"""Test yamlOutputParser."""
|
||||
|
||||
yaml_parser: YamlOutputParser[TestModel] = YamlOutputParser(
|
||||
pydantic_object=TestModel
|
||||
)
|
||||
|
||||
result = yaml_parser.parse(DEF_RESULT)
|
||||
model = yaml_parser.parse(result)
|
||||
print("parse_result:", result)
|
||||
assert DEF_EXPECTED_RESULT == result
|
||||
assert DEF_EXPECTED_RESULT == model
|
||||
|
||||
|
||||
def test_yaml_output_parser_fail() -> None:
|
||||
|
Reference in New Issue
Block a user