mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-30 10:23:30 +00:00
Enhancement on feature/yaml output parser (#14674)
Adding to my previous, already merged PR, I made some further improvements: * Added documentation to the existing Pydantic Parser notebook, with an example using LCEL and `with_retry()` on `OutputParserException`. * Added an additional output example to the prompt * More lenient parser in terms of LLM output format * Amended unit test FYI @hwchase17 --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
ff10f30149
commit
9d8468a576
@ -25,6 +25,7 @@ This is a list of output parsers LangChain supports. The table below has various
|
||||
| [OutputFixing](./output_fixing) | | | ✅ | `str \| Message` | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. | | |
|
||||
| [RetryWithError](./retry) | | | ✅ | `str \| Message` | | Wraps another output parser. If that output parser errors, then this will pass the original inputs, the bad output, and the error message to an LLM and ask it to fix it. Compared to OutputFixingParser, this one also sends the original instructions. | | |
|
||||
| [Pydantic](./pydantic) | | ✅ | | `str \| Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. | | |
|
||||
| [YAML](./yaml) | | ✅ | | `str \| Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. Uses YAML to encode it. | | |
|
||||
| [PandasDataFrame](./pandas_dataframe) | | ✅ | | `str \| Message` | `dict` | Useful for doing operations with pandas DataFrames. | | |
|
||||
| [Enum](./enum) | | ✅ | | `str \| Message` | `Enum` | Parses response into one of the provided enum values. | | |
|
||||
| [Datetime](./datetime) | | ✅ | | `str \| Message` | `datetime.datetime` | Parses response into a `datetime.datetime` object. | | |
|
||||
|
119
docs/docs/modules/model_io/output_parsers/types/yaml.ipynb
Normal file
119
docs/docs/modules/model_io/output_parsers/types/yaml.ipynb
Normal file
@ -0,0 +1,119 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72b1b316",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YAML parser\n",
|
||||
"This output parser allows users to specify an arbitrary schema and query LLMs for outputs that conform to that schema, using YAML to format their response.\n",
|
||||
"\n",
|
||||
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed YAML. In the OpenAI family, DaVinci can do this reliably, but Curie's ability already drops off dramatically. \n",
|
||||
"\n",
|
||||
"You can optionally use Pydantic to declare your data model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cd33369f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.output_parsers import YamlOutputParser\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "9b4d242f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a1090014",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define your desired data structure.\n",
|
||||
"class Joke(BaseModel):\n",
|
||||
" setup: str = Field(description=\"question to set up a joke\")\n",
|
||||
" punchline: str = Field(description=\"answer to resolve the joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "4ccf45a3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup=\"Why don't scientists trust atoms?\", punchline='Because they make up everything!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# And a query intended to prompt a language model to populate the data structure.\n",
|
||||
"joke_query = \"Tell me a joke.\"\n",
|
||||
"\n",
|
||||
"# Set up a parser + inject instructions into the prompt template.\n",
|
||||
"parser = YamlOutputParser(pydantic_object=Joke)\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
|
||||
" input_variables=[\"query\"],\n",
|
||||
" partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | model | parser\n",
|
||||
"\n",
|
||||
"chain.invoke({\"query\": joke_query})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4d12261",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -28,11 +28,12 @@ Here is the output schema:
|
||||
|
||||
YAML_FORMAT_INSTRUCTIONS = """The output should be formatted as a YAML instance that conforms to the given JSON schema below.
|
||||
|
||||
As an example, for the schema
|
||||
# Examples
|
||||
## Schema
|
||||
```
|
||||
{{'title': 'Players', 'description': 'A list of players', 'type': 'array', 'items': {{'$ref': '#/definitions/Player'}}, 'definitions': {{'Player': {{'title': 'Player', 'type': 'object', 'properties': {{'name': {{'title': 'Name', 'description': 'Player name', 'type': 'string'}}, 'avg': {{'title': 'Avg', 'description': 'Batting average', 'type': 'number'}}}}, 'required': ['name', 'avg']}}}}}}
|
||||
{{"title": "Players", "description": "A list of players", "type": "array", "items": {{"$ref": "#/definitions/Player"}}, "definitions": {{"Player": {{"title": "Player", "type": "object", "properties": {{"name": {{"title": "Name", "description": "Player name", "type": "string"}}, "avg": {{"title": "Avg", "description": "Batting average", "type": "number"}}}}, "required": ["name", "avg"]}}}}}}
|
||||
```
|
||||
a well formatted instance would be:
|
||||
## Well formatted instance
|
||||
```
|
||||
- name: John Doe
|
||||
avg: 0.3
|
||||
@ -40,12 +41,22 @@ a well formatted instance would be:
|
||||
avg: 1.4
|
||||
```
|
||||
|
||||
## Schema
|
||||
```
|
||||
{{"properties": {{"habit": {{ "description": "A common daily habit", "type": "string" }}, "sustainable_alternative": {{ "description": "An environmentally friendly alternative to the habit", "type": "string"}}}}, "required": ["habit", "sustainable_alternative"]}}
|
||||
```
|
||||
## Well formatted instance
|
||||
```
|
||||
habit: Using disposable water bottles for daily hydration.
|
||||
sustainable_alternative: Switch to a reusable water bottle to reduce plastic waste and decrease your environmental footprint.
|
||||
```
|
||||
|
||||
Please follow the standard YAML formatting conventions with an indent of 2 spaces and make sure that the data types adhere strictly to the following JSON schema:
|
||||
```
|
||||
{schema}
|
||||
```
|
||||
|
||||
Make sure to always enclose the YAML output in triple backticks (```)"""
|
||||
Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!"""
|
||||
|
||||
|
||||
PANDAS_DATAFRAME_FORMAT_INSTRUCTIONS = """The output should be formatted as a string as the operation, followed by a colon, followed by the column or row to be queried on, followed by optional array parameters.
|
||||
|
@ -30,6 +30,9 @@ class YamlOutputParser(BaseOutputParser[T]):
|
||||
yaml_str = ""
|
||||
if match:
|
||||
yaml_str = match.group("yaml")
|
||||
else:
|
||||
# If no backticks were present, try to parse the entire output as yaml.
|
||||
yaml_str = text
|
||||
|
||||
json_object = yaml.safe_load(yaml_str)
|
||||
return self.pydantic_object.parse_obj(json_object)
|
||||
@ -37,7 +40,7 @@ class YamlOutputParser(BaseOutputParser[T]):
|
||||
except (yaml.YAMLError, ValidationError) as e:
|
||||
name = self.pydantic_object.__name__
|
||||
msg = f"Failed to parse {name} from completion {text}. Got: {e}"
|
||||
raise OutputParserException(msg, llm_output=text)
|
||||
raise OutputParserException(msg, llm_output=text) from e
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
schema = self.pydantic_object.schema()
|
||||
|
@ -2,6 +2,7 @@
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
|
||||
@ -39,6 +40,15 @@ for_new_lines: |
|
||||
escape_newline:
|
||||
|
||||
```"""
|
||||
DEF_RESULT_NO_BACKTICKS = """
|
||||
action: Update
|
||||
action_input: The yamlOutputParser class is powerful
|
||||
additional_fields: null
|
||||
for_new_lines: |
|
||||
not_escape_newline:
|
||||
escape_newline:
|
||||
|
||||
"""
|
||||
|
||||
# action 'update' with a lowercase 'u' to test schema validation failure.
|
||||
DEF_RESULT_FAIL = """```yaml
|
||||
@ -55,16 +65,17 @@ DEF_EXPECTED_RESULT = TestModel(
|
||||
)
|
||||
|
||||
|
||||
def test_yaml_output_parser() -> None:
|
||||
@pytest.mark.parametrize("result", [DEF_RESULT, DEF_RESULT_NO_BACKTICKS])
|
||||
def test_yaml_output_parser(result: str) -> None:
|
||||
"""Test yamlOutputParser."""
|
||||
|
||||
yaml_parser: YamlOutputParser[TestModel] = YamlOutputParser(
|
||||
pydantic_object=TestModel
|
||||
)
|
||||
|
||||
result = yaml_parser.parse(DEF_RESULT)
|
||||
model = yaml_parser.parse(result)
|
||||
print("parse_result:", result)
|
||||
assert DEF_EXPECTED_RESULT == result
|
||||
assert DEF_EXPECTED_RESULT == model
|
||||
|
||||
|
||||
def test_yaml_output_parser_fail() -> None:
|
||||
|
Loading…
Reference in New Issue
Block a user