mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 18:50:33 +00:00
style(core): more cleanup all around (#33711)
This commit is contained in:
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
|
|||||||
"""Base abstract message class.
|
"""Base abstract message class.
|
||||||
|
|
||||||
Messages are the inputs and outputs of a chat model.
|
Messages are the inputs and outputs of a chat model.
|
||||||
|
|
||||||
|
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
|
||||||
|
[`AIMessage`][langchain.messages.AIMessage], and
|
||||||
|
[`SystemMessage`][langchain.messages.SystemMessage].
|
||||||
"""
|
"""
|
||||||
|
|
||||||
content: str | list[str | dict]
|
content: str | list[str | dict]
|
||||||
|
|||||||
@@ -1,4 +1,20 @@
|
|||||||
"""**OutputParser** classes parse the output of an LLM call."""
|
"""`OutputParser` classes parse the output of an LLM call into structured data.
|
||||||
|
|
||||||
|
!!! tip "Structured output"
|
||||||
|
|
||||||
|
Output parsers emerged as an early solution to the challenge of obtaining structured
|
||||||
|
output from LLMs.
|
||||||
|
|
||||||
|
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
|
||||||
|
natively. In such cases, using output parsers may be unnecessary, and you should
|
||||||
|
leverage the model's built-in capabilities for structured output. Refer to the
|
||||||
|
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
|
||||||
|
for guidance on how to achieve structured output directly.
|
||||||
|
|
||||||
|
Output parsers remain valuable when working with models that do not support
|
||||||
|
structured output natively, or when you require additional processing or validation
|
||||||
|
of the model's output beyond its inherent capabilities.
|
||||||
|
"""
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
|||||||
@@ -135,6 +135,9 @@ class BaseOutputParser(
|
|||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
|
# Implement a simple boolean output parser
|
||||||
|
|
||||||
|
|
||||||
class BooleanOutputParser(BaseOutputParser[bool]):
|
class BooleanOutputParser(BaseOutputParser[bool]):
|
||||||
true_val: str = "YES"
|
true_val: str = "YES"
|
||||||
false_val: str = "NO"
|
false_val: str = "NO"
|
||||||
|
|||||||
@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
|
|||||||
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
||||||
"""Parse the output of an LLM call to a JSON object.
|
"""Parse the output of an LLM call to a JSON object.
|
||||||
|
|
||||||
|
Probably the most reliable output parser for getting structured data that does *not*
|
||||||
|
use function calling.
|
||||||
|
|
||||||
When used in streaming mode, it will yield partial JSON objects containing
|
When used in streaming mode, it will yield partial JSON objects containing
|
||||||
all the keys that have been returned so far.
|
all the keys that have been returned so far.
|
||||||
|
|
||||||
In streaming, if `diff` is set to `True`, yields JSONPatch operations
|
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
|
||||||
describing the difference between the previous and the current object.
|
difference between the previous and the current object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
|
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ def droplastn(
|
|||||||
|
|
||||||
|
|
||||||
class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||||
"""Parse the output of an LLM call to a list."""
|
"""Parse the output of a model to a list."""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _type(self) -> str:
|
def _type(self) -> str:
|
||||||
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
|||||||
buffer = ""
|
buffer = ""
|
||||||
for chunk in input:
|
for chunk in input:
|
||||||
if isinstance(chunk, BaseMessage):
|
if isinstance(chunk, BaseMessage):
|
||||||
# extract text
|
# Extract text
|
||||||
chunk_content = chunk.content
|
chunk_content = chunk.content
|
||||||
if not isinstance(chunk_content, str):
|
if not isinstance(chunk_content, str):
|
||||||
continue
|
continue
|
||||||
buffer += chunk_content
|
buffer += chunk_content
|
||||||
else:
|
else:
|
||||||
# add current chunk to buffer
|
# Add current chunk to buffer
|
||||||
buffer += chunk
|
buffer += chunk
|
||||||
# parse buffer into a list of parts
|
# Parse buffer into a list of parts
|
||||||
try:
|
try:
|
||||||
done_idx = 0
|
done_idx = 0
|
||||||
# yield only complete parts
|
# Yield only complete parts
|
||||||
for m in droplastn(self.parse_iter(buffer), 1):
|
for m in droplastn(self.parse_iter(buffer), 1):
|
||||||
done_idx = m.end()
|
done_idx = m.end()
|
||||||
yield [m.group(1)]
|
yield [m.group(1)]
|
||||||
buffer = buffer[done_idx:]
|
buffer = buffer[done_idx:]
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
parts = self.parse(buffer)
|
parts = self.parse(buffer)
|
||||||
# yield only complete parts
|
# Yield only complete parts
|
||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
for part in parts[:-1]:
|
for part in parts[:-1]:
|
||||||
yield [part]
|
yield [part]
|
||||||
buffer = parts[-1]
|
buffer = parts[-1]
|
||||||
# yield the last part
|
# Yield the last part
|
||||||
for part in self.parse(buffer):
|
for part in self.parse(buffer):
|
||||||
yield [part]
|
yield [part]
|
||||||
|
|
||||||
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
|||||||
buffer = ""
|
buffer = ""
|
||||||
async for chunk in input:
|
async for chunk in input:
|
||||||
if isinstance(chunk, BaseMessage):
|
if isinstance(chunk, BaseMessage):
|
||||||
# extract text
|
# Extract text
|
||||||
chunk_content = chunk.content
|
chunk_content = chunk.content
|
||||||
if not isinstance(chunk_content, str):
|
if not isinstance(chunk_content, str):
|
||||||
continue
|
continue
|
||||||
buffer += chunk_content
|
buffer += chunk_content
|
||||||
else:
|
else:
|
||||||
# add current chunk to buffer
|
# Add current chunk to buffer
|
||||||
buffer += chunk
|
buffer += chunk
|
||||||
# parse buffer into a list of parts
|
# Parse buffer into a list of parts
|
||||||
try:
|
try:
|
||||||
done_idx = 0
|
done_idx = 0
|
||||||
# yield only complete parts
|
# Yield only complete parts
|
||||||
for m in droplastn(self.parse_iter(buffer), 1):
|
for m in droplastn(self.parse_iter(buffer), 1):
|
||||||
done_idx = m.end()
|
done_idx = m.end()
|
||||||
yield [m.group(1)]
|
yield [m.group(1)]
|
||||||
buffer = buffer[done_idx:]
|
buffer = buffer[done_idx:]
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
parts = self.parse(buffer)
|
parts = self.parse(buffer)
|
||||||
# yield only complete parts
|
# Yield only complete parts
|
||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
for part in parts[:-1]:
|
for part in parts[:-1]:
|
||||||
yield [part]
|
yield [part]
|
||||||
buffer = parts[-1]
|
buffer = parts[-1]
|
||||||
# yield the last part
|
# Yield the last part
|
||||||
for part in self.parse(buffer):
|
for part in self.parse(buffer):
|
||||||
yield [part]
|
yield [part]
|
||||||
|
|
||||||
|
|
||||||
class CommaSeparatedListOutputParser(ListOutputParser):
|
class CommaSeparatedListOutputParser(ListOutputParser):
|
||||||
"""Parse the output of an LLM call to a comma-separated list."""
|
"""Parse the output of a model to a comma-separated list."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_lc_serializable(cls) -> bool:
|
def is_lc_serializable(cls) -> bool:
|
||||||
"""Return True as this class is serializable."""
|
"""Return `True` as this class is serializable."""
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
|
|||||||
)
|
)
|
||||||
return [item for sublist in reader for item in sublist]
|
return [item for sublist in reader for item in sublist]
|
||||||
except csv.Error:
|
except csv.Error:
|
||||||
# keep old logic for backup
|
# Keep old logic for backup
|
||||||
return [part.strip() for part in text.split(",")]
|
return [part.strip() for part in text.split(",")]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
|
|||||||
|
|
||||||
|
|
||||||
class StrOutputParser(BaseTransformOutputParser[str]):
|
class StrOutputParser(BaseTransformOutputParser[str]):
|
||||||
"""OutputParser that parses LLMResult into the top likely string."""
|
"""OutputParser that parses `LLMResult` into the top likely string."""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_lc_serializable(cls) -> bool:
|
def is_lc_serializable(cls) -> bool:
|
||||||
"""StrOutputParser is serializable.
|
"""`StrOutputParser` is serializable.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True
|
`True`
|
||||||
"""
|
"""
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|||||||
@@ -43,19 +43,19 @@ class _StreamingParser:
|
|||||||
"""Streaming parser for XML.
|
"""Streaming parser for XML.
|
||||||
|
|
||||||
This implementation is pulled into a class to avoid implementation
|
This implementation is pulled into a class to avoid implementation
|
||||||
drift between transform and atransform of the XMLOutputParser.
|
drift between transform and atransform of the `XMLOutputParser`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
|
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
|
||||||
"""Initialize the streaming parser.
|
"""Initialize the streaming parser.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
|
||||||
See documentation in XMLOutputParser for more information.
|
`'xml'`. See documentation in `XMLOutputParser` for more information.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ImportError: If defusedxml is not installed and the defusedxml
|
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
|
||||||
parser is requested.
|
requested.
|
||||||
"""
|
"""
|
||||||
if parser == "defusedxml":
|
if parser == "defusedxml":
|
||||||
if not _HAS_DEFUSEDXML:
|
if not _HAS_DEFUSEDXML:
|
||||||
@@ -79,10 +79,10 @@ class _StreamingParser:
|
|||||||
"""Parse a chunk of text.
|
"""Parse a chunk of text.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
|
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
|
||||||
|
|
||||||
Yields:
|
Yields:
|
||||||
A dictionary representing the parsed XML element.
|
A `dict` representing the parsed XML element.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
|
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
|
||||||
@@ -147,46 +147,49 @@ class _StreamingParser:
|
|||||||
|
|
||||||
|
|
||||||
class XMLOutputParser(BaseTransformOutputParser):
|
class XMLOutputParser(BaseTransformOutputParser):
|
||||||
"""Parse an output using xml format."""
|
"""Parse an output using xml format.
|
||||||
|
|
||||||
|
Returns a dictionary of tags.
|
||||||
|
"""
|
||||||
|
|
||||||
tags: list[str] | None = None
|
tags: list[str] | None = None
|
||||||
"""Tags to tell the LLM to expect in the XML output.
|
"""Tags to tell the LLM to expect in the XML output.
|
||||||
|
|
||||||
Note this may not be perfect depending on the LLM implementation.
|
Note this may not be perfect depending on the LLM implementation.
|
||||||
|
|
||||||
For example, with tags=["foo", "bar", "baz"]:
|
For example, with `tags=["foo", "bar", "baz"]`:
|
||||||
|
|
||||||
1. A well-formatted XML instance:
|
1. A well-formatted XML instance:
|
||||||
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
|
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
|
||||||
|
|
||||||
2. A badly-formatted XML instance (missing closing tag for 'bar'):
|
2. A badly-formatted XML instance (missing closing tag for 'bar'):
|
||||||
"<foo>\n <bar>\n </foo>"
|
`"<foo>\n <bar>\n </foo>"`
|
||||||
|
|
||||||
3. A badly-formatted XML instance (unexpected 'tag' element):
|
3. A badly-formatted XML instance (unexpected 'tag' element):
|
||||||
"<foo>\n <tag>\n </tag>\n</foo>"
|
`"<foo>\n <tag>\n </tag>\n</foo>"`
|
||||||
"""
|
"""
|
||||||
encoding_matcher: re.Pattern = re.compile(
|
encoding_matcher: re.Pattern = re.compile(
|
||||||
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
|
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
|
||||||
)
|
)
|
||||||
parser: Literal["defusedxml", "xml"] = "defusedxml"
|
parser: Literal["defusedxml", "xml"] = "defusedxml"
|
||||||
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
|
||||||
|
|
||||||
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
|
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
|
||||||
present in some distributions of Python's standard library xml.
|
present in some distributions of Python's standard library xml.
|
||||||
`defusedxml` is a wrapper around the standard library parser that
|
`defusedxml` is a wrapper around the standard library parser that
|
||||||
sets up the parser with secure defaults.
|
sets up the parser with secure defaults.
|
||||||
* 'xml' is the standard library parser.
|
* `'xml'` is the standard library parser.
|
||||||
|
|
||||||
Use `xml` only if you are sure that your distribution of the standard library
|
Use `xml` only if you are sure that your distribution of the standard library is not
|
||||||
is not vulnerable to XML vulnerabilities.
|
vulnerable to XML vulnerabilities.
|
||||||
|
|
||||||
Please review the following resources for more information:
|
Please review the following resources for more information:
|
||||||
|
|
||||||
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
|
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
|
||||||
* https://github.com/tiran/defusedxml
|
* https://github.com/tiran/defusedxml
|
||||||
|
|
||||||
The standard library relies on libexpat for parsing XML:
|
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
|
||||||
https://github.com/libexpat/libexpat
|
for parsing XML.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def get_format_instructions(self) -> str:
|
def get_format_instructions(self) -> str:
|
||||||
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
|
|||||||
text: The output of an LLM call.
|
text: The output of an LLM call.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A dictionary representing the parsed XML.
|
A `dict` representing the parsed XML.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
OutputParserException: If the XML is not well-formed.
|
OutputParserException: If the XML is not well-formed.
|
||||||
ImportError: If defusedxml is not installed and the defusedxml
|
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
|
||||||
parser is requested.
|
requested.
|
||||||
"""
|
"""
|
||||||
# Try to find XML string within triple backticks
|
# Try to find XML string within triple backticks
|
||||||
# Imports are temporarily placed here to avoid issue with caching on CI
|
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||||
|
|||||||
@@ -776,7 +776,6 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
|||||||
|
|
||||||
Use to create flexible templated prompts for chat models.
|
Use to create flexible templated prompts for chat models.
|
||||||
|
|
||||||
Examples:
|
|
||||||
!!! warning "Behavior changed in 0.2.24"
|
!!! warning "Behavior changed in 0.2.24"
|
||||||
You can pass any Message-like formats supported by
|
You can pass any Message-like formats supported by
|
||||||
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
|
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
|
||||||
@@ -811,7 +810,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
|||||||
# )
|
# )
|
||||||
```
|
```
|
||||||
|
|
||||||
Messages Placeholder:
|
!!! note "Messages Placeholder"
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# In addition to Human/AI/Tool/Function messages,
|
# In addition to Human/AI/Tool/Function messages,
|
||||||
@@ -852,13 +851,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
|||||||
# )
|
# )
|
||||||
```
|
```
|
||||||
|
|
||||||
Single-variable template:
|
!!! note "Single-variable template"
|
||||||
|
|
||||||
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
|
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
|
||||||
and you invoke the template with a non-dict object, the prompt template will
|
and you invoke the template with a non-dict object, the prompt template will
|
||||||
inject the provided argument into that variable location.
|
inject the provided argument into that variable location.
|
||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from langchain_core.prompts import ChatPromptTemplate
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
|
||||||
|
|||||||
@@ -96,10 +96,10 @@ class RunLogPatch:
|
|||||||
"""Patch to the run log."""
|
"""Patch to the run log."""
|
||||||
|
|
||||||
ops: list[dict[str, Any]]
|
ops: list[dict[str, Any]]
|
||||||
"""List of jsonpatch operations, which describe how to create the run state
|
"""List of JSONPatch operations, which describe how to create the run state
|
||||||
from an empty dict. This is the minimal representation of the log, designed to
|
from an empty dict. This is the minimal representation of the log, designed to
|
||||||
be serialized as JSON and sent over the wire to reconstruct the log on the other
|
be serialized as JSON and sent over the wire to reconstruct the log on the other
|
||||||
side. Reconstruction of the state can be done with any jsonpatch-compliant library,
|
side. Reconstruction of the state can be done with any JSONPatch-compliant library,
|
||||||
see https://jsonpatch.com for more information."""
|
see https://jsonpatch.com for more information."""
|
||||||
|
|
||||||
def __init__(self, *ops: dict[str, Any]) -> None:
|
def __init__(self, *ops: dict[str, Any]) -> None:
|
||||||
|
|||||||
@@ -11,8 +11,7 @@ When developing an application, developers should inspect the capabilities and
|
|||||||
permissions of the tools that underlie the given agent toolkit, and determine
|
permissions of the tools that underlie the given agent toolkit, and determine
|
||||||
whether permissions of the given toolkit are appropriate for the application.
|
whether permissions of the given toolkit are appropriate for the application.
|
||||||
|
|
||||||
See [Security](https://docs.langchain.com/oss/python/security-policy) for more
|
See https://docs.langchain.com/oss/python/security-policy for more information.
|
||||||
information.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|||||||
@@ -499,12 +499,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|||||||
and HuggingFace tokenizer based on the tiktoken_enabled flag.
|
and HuggingFace tokenizer based on the tiktoken_enabled flag.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts (List[str]): A list of texts to embed.
|
texts: A list of texts to embed.
|
||||||
engine (str): The engine or model to use for embeddings.
|
engine: The engine or model to use for embeddings.
|
||||||
chunk_size (int | None): The size of chunks for processing embeddings.
|
chunk_size: The size of chunks for processing embeddings.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[List[float]]: A list of embeddings for each input text.
|
A list of embeddings for each input text.
|
||||||
"""
|
"""
|
||||||
_chunk_size = chunk_size or self.chunk_size
|
_chunk_size = chunk_size or self.chunk_size
|
||||||
client_kwargs = {**self._invocation_params, **kwargs}
|
client_kwargs = {**self._invocation_params, **kwargs}
|
||||||
@@ -551,12 +551,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
|||||||
`tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
|
`tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts (List[str]): A list of texts to embed.
|
texts: A list of texts to embed.
|
||||||
engine (str): The engine or model to use for embeddings.
|
engine: The engine or model to use for embeddings.
|
||||||
chunk_size (int | None): The size of chunks for processing embeddings.
|
chunk_size: The size of chunks for processing embeddings.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[List[float]]: A list of embeddings for each input text.
|
A list of embeddings for each input text.
|
||||||
"""
|
"""
|
||||||
_chunk_size = chunk_size or self.chunk_size
|
_chunk_size = chunk_size or self.chunk_size
|
||||||
client_kwargs = {**self._invocation_params, **kwargs}
|
client_kwargs = {**self._invocation_params, **kwargs}
|
||||||
|
|||||||
Reference in New Issue
Block a user