style(core): more cleanup all around (#33711)

This commit is contained in:
Mason Daugherty
2025-10-28 22:58:19 -04:00
committed by GitHub
parent e5e1d6c705
commit a2a9a02ecb
12 changed files with 121 additions and 95 deletions

View File

@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
"""Base abstract message class. """Base abstract message class.
Messages are the inputs and outputs of a chat model. Messages are the inputs and outputs of a chat model.
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
[`AIMessage`][langchain.messages.AIMessage], and
[`SystemMessage`][langchain.messages.SystemMessage].
""" """
content: str | list[str | dict] content: str | list[str | dict]

View File

@@ -1,4 +1,20 @@
"""**OutputParser** classes parse the output of an LLM call.""" """`OutputParser` classes parse the output of an LLM call into structured data.
!!! tip "Structured output"
Output parsers emerged as an early solution to the challenge of obtaining structured
output from LLMs.
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
natively. In such cases, using output parsers may be unnecessary, and you should
leverage the model's built-in capabilities for structured output. Refer to the
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
for guidance on how to achieve structured output directly.
Output parsers remain valuable when working with models that do not support
structured output natively, or when you require additional processing or validation
of the model's output beyond its inherent capabilities.
"""
from typing import TYPE_CHECKING from typing import TYPE_CHECKING

View File

@@ -135,6 +135,9 @@ class BaseOutputParser(
Example: Example:
```python ```python
# Implement a simple boolean output parser
class BooleanOutputParser(BaseOutputParser[bool]): class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES" true_val: str = "YES"
false_val: str = "NO" false_val: str = "NO"

View File

@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object. """Parse the output of an LLM call to a JSON object.
Probably the most reliable output parser for getting structured data that does *not*
use function calling.
When used in streaming mode, it will yield partial JSON objects containing When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far. all the keys that have been returned so far.
In streaming, if `diff` is set to `True`, yields JSONPatch operations In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
describing the difference between the previous and the current object. difference between the previous and the current object.
""" """
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type] pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]

View File

@@ -41,7 +41,7 @@ def droplastn(
class ListOutputParser(BaseTransformOutputParser[list[str]]): class ListOutputParser(BaseTransformOutputParser[list[str]]):
"""Parse the output of an LLM call to a list.""" """Parse the output of a model to a list."""
@property @property
def _type(self) -> str: def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = "" buffer = ""
for chunk in input: for chunk in input:
if isinstance(chunk, BaseMessage): if isinstance(chunk, BaseMessage):
# extract text # Extract text
chunk_content = chunk.content chunk_content = chunk.content
if not isinstance(chunk_content, str): if not isinstance(chunk_content, str):
continue continue
buffer += chunk_content buffer += chunk_content
else: else:
# add current chunk to buffer # Add current chunk to buffer
buffer += chunk buffer += chunk
# parse buffer into a list of parts # Parse buffer into a list of parts
try: try:
done_idx = 0 done_idx = 0
# yield only complete parts # Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1): for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end() done_idx = m.end()
yield [m.group(1)] yield [m.group(1)]
buffer = buffer[done_idx:] buffer = buffer[done_idx:]
except NotImplementedError: except NotImplementedError:
parts = self.parse(buffer) parts = self.parse(buffer)
# yield only complete parts # Yield only complete parts
if len(parts) > 1: if len(parts) > 1:
for part in parts[:-1]: for part in parts[:-1]:
yield [part] yield [part]
buffer = parts[-1] buffer = parts[-1]
# yield the last part # Yield the last part
for part in self.parse(buffer): for part in self.parse(buffer):
yield [part] yield [part]
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = "" buffer = ""
async for chunk in input: async for chunk in input:
if isinstance(chunk, BaseMessage): if isinstance(chunk, BaseMessage):
# extract text # Extract text
chunk_content = chunk.content chunk_content = chunk.content
if not isinstance(chunk_content, str): if not isinstance(chunk_content, str):
continue continue
buffer += chunk_content buffer += chunk_content
else: else:
# add current chunk to buffer # Add current chunk to buffer
buffer += chunk buffer += chunk
# parse buffer into a list of parts # Parse buffer into a list of parts
try: try:
done_idx = 0 done_idx = 0
# yield only complete parts # Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1): for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end() done_idx = m.end()
yield [m.group(1)] yield [m.group(1)]
buffer = buffer[done_idx:] buffer = buffer[done_idx:]
except NotImplementedError: except NotImplementedError:
parts = self.parse(buffer) parts = self.parse(buffer)
# yield only complete parts # Yield only complete parts
if len(parts) > 1: if len(parts) > 1:
for part in parts[:-1]: for part in parts[:-1]:
yield [part] yield [part]
buffer = parts[-1] buffer = parts[-1]
# yield the last part # Yield the last part
for part in self.parse(buffer): for part in self.parse(buffer):
yield [part] yield [part]
class CommaSeparatedListOutputParser(ListOutputParser): class CommaSeparatedListOutputParser(ListOutputParser):
"""Parse the output of an LLM call to a comma-separated list.""" """Parse the output of a model to a comma-separated list."""
@classmethod @classmethod
def is_lc_serializable(cls) -> bool: def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable.""" """Return `True` as this class is serializable."""
return True return True
@classmethod @classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
) )
return [item for sublist in reader for item in sublist] return [item for sublist in reader for item in sublist]
except csv.Error: except csv.Error:
# keep old logic for backup # Keep old logic for backup
return [part.strip() for part in text.split(",")] return [part.strip() for part in text.split(",")]
@property @property

View File

@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
class StrOutputParser(BaseTransformOutputParser[str]): class StrOutputParser(BaseTransformOutputParser[str]):
"""OutputParser that parses LLMResult into the top likely string.""" """OutputParser that parses `LLMResult` into the top likely string."""
@classmethod @classmethod
def is_lc_serializable(cls) -> bool: def is_lc_serializable(cls) -> bool:
"""StrOutputParser is serializable. """`StrOutputParser` is serializable.
Returns: Returns:
True `True`
""" """
return True return True

View File

@@ -43,19 +43,19 @@ class _StreamingParser:
"""Streaming parser for XML. """Streaming parser for XML.
This implementation is pulled into a class to avoid implementation This implementation is pulled into a class to avoid implementation
drift between transform and atransform of the XMLOutputParser. drift between transform and atransform of the `XMLOutputParser`.
""" """
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None: def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
"""Initialize the streaming parser. """Initialize the streaming parser.
Args: Args:
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'. parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
See documentation in XMLOutputParser for more information. `'xml'`. See documentation in `XMLOutputParser` for more information.
Raises: Raises:
ImportError: If defusedxml is not installed and the defusedxml ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
parser is requested. requested.
""" """
if parser == "defusedxml": if parser == "defusedxml":
if not _HAS_DEFUSEDXML: if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
"""Parse a chunk of text. """Parse a chunk of text.
Args: Args:
chunk: A chunk of text to parse. This can be a string or a BaseMessage. chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
Yields: Yields:
A dictionary representing the parsed XML element. A `dict` representing the parsed XML element.
Raises: Raises:
xml.etree.ElementTree.ParseError: If the XML is not well-formed. xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ class _StreamingParser:
class XMLOutputParser(BaseTransformOutputParser): class XMLOutputParser(BaseTransformOutputParser):
"""Parse an output using xml format.""" """Parse an output using xml format.
Returns a dictionary of tags.
"""
tags: list[str] | None = None tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output. """Tags to tell the LLM to expect in the XML output.
Note this may not be perfect depending on the LLM implementation. Note this may not be perfect depending on the LLM implementation.
For example, with tags=["foo", "bar", "baz"]: For example, with `tags=["foo", "bar", "baz"]`:
1. A well-formatted XML instance: 1. A well-formatted XML instance:
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>" `"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
2. A badly-formatted XML instance (missing closing tag for 'bar'): 2. A badly-formatted XML instance (missing closing tag for 'bar'):
"<foo>\n <bar>\n </foo>" `"<foo>\n <bar>\n </foo>"`
3. A badly-formatted XML instance (unexpected 'tag' element): 3. A badly-formatted XML instance (unexpected 'tag' element):
"<foo>\n <tag>\n </tag>\n</foo>" `"<foo>\n <tag>\n </tag>\n</foo>"`
""" """
encoding_matcher: re.Pattern = re.compile( encoding_matcher: re.Pattern = re.compile(
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
) )
parser: Literal["defusedxml", "xml"] = "defusedxml" parser: Literal["defusedxml", "xml"] = "defusedxml"
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'. """Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities * `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml. present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that `defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults. sets up the parser with secure defaults.
* 'xml' is the standard library parser. * `'xml'` is the standard library parser.
Use `xml` only if you are sure that your distribution of the standard library Use `xml` only if you are sure that your distribution of the standard library is not
is not vulnerable to XML vulnerabilities. vulnerable to XML vulnerabilities.
Please review the following resources for more information: Please review the following resources for more information:
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities * https://docs.python.org/3/library/xml.html#xml-vulnerabilities
* https://github.com/tiran/defusedxml * https://github.com/tiran/defusedxml
The standard library relies on libexpat for parsing XML: The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
https://github.com/libexpat/libexpat for parsing XML.
""" """
def get_format_instructions(self) -> str: def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
text: The output of an LLM call. text: The output of an LLM call.
Returns: Returns:
A dictionary representing the parsed XML. A `dict` representing the parsed XML.
Raises: Raises:
OutputParserException: If the XML is not well-formed. OutputParserException: If the XML is not well-formed.
ImportError: If defusedxml is not installed and the defusedxml ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
parser is requested. requested.
""" """
# Try to find XML string within triple backticks # Try to find XML string within triple backticks
# Imports are temporarily placed here to avoid issue with caching on CI # Imports are temporarily placed here to avoid issue with caching on CI

View File

@@ -776,7 +776,6 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
Use to create flexible templated prompts for chat models. Use to create flexible templated prompts for chat models.
Examples:
!!! warning "Behavior changed in 0.2.24" !!! warning "Behavior changed in 0.2.24"
You can pass any Message-like formats supported by You can pass any Message-like formats supported by
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()` `ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
@@ -811,7 +810,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# ) # )
``` ```
Messages Placeholder: !!! note "Messages Placeholder"
```python ```python
# In addition to Human/AI/Tool/Function messages, # In addition to Human/AI/Tool/Function messages,
@@ -852,13 +851,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# ) # )
``` ```
Single-variable template: !!! note "Single-variable template"
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"), If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
and you invoke the template with a non-dict object, the prompt template will and you invoke the template with a non-dict object, the prompt template will
inject the provided argument into that variable location. inject the provided argument into that variable location.
```python ```python
from langchain_core.prompts import ChatPromptTemplate from langchain_core.prompts import ChatPromptTemplate

View File

@@ -96,10 +96,10 @@ class RunLogPatch:
"""Patch to the run log.""" """Patch to the run log."""
ops: list[dict[str, Any]] ops: list[dict[str, Any]]
"""List of jsonpatch operations, which describe how to create the run state """List of JSONPatch operations, which describe how to create the run state
from an empty dict. This is the minimal representation of the log, designed to from an empty dict. This is the minimal representation of the log, designed to
be serialized as JSON and sent over the wire to reconstruct the log on the other be serialized as JSON and sent over the wire to reconstruct the log on the other
side. Reconstruction of the state can be done with any jsonpatch-compliant library, side. Reconstruction of the state can be done with any JSONPatch-compliant library,
see https://jsonpatch.com for more information.""" see https://jsonpatch.com for more information."""
def __init__(self, *ops: dict[str, Any]) -> None: def __init__(self, *ops: dict[str, Any]) -> None:

View File

@@ -11,8 +11,7 @@ When developing an application, developers should inspect the capabilities and
permissions of the tools that underlie the given agent toolkit, and determine permissions of the tools that underlie the given agent toolkit, and determine
whether permissions of the given toolkit are appropriate for the application. whether permissions of the given toolkit are appropriate for the application.
See [Security](https://docs.langchain.com/oss/python/security-policy) for more See https://docs.langchain.com/oss/python/security-policy for more information.
information.
""" """
from pathlib import Path from pathlib import Path

View File

@@ -499,12 +499,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
and HuggingFace tokenizer based on the tiktoken_enabled flag. and HuggingFace tokenizer based on the tiktoken_enabled flag.
Args: Args:
texts (List[str]): A list of texts to embed. texts: A list of texts to embed.
engine (str): The engine or model to use for embeddings. engine: The engine or model to use for embeddings.
chunk_size (int | None): The size of chunks for processing embeddings. chunk_size: The size of chunks for processing embeddings.
Returns: Returns:
List[List[float]]: A list of embeddings for each input text. A list of embeddings for each input text.
""" """
_chunk_size = chunk_size or self.chunk_size _chunk_size = chunk_size or self.chunk_size
client_kwargs = {**self._invocation_params, **kwargs} client_kwargs = {**self._invocation_params, **kwargs}
@@ -551,12 +551,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
`tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag. `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
Args: Args:
texts (List[str]): A list of texts to embed. texts: A list of texts to embed.
engine (str): The engine or model to use for embeddings. engine: The engine or model to use for embeddings.
chunk_size (int | None): The size of chunks for processing embeddings. chunk_size: The size of chunks for processing embeddings.
Returns: Returns:
List[List[float]]: A list of embeddings for each input text. A list of embeddings for each input text.
""" """
_chunk_size = chunk_size or self.chunk_size _chunk_size = chunk_size or self.chunk_size
client_kwargs = {**self._invocation_params, **kwargs} client_kwargs = {**self._invocation_params, **kwargs}