style(core): more cleanup all around (#33711)

This commit is contained in:
Mason Daugherty
2025-10-28 22:58:19 -04:00
committed by GitHub
parent e5e1d6c705
commit a2a9a02ecb
12 changed files with 121 additions and 95 deletions

View File

@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
"""Base abstract message class.
Messages are the inputs and outputs of a chat model.
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
[`AIMessage`][langchain.messages.AIMessage], and
[`SystemMessage`][langchain.messages.SystemMessage].
"""
content: str | list[str | dict]

View File

@@ -1,4 +1,20 @@
"""**OutputParser** classes parse the output of an LLM call."""
"""`OutputParser` classes parse the output of an LLM call into structured data.
!!! tip "Structured output"
Output parsers emerged as an early solution to the challenge of obtaining structured
output from LLMs.
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
natively. In such cases, using output parsers may be unnecessary, and you should
leverage the model's built-in capabilities for structured output. Refer to the
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
for guidance on how to achieve structured output directly.
Output parsers remain valuable when working with models that do not support
structured output natively, or when you require additional processing or validation
of the model's output beyond its inherent capabilities.
"""
from typing import TYPE_CHECKING

View File

@@ -135,6 +135,9 @@ class BaseOutputParser(
Example:
```python
# Implement a simple boolean output parser
class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES"
false_val: str = "NO"

View File

@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object.
Probably the most reliable output parser for getting structured data that does *not*
use function calling.
When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far.
In streaming, if `diff` is set to `True`, yields JSONPatch operations
describing the difference between the previous and the current object.
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
difference between the previous and the current object.
"""
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]

View File

@@ -41,7 +41,7 @@ def droplastn(
class ListOutputParser(BaseTransformOutputParser[list[str]]):
"""Parse the output of an LLM call to a list."""
"""Parse the output of a model to a list."""
@property
def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
for chunk in input:
if isinstance(chunk, BaseMessage):
# extract text
# Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# add current chunk to buffer
# Add current chunk to buffer
buffer += chunk
# parse buffer into a list of parts
# Parse buffer into a list of parts
try:
done_idx = 0
# yield only complete parts
# Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# yield only complete parts
# Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# yield the last part
# Yield the last part
for part in self.parse(buffer):
yield [part]
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
async for chunk in input:
if isinstance(chunk, BaseMessage):
# extract text
# Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# add current chunk to buffer
# Add current chunk to buffer
buffer += chunk
# parse buffer into a list of parts
# Parse buffer into a list of parts
try:
done_idx = 0
# yield only complete parts
# Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# yield only complete parts
# Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# yield the last part
# Yield the last part
for part in self.parse(buffer):
yield [part]
class CommaSeparatedListOutputParser(ListOutputParser):
"""Parse the output of an LLM call to a comma-separated list."""
"""Parse the output of a model to a comma-separated list."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
)
return [item for sublist in reader for item in sublist]
except csv.Error:
# keep old logic for backup
# Keep old logic for backup
return [part.strip() for part in text.split(",")]
@property

View File

@@ -224,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Raises:
@@ -307,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Returns:

View File

@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
class StrOutputParser(BaseTransformOutputParser[str]):
"""OutputParser that parses LLMResult into the top likely string."""
"""OutputParser that parses `LLMResult` into the most likely string."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""StrOutputParser is serializable.
"""`StrOutputParser` is serializable.
Returns:
True
`True`
"""
return True

View File

@@ -43,19 +43,19 @@ class _StreamingParser:
"""Streaming parser for XML.
This implementation is pulled into a class to avoid implementation
drift between transform and atransform of the XMLOutputParser.
drift between transform and atransform of the `XMLOutputParser`.
"""
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
"""Initialize the streaming parser.
Args:
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
See documentation in XMLOutputParser for more information.
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
`'xml'`. See documentation in `XMLOutputParser` for more information.
Raises:
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
"""
if parser == "defusedxml":
if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
"""Parse a chunk of text.
Args:
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
Yields:
A dictionary representing the parsed XML element.
A `dict` representing the parsed XML element.
Raises:
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ class _StreamingParser:
class XMLOutputParser(BaseTransformOutputParser):
"""Parse an output using xml format."""
"""Parse an output using XML format.
Returns a dictionary of tags.
"""
tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output.
Note this may not be perfect depending on the LLM implementation.
For example, with tags=["foo", "bar", "baz"]:
For example, with `tags=["foo", "bar", "baz"]`:
1. A well-formatted XML instance:
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
2. A badly-formatted XML instance (missing closing tag for 'bar'):
"<foo>\n <bar>\n </foo>"
`"<foo>\n <bar>\n </foo>"`
3. A badly-formatted XML instance (unexpected 'tag' element):
"<foo>\n <tag>\n </tag>\n</foo>"
`"<foo>\n <tag>\n </tag>\n</foo>"`
"""
encoding_matcher: re.Pattern = re.compile(
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
)
parser: Literal["defusedxml", "xml"] = "defusedxml"
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* 'xml' is the standard library parser.
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* `'xml'` is the standard library parser.
Use `xml` only if you are sure that your distribution of the standard library
is not vulnerable to XML vulnerabilities.
Use `xml` only if you are sure that your distribution of the standard library is not
vulnerable to XML vulnerabilities.
Please review the following resources for more information:
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
* https://github.com/tiran/defusedxml
The standard library relies on libexpat for parsing XML:
https://github.com/libexpat/libexpat
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
for parsing XML.
"""
def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
text: The output of an LLM call.
Returns:
A dictionary representing the parsed XML.
A `dict` representing the parsed XML.
Raises:
OutputParserException: If the XML is not well-formed.
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
"""
# Try to find XML string within triple backticks
# Imports are temporarily placed here to avoid issue with caching on CI

View File

@@ -776,42 +776,41 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
Use to create flexible templated prompts for chat models.
Examples:
!!! warning "Behavior changed in 0.2.24"
You can pass any Message-like formats supported by
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
init.
!!! warning "Behavior changed in 0.2.24"
You can pass any Message-like formats supported by
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
init.
```python
from langchain_core.prompts import ChatPromptTemplate
```python
from langchain_core.prompts import ChatPromptTemplate
template = ChatPromptTemplate(
[
("system", "You are a helpful AI bot. Your name is {name}."),
("human", "Hello, how are you doing?"),
("ai", "I'm doing well, thanks!"),
("human", "{user_input}"),
]
)
template = ChatPromptTemplate(
[
("system", "You are a helpful AI bot. Your name is {name}."),
("human", "Hello, how are you doing?"),
("ai", "I'm doing well, thanks!"),
("human", "{user_input}"),
]
)
prompt_value = template.invoke(
{
"name": "Bob",
"user_input": "What is your name?",
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
# HumanMessage(content='Hello, how are you doing?'),
# AIMessage(content="I'm doing well, thanks!"),
# HumanMessage(content='What is your name?')
# ]
# )
```
prompt_value = template.invoke(
{
"name": "Bob",
"user_input": "What is your name?",
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
# HumanMessage(content='Hello, how are you doing?'),
# AIMessage(content="I'm doing well, thanks!"),
# HumanMessage(content='What is your name?')
# ]
# )
```
Messages Placeholder:
!!! note "Messages Placeholder"
```python
# In addition to Human/AI/Tool/Function messages,
@@ -852,13 +851,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# )
```
Single-variable template:
!!! note "Single-variable template"
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
and you invoke the template with a non-dict object, the prompt template will
inject the provided argument into that variable location.
```python
from langchain_core.prompts import ChatPromptTemplate

View File

@@ -96,10 +96,10 @@ class RunLogPatch:
"""Patch to the run log."""
ops: list[dict[str, Any]]
"""List of jsonpatch operations, which describe how to create the run state
"""List of JSONPatch operations, which describe how to create the run state
from an empty dict. This is the minimal representation of the log, designed to
be serialized as JSON and sent over the wire to reconstruct the log on the other
side. Reconstruction of the state can be done with any jsonpatch-compliant library,
side. Reconstruction of the state can be done with any JSONPatch-compliant library,
see https://jsonpatch.com for more information."""
def __init__(self, *ops: dict[str, Any]) -> None:

View File

@@ -11,8 +11,7 @@ When developing an application, developers should inspect the capabilities and
permissions of the tools that underlie the given agent toolkit, and determine
whether permissions of the given toolkit are appropriate for the application.
See [Security](https://docs.langchain.com/oss/python/security-policy) for more
information.
See https://docs.langchain.com/oss/python/security-policy for more information.
"""
from pathlib import Path

View File

@@ -499,12 +499,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
and HuggingFace tokenizer based on the tiktoken_enabled flag.
Args:
texts (List[str]): A list of texts to embed.
engine (str): The engine or model to use for embeddings.
chunk_size (int | None): The size of chunks for processing embeddings.
texts: A list of texts to embed.
engine: The engine or model to use for embeddings.
chunk_size: The size of chunks for processing embeddings.
Returns:
List[List[float]]: A list of embeddings for each input text.
A list of embeddings for each input text.
"""
_chunk_size = chunk_size or self.chunk_size
client_kwargs = {**self._invocation_params, **kwargs}
@@ -551,12 +551,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
`tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.
Args:
texts (List[str]): A list of texts to embed.
engine (str): The engine or model to use for embeddings.
chunk_size (int | None): The size of chunks for processing embeddings.
texts: A list of texts to embed.
engine: The engine or model to use for embeddings.
chunk_size: The size of chunks for processing embeddings.
Returns:
List[List[float]]: A list of embeddings for each input text.
A list of embeddings for each input text.
"""
_chunk_size = chunk_size or self.chunk_size
client_kwargs = {**self._invocation_params, **kwargs}