style(core): more cleanup all around (#33711)

2026-06-09 10:17:00 +00:00 · 2025-10-28 22:58:19 -04:00
parent e5e1d6c705
commit a2a9a02ecb
12 changed files with 121 additions and 95 deletions
--- a/libs/core/langchain_core/messages/base.py
+++ b/libs/core/langchain_core/messages/base.py
@@ -93,6 +93,10 @@ class BaseMessage(Serializable):
    """Base abstract message class.

    Messages are the inputs and outputs of a chat model.
+
+    Examples include [`HumanMessage`][langchain.messages.HumanMessage],
+    [`AIMessage`][langchain.messages.AIMessage], and
+    [`SystemMessage`][langchain.messages.SystemMessage].
    """

    content: str | list[str | dict]
--- a/libs/core/langchain_core/output_parsers/__init__.py
+++ b/libs/core/langchain_core/output_parsers/__init__.py
@@ -1,4 +1,20 @@
-"""**OutputParser** classes parse the output of an LLM call."""
+"""`OutputParser` classes parse the output of an LLM call into structured data.
+
+!!! tip "Structured output"
+
+    Output parsers emerged as an early solution to the challenge of obtaining structured
+    output from LLMs.
+
+    Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
+    natively. In such cases, using output parsers may be unnecessary, and you should
+    leverage the model's built-in capabilities for structured output. Refer to the
+    [documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
+    for guidance on how to achieve structured output directly.
+
+    Output parsers remain valuable when working with models that do not support
+    structured output natively, or when you require additional processing or validation
+    of the model's output beyond its inherent capabilities.
+"""

 from typing import TYPE_CHECKING

--- a/libs/core/langchain_core/output_parsers/base.py
+++ b/libs/core/langchain_core/output_parsers/base.py
@@ -135,6 +135,9 @@ class BaseOutputParser(

    Example:
        ```python
+        # Implement a simple boolean output parser
+
+
        class BooleanOutputParser(BaseOutputParser[bool]):
            true_val: str = "YES"
            false_val: str = "NO"
--- a/libs/core/langchain_core/output_parsers/json.py
+++ b/libs/core/langchain_core/output_parsers/json.py
@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
 class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
    """Parse the output of an LLM call to a JSON object.

+    Probably the most reliable output parser for getting structured data that does *not*
+    use function calling.
+
    When used in streaming mode, it will yield partial JSON objects containing
    all the keys that have been returned so far.

-    In streaming, if `diff` is set to `True`, yields JSONPatch operations
-    describing the difference between the previous and the current object.
+    In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
+    difference between the previous and the current object.
    """

    pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None  # type: ignore[valid-type]
--- a/libs/core/langchain_core/output_parsers/list.py
+++ b/libs/core/langchain_core/output_parsers/list.py
@@ -41,7 +41,7 @@ def droplastn(


 class ListOutputParser(BaseTransformOutputParser[list[str]]):
-    """Parse the output of an LLM call to a list."""
+    """Parse the output of a model to a list."""

    @property
    def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
        buffer = ""
        for chunk in input:
            if isinstance(chunk, BaseMessage):
-                # extract text
+                # Extract text
                chunk_content = chunk.content
                if not isinstance(chunk_content, str):
                    continue
                buffer += chunk_content
            else:
-                # add current chunk to buffer
+                # Add current chunk to buffer
                buffer += chunk
-            # parse buffer into a list of parts
+            # Parse buffer into a list of parts
            try:
                done_idx = 0
-                # yield only complete parts
+                # Yield only complete parts
                for m in droplastn(self.parse_iter(buffer), 1):
                    done_idx = m.end()
                    yield [m.group(1)]
                buffer = buffer[done_idx:]
            except NotImplementedError:
                parts = self.parse(buffer)
-                # yield only complete parts
+                # Yield only complete parts
                if len(parts) > 1:
                    for part in parts[:-1]:
                        yield [part]
                    buffer = parts[-1]
-        # yield the last part
+        # Yield the last part
        for part in self.parse(buffer):
            yield [part]

@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
        buffer = ""
        async for chunk in input:
            if isinstance(chunk, BaseMessage):
-                # extract text
+                # Extract text
                chunk_content = chunk.content
                if not isinstance(chunk_content, str):
                    continue
                buffer += chunk_content
            else:
-                # add current chunk to buffer
+                # Add current chunk to buffer
                buffer += chunk
-            # parse buffer into a list of parts
+            # Parse buffer into a list of parts
            try:
                done_idx = 0
-                # yield only complete parts
+                # Yield only complete parts
                for m in droplastn(self.parse_iter(buffer), 1):
                    done_idx = m.end()
                    yield [m.group(1)]
                buffer = buffer[done_idx:]
            except NotImplementedError:
                parts = self.parse(buffer)
-                # yield only complete parts
+                # Yield only complete parts
                if len(parts) > 1:
                    for part in parts[:-1]:
                        yield [part]
                    buffer = parts[-1]
-        # yield the last part
+        # Yield the last part
        for part in self.parse(buffer):
            yield [part]


 class CommaSeparatedListOutputParser(ListOutputParser):
-    """Parse the output of an LLM call to a comma-separated list."""
+    """Parse the output of a model to a comma-separated list."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
-        """Return True as this class is serializable."""
+        """Return `True` as this class is serializable."""
        return True

    @classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
            )
            return [item for sublist in reader for item in sublist]
        except csv.Error:
-            # keep old logic for backup
+            # Keep old logic for backup
            return [part.strip() for part in text.split(",")]

    @property
--- a/libs/core/langchain_core/output_parsers/openai_tools.py
+++ b/libs/core/langchain_core/output_parsers/openai_tools.py
@@ -224,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
            result: The result of the LLM call.
            partial: Whether to parse partial JSON.
                If `True`, the output will be a JSON object containing
-                all the keys that have been returned so far.
+                    all the keys that have been returned so far.
                If `False`, the output will be the full JSON object.

        Raises:
@@ -307,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
            result: The result of the LLM call.
            partial: Whether to parse partial JSON.
                If `True`, the output will be a JSON object containing
-                all the keys that have been returned so far.
+                    all the keys that have been returned so far.
                If `False`, the output will be the full JSON object.

        Returns:
--- a/libs/core/langchain_core/output_parsers/string.py
+++ b/libs/core/langchain_core/output_parsers/string.py
@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser


 class StrOutputParser(BaseTransformOutputParser[str]):
-    """OutputParser that parses LLMResult into the top likely string."""
+    """OutputParser that parses `LLMResult` into the top likely string."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
-        """StrOutputParser is serializable.
+        """`StrOutputParser` is serializable.

        Returns:
-            True
+            `True`
        """
        return True

--- a/libs/core/langchain_core/output_parsers/xml.py
+++ b/libs/core/langchain_core/output_parsers/xml.py
@@ -43,19 +43,19 @@ class _StreamingParser:
    """Streaming parser for XML.

    This implementation is pulled into a class to avoid implementation
-    drift between transform and atransform of the XMLOutputParser.
+    drift between transform and atransform of the `XMLOutputParser`.
    """

    def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
        """Initialize the streaming parser.

        Args:
-            parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
-              See documentation in XMLOutputParser for more information.
+            parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
+                `'xml'`. See documentation in `XMLOutputParser` for more information.

        Raises:
-            ImportError: If defusedxml is not installed and the defusedxml
-                parser is requested.
+            ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+                requested.
        """
        if parser == "defusedxml":
            if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
        """Parse a chunk of text.

        Args:
-            chunk: A chunk of text to parse. This can be a string or a BaseMessage.
+            chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.

        Yields:
-            A dictionary representing the parsed XML element.
+            A `dict` representing the parsed XML element.

        Raises:
            xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ class _StreamingParser:


 class XMLOutputParser(BaseTransformOutputParser):
-    """Parse an output using xml format."""
+    """Parse an output using xml format.
+
+    Returns a dictionary of tags.
+    """

    tags: list[str] | None = None
    """Tags to tell the LLM to expect in the XML output.

    Note this may not be perfect depending on the LLM implementation.

-    For example, with tags=["foo", "bar", "baz"]:
+    For example, with `tags=["foo", "bar", "baz"]`:

    1. A well-formatted XML instance:
-       "<foo>\n   <bar>\n      <baz></baz>\n   </bar>\n</foo>"
+        `"<foo>\n   <bar>\n      <baz></baz>\n   </bar>\n</foo>"`

    2. A badly-formatted XML instance (missing closing tag for 'bar'):
-       "<foo>\n   <bar>\n   </foo>"
+        `"<foo>\n   <bar>\n   </foo>"`

    3. A badly-formatted XML instance (unexpected 'tag' element):
-       "<foo>\n   <tag>\n   </tag>\n</foo>"
+        `"<foo>\n   <tag>\n   </tag>\n</foo>"`
    """
    encoding_matcher: re.Pattern = re.compile(
        r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
    )
    parser: Literal["defusedxml", "xml"] = "defusedxml"
-    """Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
+    """Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.

-    * 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
-       present in some distributions of Python's standard library xml.
-       `defusedxml` is a wrapper around the standard library parser that
-       sets up the parser with secure defaults.
-    * 'xml' is the standard library parser.
+    * `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
+        present in some distributions of Python's standard library xml.
+        `defusedxml` is a wrapper around the standard library parser that
+        sets up the parser with secure defaults.
+    * `'xml'` is the standard library parser.

-    Use `xml` only if you are sure that your distribution of the standard library
-    is not vulnerable to XML vulnerabilities.
+    Use `xml` only if you are sure that your distribution of the standard library is not
+    vulnerable to XML vulnerabilities.

    Please review the following resources for more information:

    * https://docs.python.org/3/library/xml.html#xml-vulnerabilities
    * https://github.com/tiran/defusedxml

-    The standard library relies on libexpat for parsing XML:
-    https://github.com/libexpat/libexpat
+    The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
+    for parsing XML.
    """

    def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
            text: The output of an LLM call.

        Returns:
-            A dictionary representing the parsed XML.
+            A `dict` representing the parsed XML.

        Raises:
            OutputParserException: If the XML is not well-formed.
-            ImportError: If defusedxml is not installed and the defusedxml
-                parser is requested.
+            ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
+                requested.
        """
        # Try to find XML string within triple backticks
        # Imports are temporarily placed here to avoid issue with caching on CI
--- a/libs/core/langchain_core/prompts/chat.py
+++ b/libs/core/langchain_core/prompts/chat.py
@@ -776,42 +776,41 @@ class ChatPromptTemplate(BaseChatPromptTemplate):

    Use to create flexible templated prompts for chat models.

-    Examples:
-        !!! warning "Behavior changed in 0.2.24"
-            You can pass any Message-like formats supported by
-            `ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
-            init.
+    !!! warning "Behavior changed in 0.2.24"
+        You can pass any Message-like formats supported by
+        `ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
+        init.

-        ```python
-        from langchain_core.prompts import ChatPromptTemplate
+    ```python
+    from langchain_core.prompts import ChatPromptTemplate

-        template = ChatPromptTemplate(
-            [
-                ("system", "You are a helpful AI bot. Your name is {name}."),
-                ("human", "Hello, how are you doing?"),
-                ("ai", "I'm doing well, thanks!"),
-                ("human", "{user_input}"),
-            ]
-        )
+    template = ChatPromptTemplate(
+        [
+            ("system", "You are a helpful AI bot. Your name is {name}."),
+            ("human", "Hello, how are you doing?"),
+            ("ai", "I'm doing well, thanks!"),
+            ("human", "{user_input}"),
+        ]
+    )

-        prompt_value = template.invoke(
-            {
-                "name": "Bob",
-                "user_input": "What is your name?",
-            }
-        )
-        # Output:
-        # ChatPromptValue(
-        #    messages=[
-        #        SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
-        #        HumanMessage(content='Hello, how are you doing?'),
-        #        AIMessage(content="I'm doing well, thanks!"),
-        #        HumanMessage(content='What is your name?')
-        #    ]
-        # )
-        ```
+    prompt_value = template.invoke(
+        {
+            "name": "Bob",
+            "user_input": "What is your name?",
+        }
+    )
+    # Output:
+    # ChatPromptValue(
+    #    messages=[
+    #        SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
+    #        HumanMessage(content='Hello, how are you doing?'),
+    #        AIMessage(content="I'm doing well, thanks!"),
+    #        HumanMessage(content='What is your name?')
+    #    ]
+    # )
+    ```

-    Messages Placeholder:
+    !!! note "Messages Placeholder"

        ```python
        # In addition to Human/AI/Tool/Function messages,
@@ -852,13 +851,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
        # )
        ```

-    Single-variable template:
+    !!! note "Single-variable template"

        If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
        and you invoke the template with a non-dict object, the prompt template will
        inject the provided argument into that variable location.

-
        ```python
        from langchain_core.prompts import ChatPromptTemplate

--- a/libs/core/langchain_core/tracers/log_stream.py
+++ b/libs/core/langchain_core/tracers/log_stream.py
@@ -96,10 +96,10 @@ class RunLogPatch:
    """Patch to the run log."""

    ops: list[dict[str, Any]]
-    """List of jsonpatch operations, which describe how to create the run state
+    """List of JSONPatch operations, which describe how to create the run state
    from an empty dict. This is the minimal representation of the log, designed to
    be serialized as JSON and sent over the wire to reconstruct the log on the other
-    side. Reconstruction of the state can be done with any jsonpatch-compliant library,
+    side. Reconstruction of the state can be done with any JSONPatch-compliant library,
    see https://jsonpatch.com for more information."""

    def __init__(self, *ops: dict[str, Any]) -> None:
--- a/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py
+++ b/libs/langchain/langchain_classic/agents/agent_toolkits/__init__.py
@@ -11,8 +11,7 @@ When developing an application, developers should inspect the capabilities and
 permissions of the tools that underlie the given agent toolkit, and determine
 whether permissions of the given toolkit are appropriate for the application.

-See [Security](https://docs.langchain.com/oss/python/security-policy) for more
-information.
+See https://docs.langchain.com/oss/python/security-policy for more information.
 """

 from pathlib import Path
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -499,12 +499,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
        and HuggingFace tokenizer based on the tiktoken_enabled flag.

        Args:
-            texts (List[str]): A list of texts to embed.
-            engine (str): The engine or model to use for embeddings.
-            chunk_size (int | None): The size of chunks for processing embeddings.
+            texts: A list of texts to embed.
+            engine: The engine or model to use for embeddings.
+            chunk_size: The size of chunks for processing embeddings.

        Returns:
-            List[List[float]]: A list of embeddings for each input text.
+            A list of embeddings for each input text.
        """
        _chunk_size = chunk_size or self.chunk_size
        client_kwargs = {**self._invocation_params, **kwargs}
@@ -551,12 +551,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
        `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.

        Args:
-            texts (List[str]): A list of texts to embed.
-            engine (str): The engine or model to use for embeddings.
-            chunk_size (int | None): The size of chunks for processing embeddings.
+            texts: A list of texts to embed.
+            engine: The engine or model to use for embeddings.
+            chunk_size: The size of chunks for processing embeddings.

        Returns:
-            List[List[float]]: A list of embeddings for each input text.
+            A list of embeddings for each input text.
        """
        _chunk_size = chunk_size or self.chunk_size
        client_kwargs = {**self._invocation_params, **kwargs}