Docs: Add custom parsing documentation and extending langchain (#18331)

* Added extending langchain.mdx -- we'll need to add links as we add more custom documentation
* Added partial documentation about parsers
2025-09-26 05:48:40 +00:00 · 2024-03-07 16:30:57 -05:00
parent 8c71f92cb2
commit ca299a8e08
4 changed files with 692 additions and 3 deletions
--- a/libs/core/langchain_core/output_parsers/base.py
+++ b/libs/core/langchain_core/output_parsers/base.py
@@ -141,9 +141,9 @@ class BaseOutputParser(
                        )
                    return cleaned_text == self.true_val.upper()

-                    @property
-                    def _type(self) -> str:
-                            return "boolean_output_parser"
+                @property
+                def _type(self) -> str:
+                    return "boolean_output_parser"
    """  # noqa: E501

    @property
--- a/libs/core/tests/unit_tests/output_parsers/test_base_parsers.py
+++ b/libs/core/tests/unit_tests/output_parsers/test_base_parsers.py
@@ -0,0 +1,94 @@
+"""Module to test base parser implementations."""
+from typing import List
+
+from langchain_core.exceptions import OutputParserException
+from langchain_core.messages import AIMessage
+from langchain_core.output_parsers import (
+    BaseGenerationOutputParser,
+    BaseTransformOutputParser,
+)
+from langchain_core.outputs import ChatGeneration, Generation
+from tests.unit_tests.fake.chat_model import GenericFakeChatModel
+
+
+def test_base_generation_parser() -> None:
+    """Test Base Generation Output Parser."""
+
+    class StrInvertCase(BaseGenerationOutputParser[str]):
+        """An example parser that inverts the case of the characters in the message."""
+
+        def parse_result(
+            self, result: List[Generation], *, partial: bool = False
+        ) -> str:
+            """Parse a list of model Generations into a specific format.
+
+            Args:
+                result: A list of Generations to be parsed. The Generations are assumed
+                    to be different candidate outputs for a single model input.
+                    Many parsers assume that only a single generation is passed in.
+                    This parser enforces that assumption.
+                partial: Whether to allow partial results. This is used for parsers
+                         that support streaming
+            """
+            if len(result) != 1:
+                raise NotImplementedError(
+                    "This output parser can only be used with a single generation."
+                )
+            generation = result[0]
+            if not isinstance(generation, ChatGeneration):
+                # Say that this one only works with chat generations
+                raise OutputParserException(
+                    "This output parser can only be used with a chat generation."
+                )
+
+            content = generation.message.content
+            assert isinstance(content, str)
+            return content.swapcase()  # type: ignore
+
+    model = GenericFakeChatModel(messages=iter([AIMessage(content="hEllo")]))
+    chain = model | StrInvertCase()
+    assert chain.invoke("") == "HeLLO"
+
+
+def test_base_transform_output_parser() -> None:
+    """Test base transform output parser."""
+
+    class StrInvertCase(BaseTransformOutputParser[str]):
+        """An example parser that inverts the case of the characters in the message."""
+
+        def parse(self, text: str) -> str:
+            """Parse a single string into a specific format."""
+            raise NotImplementedError()
+
+        def parse_result(
+            self, result: List[Generation], *, partial: bool = False
+        ) -> str:
+            """Parse a list of model Generations into a specific format.
+
+            Args:
+                result: A list of Generations to be parsed. The Generations are assumed
+                    to be different candidate outputs for a single model input.
+                    Many parsers assume that only a single generation is passed in.
+                    This parser enforces that assumption.
+                partial: Whether to allow partial results. This is used for parsers
+                         that support streaming
+            """
+            if len(result) != 1:
+                raise NotImplementedError(
+                    "This output parser can only be used with a single generation."
+                )
+            generation = result[0]
+            if not isinstance(generation, ChatGeneration):
+                # Say that this one only works with chat generations
+                raise OutputParserException(
+                    "This output parser can only be used with a chat generation."
+                )
+            content = generation.message.content
+            assert isinstance(content, str)
+            return content.swapcase()  # type: ignore
+
+    model = GenericFakeChatModel(messages=iter([AIMessage(content="hello world")]))
+    chain = model | StrInvertCase()
+    # inputs to models are ignored, response is hard-coded in model definition
+    chunks = [chunk for chunk in chain.stream("")]
+    assert chunks == ["HELLO", " ", "WORLD"]