REFACTOR: Refactor langchain_core (#13627)

Changes:
- remove langchain_core/schema, since there is no clear distinction between
schema and non-schema modules
- pluralize every module name that doesn't end in -y
- where it's easy, keep to 1-2 classes per file
- no more than one level of directory nesting
- in langchain, import only from top-level langchain_core modules (sketched
below)
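For instance, downstream code in langchain switches from deep submodule imports to the package's top-level modules. A minimal sketch (the pre-refactor schema path shown here is an assumption):

# Before the refactor (hypothetical old path):
# from langchain_core.schema.output import Generation, LLMResult

# After the refactor: import only from top-level langchain_core modules
from langchain_core.outputs import Generation, LLMResult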
Author: Bagatur
Date: 2023-11-21 08:35:29 -08:00
Committed by: GitHub
Parent: 17c6551c18
Commit: d32e511826
783 changed files with 2992 additions and 2899 deletions

langchain_core/outputs/__init__.py

@@ -0,0 +1,15 @@
from langchain_core.outputs.chat_generation import ChatGeneration, ChatGenerationChunk
from langchain_core.outputs.chat_result import ChatResult
from langchain_core.outputs.generation import Generation, GenerationChunk
from langchain_core.outputs.llm_result import LLMResult
from langchain_core.outputs.run_info import RunInfo

__all__ = [
"ChatGeneration",
"ChatGenerationChunk",
"ChatResult",
"Generation",
"GenerationChunk",
"LLMResult",
"RunInfo",
]
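
With this __init__.py in place, consumers import from langchain_core.outputs directly instead of reaching into its submodules. A quick sanity check (illustrative):

from langchain_core.outputs import Generation, LLMResult

result = LLMResult(generations=[[Generation(text="hello")]])
assert result.generations[0][0].text == "hello"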

langchain_core/outputs/chat_generation.py

@@ -0,0 +1,58 @@
from __future__ import annotations
from typing import Any, Dict, Literal
from langchain_core.messages import BaseMessage, BaseMessageChunk
from langchain_core.outputs.generation import Generation
from langchain_core.pydantic_v1 import root_validator


class ChatGeneration(Generation):
"""A single chat generation output."""
text: str = ""
"""*SHOULD NOT BE SET DIRECTLY* The text contents of the output message."""
message: BaseMessage
"""The message output by the chat model."""
# Override type to be ChatGeneration, ignore mypy error as this is intentional
type: Literal["ChatGeneration"] = "ChatGeneration" # type: ignore[assignment]
"""Type is used exclusively for serialization purposes."""
@root_validator
def set_text(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Set the text attribute to be the contents of the message."""
try:
values["text"] = values["message"].content
except (KeyError, AttributeError) as e:
raise ValueError("Error while initializing ChatGeneration") from e
return values


class ChatGenerationChunk(ChatGeneration):
    """A ChatGeneration chunk, which can be concatenated with other
    ChatGeneration chunks.

    Attributes:
        message: The message chunk output by the chat model.
    """
message: BaseMessageChunk
    # Override type to be ChatGenerationChunk; ignore mypy error as this is intentional
type: Literal["ChatGenerationChunk"] = "ChatGenerationChunk" # type: ignore[assignment] # noqa: E501
"""Type is used exclusively for serialization purposes."""
def __add__(self, other: ChatGenerationChunk) -> ChatGenerationChunk:
if isinstance(other, ChatGenerationChunk):
generation_info = (
{**(self.generation_info or {}), **(other.generation_info or {})}
if self.generation_info is not None or other.generation_info is not None
else None
)
return ChatGenerationChunk(
message=self.message + other.message,
generation_info=generation_info,
)
else:
raise TypeError(
f"unsupported operand type(s) for +: '{type(self)}' and '{type(other)}'"
)
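
Since __add__ concatenates the message chunks and merges their generation_info dicts, streamed chunks can simply be summed. A minimal sketch, assuming AIMessageChunk from langchain_core.messages:

from langchain_core.messages import AIMessageChunk
from langchain_core.outputs import ChatGenerationChunk

chunks = [
    ChatGenerationChunk(message=AIMessageChunk(content="Hello, ")),
    ChatGenerationChunk(
        message=AIMessageChunk(content="world!"),
        generation_info={"finish_reason": "stop"},
    ),
]
combined = chunks[0] + chunks[1]
# The root validator keeps text in sync with the concatenated message.
assert combined.text == "Hello, world!"
assert combined.generation_info == {"finish_reason": "stop"}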

langchain_core/outputs/chat_result.py

@@ -0,0 +1,15 @@
from typing import List, Optional
from langchain_core.outputs.chat_generation import ChatGeneration
from langchain_core.pydantic_v1 import BaseModel


class ChatResult(BaseModel):
"""Class that contains all results for a single chat model call."""
generations: List[ChatGeneration]
"""List of the chat generations. This is a List because an input can have multiple
candidate generations.
"""
llm_output: Optional[dict] = None
"""For arbitrary LLM provider specific output."""

langchain_core/outputs/generation.py

@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import Any, Dict, Literal, Optional
from langchain_core.load import Serializable


class Generation(Serializable):
"""A single text generation output."""
text: str
"""Generated text output."""
generation_info: Optional[Dict[str, Any]] = None
"""Raw response from the provider. May include things like the
reason for finishing or token log probabilities.
"""
type: Literal["Generation"] = "Generation"
"""Type is used exclusively for serialization purposes."""
# TODO: add log probs as separate attribute
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this class is serializable."""
return True


class GenerationChunk(Generation):
    """A Generation chunk, which can be concatenated with other Generation chunks."""

    def __add__(self, other: GenerationChunk) -> GenerationChunk:
if isinstance(other, GenerationChunk):
generation_info = (
{**(self.generation_info or {}), **(other.generation_info or {})}
if self.generation_info is not None or other.generation_info is not None
else None
)
return GenerationChunk(
text=self.text + other.text,
generation_info=generation_info,
)
else:
raise TypeError(
f"unsupported operand type(s) for +: '{type(self)}' and '{type(other)}'"
)
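
As with ChatGenerationChunk, this lets streaming callers accumulate partial outputs with +. A sketch that folds a stream of chunks into one value:

from functools import reduce
from operator import add

from langchain_core.outputs import GenerationChunk

stream = [GenerationChunk(text=t) for t in ("The", " answer", " is 42")]
final = reduce(add, stream)
assert final.text == "The answer is 42"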

langchain_core/outputs/llm_result.py

@@ -0,0 +1,65 @@
from __future__ import annotations
from copy import deepcopy
from typing import List, Optional
from langchain_core.outputs.generation import Generation
from langchain_core.outputs.run_info import RunInfo
from langchain_core.pydantic_v1 import BaseModel


class LLMResult(BaseModel):
"""Class that contains all results for a batched LLM call."""
generations: List[List[Generation]]
"""List of generated outputs. This is a List[List[]] because
each input could have multiple candidate generations."""
llm_output: Optional[dict] = None
"""Arbitrary LLM provider-specific output."""
run: Optional[List[RunInfo]] = None
"""List of metadata info for model call for each input."""
def flatten(self) -> List[LLMResult]:
"""Flatten generations into a single list.
Unpack List[List[Generation]] -> List[LLMResult] where each returned LLMResult
contains only a single Generation. If token usage information is available,
it is kept only for the LLMResult corresponding to the top-choice
Generation, to avoid over-counting of token usage downstream.
Returns:
List of LLMResults where each returned LLMResult contains a single
Generation.
"""
llm_results = []
for i, gen_list in enumerate(self.generations):
# Avoid double counting tokens in OpenAICallback
if i == 0:
llm_results.append(
LLMResult(
generations=[gen_list],
llm_output=self.llm_output,
)
)
else:
if self.llm_output is not None:
llm_output = deepcopy(self.llm_output)
llm_output["token_usage"] = dict()
else:
llm_output = None
llm_results.append(
LLMResult(
generations=[gen_list],
llm_output=llm_output,
)
)
        return llm_results

    def __eq__(self, other: object) -> bool:
"""Check for LLMResult equality by ignoring any metadata related to runs."""
if not isinstance(other, LLMResult):
return NotImplemented
return (
self.generations == other.generations
and self.llm_output == other.llm_output
)
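
A small sketch of the token-usage behavior described in flatten(): only the first flattened LLMResult keeps the original token_usage, later ones get an empty dict:

from langchain_core.outputs import Generation, LLMResult

batched = LLMResult(
    generations=[[Generation(text="a")], [Generation(text="b")]],
    llm_output={"token_usage": {"total_tokens": 7}},
)
first, second = batched.flatten()
assert first.llm_output == {"token_usage": {"total_tokens": 7}}
assert second.llm_output == {"token_usage": {}}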

langchain_core/outputs/run_info.py

@@ -0,0 +1,12 @@
from __future__ import annotations
from uuid import UUID
from langchain_core.pydantic_v1 import BaseModel


class RunInfo(BaseModel):
"""Class that contains metadata for a single execution of a Chain or model."""
run_id: UUID
"""A unique identifier for the model or chain run."""