mirror of https://github.com/hwchase17/langchain.git (synced 2025-09-08 22:42:05 +00:00)
core[minor], langchain[minor]: deprecate old Chain and LLM methods (#15499)
@@ -15,6 +15,7 @@ from typing import (
 
 from typing_extensions import TypeAlias
 
+from langchain_core._api import deprecated
 from langchain_core.messages import AnyMessage, BaseMessage, get_buffer_string
 from langchain_core.prompt_values import PromptValue
 from langchain_core.runnables import Runnable, RunnableSerializable
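This hunk adds the `deprecated` decorator import that the rest of the commit applies to the legacy methods. A minimal sketch of how it is used below; the class and method bodies here are hypothetical, and only the decorator call mirrors what appears in this diff:

from langchain_core._api import deprecated


class MyModel:
    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
    def predict(self, text: str) -> str:
        """Deprecated entry point; delegates to invoke."""
        # Calling this emits a deprecation warning naming `invoke` as the
        # replacement, the version the deprecation started in ("0.1.0"),
        # and the planned removal version ("0.2.0").
        return self.invoke(text)

    def invoke(self, text: str) -> str:
        """Hypothetical stand-in for the Runnable entry point."""
        return text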
@@ -60,17 +61,6 @@ class BaseLanguageModel(
     """Abstract base class for interfacing with language models.
 
     All language model wrappers inherit from BaseLanguageModel.
-
-    Exposes three main methods:
-    - generate_prompt: generate language model outputs for a sequence of prompt
-        values. A prompt value is a model input that can be converted to any language
-        model input format (string or messages).
-    - predict: pass in a single string to a language model and return a string
-        prediction.
-    - predict_messages: pass in a sequence of BaseMessages (corresponding to a single
-        model call) to a language model and return a BaseMessage prediction.
-
-    Each of these has an equivalent asynchronous method.
     """
 
     @property
@@ -160,11 +150,12 @@ class BaseLanguageModel(
                 prompt and additional model provider-specific output.
         """
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     @abstractmethod
     def predict(
         self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
     ) -> str:
-        """Pass a single string input to the model and return a string prediction.
+        """Pass a single string input to the model and return a string.
 
         Use this method when passing in raw text. If you want to pass in specific
         types of chat messages, use predict_messages.
@@ -180,6 +171,7 @@ class BaseLanguageModel(
             Top model prediction as a string.
         """
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     @abstractmethod
     def predict_messages(
         self,
@@ -188,7 +180,7 @@ class BaseLanguageModel(
         stop: Optional[Sequence[str]] = None,
         **kwargs: Any,
     ) -> BaseMessage:
-        """Pass a message sequence to the model and return a message prediction.
+        """Pass a message sequence to the model and return a message.
 
         Use this method when passing in chat messages. If you want to pass in raw text,
         use predict.
@@ -204,11 +196,12 @@ class BaseLanguageModel(
             Top model prediction as a message.
         """
 
+    @deprecated("0.1.0", alternative="ainvoke", removal="0.2.0")
     @abstractmethod
     async def apredict(
         self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
     ) -> str:
-        """Asynchronously pass a string to the model and return a string prediction.
+        """Asynchronously pass a string to the model and return a string.
 
         Use this method when calling pure text generation models and only the top
         candidate generation is needed.
@@ -224,6 +217,7 @@ class BaseLanguageModel(
             Top model prediction as a string.
         """
 
+    @deprecated("0.1.0", alternative="ainvoke", removal="0.2.0")
     @abstractmethod
     async def apredict_messages(
         self,
@@ -232,7 +226,7 @@ class BaseLanguageModel(
         stop: Optional[Sequence[str]] = None,
         **kwargs: Any,
     ) -> BaseMessage:
-        """Asynchronously pass messages to the model and return a message prediction.
+        """Asynchronously pass messages to the model and return a message.
 
         Use this method when calling chat models and only the top
         candidate generation is needed.
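Taken together, the hunks above deprecate `predict`, `predict_messages`, `apredict`, and `apredict_messages` on the base language model interface in favor of the Runnable entry points `invoke` and `ainvoke`. A minimal migration sketch, assuming `chat` is an already-constructed chat model instance (the instance itself is a placeholder, not part of this diff):

from langchain_core.messages import HumanMessage

# Before (deprecated in 0.1.0, scheduled for removal in 0.2.0):
#   text = chat.predict("Tell me a joke", stop=["\n"])
#   message = chat.predict_messages([HumanMessage(content="Tell me a joke")])

# After: invoke accepts a string or a list of messages; a chat model returns a message.
message = chat.invoke([HumanMessage(content="Tell me a joke")])
text = chat.invoke("Tell me a joke", stop=["\n"]).content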
@@ -16,6 +16,7 @@ from typing import (
     cast,
 )
 
+from langchain_core._api import deprecated
 from langchain_core.callbacks import (
     AsyncCallbackManager,
     AsyncCallbackManagerForLLMRun,
@@ -108,7 +109,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
     callbacks: Callbacks = Field(default=None, exclude=True)
     """Callbacks to add to the run trace."""
     callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
-    """Callback manager to add to the run trace."""
+    """[DEPRECATED] Callback manager to add to the run trace."""
     tags: Optional[List[str]] = Field(default=None, exclude=True)
     """Tags to add to the run trace."""
     metadata: Optional[Dict[str, Any]] = Field(default=None, exclude=True)
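Here `callback_manager` on `BaseChatModel` is re-documented as deprecated, leaving `callbacks` as the supported field. A short sketch of the intended usage; the `SomeChatModel` constructor is hypothetical, and only the field names come from this diff:

from langchain_core.callbacks import StdOutCallbackHandler

handler = StdOutCallbackHandler()

# Before (deprecated field):
#   model = SomeChatModel(callback_manager=CallbackManager([handler]))

# After: pass handlers through `callbacks`, either at construction time...
model = SomeChatModel(callbacks=[handler])
# ...or per call via the runnable config.
model.invoke("hello", config={"callbacks": [handler]})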
@@ -345,7 +346,30 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         run_name: Optional[str] = None,
         **kwargs: Any,
     ) -> LLMResult:
-        """Top Level call"""
+        """Pass a sequence of prompts to the model and return model generations.
+
+        This method should make use of batched calls for models that expose a batched
+        API.
+
+        Use this method when you want to:
+            1. take advantage of batched calls,
+            2. need more output from the model than just the top generated value,
+            3. are building chains that are agnostic to the underlying language model
+                type (e.g., pure text completion models vs chat models).
+
+        Args:
+            messages: List of list of messages.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of these substrings.
+            callbacks: Callbacks to pass through. Used for executing additional
+                functionality, such as logging or streaming, throughout generation.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            An LLMResult, which contains a list of candidate Generations for each input
+                prompt and additional model provider-specific output.
+        """
         params = self._get_invocation_params(stop=stop, **kwargs)
         options = {"stop": stop}
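The replacement docstring describes `generate` as the batched entry point: one inner list of messages per model call, returning an `LLMResult` with a list of candidate Generations per input. A small sketch of that shape, with `chat` again standing in for any `BaseChatModel` instance:

from langchain_core.messages import HumanMessage

batch = [
    [HumanMessage(content="Translate 'hello' to French")],
    [HumanMessage(content="Translate 'hello' to German")],
]
result = chat.generate(batch)

for generations in result.generations:  # one entry per input message list
    top = generations[0]                # top candidate for that input
    print(top.text)
print(result.llm_output)                # provider-specific extras, e.g. token usage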
@@ -407,7 +431,30 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         run_name: Optional[str] = None,
         **kwargs: Any,
     ) -> LLMResult:
-        """Top Level call"""
+        """Asynchronously pass a sequence of prompts to a model and return generations.
+
+        This method should make use of batched calls for models that expose a batched
+        API.
+
+        Use this method when you want to:
+            1. take advantage of batched calls,
+            2. need more output from the model than just the top generated value,
+            3. are building chains that are agnostic to the underlying language model
+                type (e.g., pure text completion models vs chat models).
+
+        Args:
+            messages: List of list of messages.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of these substrings.
+            callbacks: Callbacks to pass through. Used for executing additional
+                functionality, such as logging or streaming, throughout generation.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            An LLMResult, which contains a list of candidate Generations for each input
+                prompt and additional model provider-specific output.
+        """
         params = self._get_invocation_params(stop=stop, **kwargs)
         options = {"stop": stop}
@@ -632,6 +679,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
     ) -> AsyncIterator[ChatGenerationChunk]:
         raise NotImplementedError()
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     def __call__(
         self,
         messages: List[BaseMessage],
@@ -663,11 +711,13 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             raise ValueError("Unexpected generation type")
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     def call_as_llm(
         self, message: str, stop: Optional[List[str]] = None, **kwargs: Any
     ) -> str:
         return self.predict(message, stop=stop, **kwargs)
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     def predict(
         self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
     ) -> str:
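These hunks deprecate calling a chat model directly (`__call__`) and the `call_as_llm` string convenience wrapper, again pointing callers at `invoke`. A brief sketch, with `chat` remaining a placeholder instance:

from langchain_core.messages import HumanMessage

# Before (deprecated):
#   message = chat([HumanMessage(content="Hi there")])   # __call__
#   text = chat.call_as_llm("Hi there")

# After:
message = chat.invoke([HumanMessage(content="Hi there")])
text = chat.invoke("Hi there").content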
@@ -681,6 +731,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             raise ValueError("Cannot use predict when output is not a string.")
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     def predict_messages(
         self,
         messages: List[BaseMessage],
@@ -694,6 +745,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             _stop = list(stop)
         return self(messages, stop=_stop, **kwargs)
 
+    @deprecated("0.1.0", alternative="ainvoke", removal="0.2.0")
     async def apredict(
         self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
     ) -> str:
@@ -709,6 +761,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         else:
             raise ValueError("Cannot use predict when output is not a string.")
 
+    @deprecated("0.1.0", alternative="ainvoke", removal="0.2.0")
     async def apredict_messages(
         self,
         messages: List[BaseMessage],
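The async counterparts follow the same pattern, with `ainvoke` replacing `apredict` and `apredict_messages`. A minimal async sketch, where `chat` is a placeholder chat model instance defined elsewhere:

import asyncio

from langchain_core.messages import HumanMessage


async def main() -> None:
    # Before (deprecated):
    #   text = await chat.apredict("Tell me a joke")
    #   message = await chat.apredict_messages([HumanMessage(content="Tell me a joke")])
    message = await chat.ainvoke([HumanMessage(content="Tell me a joke")])
    print(message.content)


asyncio.run(main())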
@@ -36,6 +36,7 @@ from tenacity import (
     wait_exponential,
 )
 
+from langchain_core._api import deprecated
 from langchain_core.callbacks import (
     AsyncCallbackManager,
     AsyncCallbackManagerForLLMRun,
@@ -157,14 +158,17 @@ class BaseLLM(BaseLanguageModel[str], ABC):
     It should take in a prompt and return a string."""
 
     cache: Optional[bool] = None
+    """Whether to cache the response."""
     verbose: bool = Field(default_factory=_get_verbosity)
     """Whether to print out response text."""
     callbacks: Callbacks = Field(default=None, exclude=True)
-    callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
+    """Callbacks to add to the run trace."""
     tags: Optional[List[str]] = Field(default=None, exclude=True)
     """Tags to add to the run trace."""
     metadata: Optional[Dict[str, Any]] = Field(default=None, exclude=True)
     """Metadata to add to the run trace."""
+    callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
+    """[DEPRECATED]"""
 
     class Config:
         """Configuration for this pydantic object."""
@@ -576,7 +580,30 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         run_name: Optional[Union[str, List[str]]] = None,
         **kwargs: Any,
     ) -> LLMResult:
-        """Run the LLM on the given prompt and input."""
+        """Pass a sequence of prompts to a model and return generations.
+
+        This method should make use of batched calls for models that expose a batched
+        API.
+
+        Use this method when you want to:
+            1. take advantage of batched calls,
+            2. need more output from the model than just the top generated value,
+            3. are building chains that are agnostic to the underlying language model
+                type (e.g., pure text completion models vs chat models).
+
+        Args:
+            prompts: List of string prompts.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of these substrings.
+            callbacks: Callbacks to pass through. Used for executing additional
+                functionality, such as logging or streaming, throughout generation.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            An LLMResult, which contains a list of candidate Generations for each input
+                prompt and additional model provider-specific output.
+        """
         if not isinstance(prompts, list):
             raise ValueError(
                 "Argument 'prompts' is expected to be of type List[str], received"
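For completion-style models, the same batched docstring lands on `BaseLLM.generate`, which takes a list of string prompts. A sketch of reading the result, with `llm` as a placeholder `BaseLLM` instance:

prompts = ["Say hello in French.", "Say hello in German."]
result = llm.generate(prompts, stop=["\n"])

for prompt, generations in zip(prompts, result.generations):
    # `generations` holds the candidate Generations for this prompt;
    # the first entry is the top candidate.
    print(prompt, "->", generations[0].text)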
@@ -754,7 +781,30 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         run_name: Optional[Union[str, List[str]]] = None,
         **kwargs: Any,
     ) -> LLMResult:
-        """Run the LLM on the given prompt and input."""
+        """Asynchronously pass a sequence of prompts to a model and return generations.
+
+        This method should make use of batched calls for models that expose a batched
+        API.
+
+        Use this method when you want to:
+            1. take advantage of batched calls,
+            2. need more output from the model than just the top generated value,
+            3. are building chains that are agnostic to the underlying language model
+                type (e.g., pure text completion models vs chat models).
+
+        Args:
+            prompts: List of string prompts.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of these substrings.
+            callbacks: Callbacks to pass through. Used for executing additional
+                functionality, such as logging or streaming, throughout generation.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            An LLMResult, which contains a list of candidate Generations for each input
+                prompt and additional model provider-specific output.
+        """
         # Create callback managers
         if isinstance(callbacks, list) and (
             isinstance(callbacks[0], (list, BaseCallbackManager))
@@ -927,6 +977,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         )
         return result.generations[0][0].text
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     def predict(
         self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
     ) -> str:
@@ -936,6 +987,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
             _stop = list(stop)
         return self(text, stop=_stop, **kwargs)
 
+    @deprecated("0.1.0", alternative="invoke", removal="0.2.0")
     def predict_messages(
         self,
         messages: List[BaseMessage],
@@ -951,6 +1003,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         content = self(text, stop=_stop, **kwargs)
         return AIMessage(content=content)
 
+    @deprecated("0.1.0", alternative="ainvoke", removal="0.2.0")
     async def apredict(
         self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
     ) -> str:
@@ -960,6 +1013,7 @@ class BaseLLM(BaseLanguageModel[str], ABC):
             _stop = list(stop)
         return await self._call_async(text, stop=_stop, **kwargs)
 
+    @deprecated("0.1.0", alternative="ainvoke", removal="0.2.0")
     async def apredict_messages(
         self,
         messages: List[BaseMessage],
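Because every one of these methods now carries the decorator, calling them during the 0.1.x series still works but emits a deprecation warning. A hedged sketch of surfacing those warnings in a migration script, using only the standard library (`llm` is a placeholder instance):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = llm.predict("ping")  # deprecated path, still functional until 0.2.0
    for warning in caught:
        print(warning.category.__name__, warning.message)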