docs: Update documentation for custom LLMs (#19972)

Update documentation for customizing LLMs
Eugene Yurtsev
2024-04-11 12:21:27 -04:00
committed by GitHub
parent 799714c629
commit 653489a1a9
2 changed files with 429 additions and 58 deletions


@@ -557,6 +557,25 @@ class BaseLLM(BaseLanguageModel[str], ABC):
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[GenerationChunk]:
"""Stream the LLM on the given prompt.
This method should be overridden by subclasses that support streaming.
If not implemented, the default behavior of calls to stream will be to
fallback to the non-streaming version of the model and return
the output as a single chunk.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An iterator of GenerationChunks.
"""
raise NotImplementedError()
async def _astream(
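
For reference, a subclass that supports streaming would override `_stream` roughly
as in the minimal sketch below. The `EchoStreamingLLM` class and its
character-by-character behavior are illustrative, not part of the library:

from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk


class EchoStreamingLLM(LLM):
    """Toy LLM that streams the prompt back one character at a time."""

    @property
    def _llm_type(self) -> str:
        return "echo-streaming"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # Non-streaming fallback: return the whole output in one piece.
        return prompt

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        # Yield one GenerationChunk per character; a real model would yield
        # tokens as they arrive from the provider API, and report each one
        # to the callback manager.
        for ch in prompt:
            chunk = GenerationChunk(text=ch)
            if run_manager:
                run_manager.on_llm_new_token(ch, chunk=chunk)
            yield chunk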
@@ -566,6 +585,23 @@ class BaseLLM(BaseLanguageModel[str], ABC):
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> AsyncIterator[GenerationChunk]:
"""An async version of the _stream method.
The default implementation uses the synchronous _stream method and wraps it in
an async iterator. Subclasses that need to provide a true async implementation
should override this method.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An async iterator of GenerationChunks.
"""
iterator = await run_in_executor(
None,
self._stream,
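
Overriding `_astream` skips that executor round-trip. A sketch of a native async
implementation, extending the hypothetical `EchoStreamingLLM` above:

from typing import Any, AsyncIterator, List, Optional

from langchain_core.callbacks import AsyncCallbackManagerForLLMRun
from langchain_core.outputs import GenerationChunk


class AsyncEchoStreamingLLM(EchoStreamingLLM):
    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        # A native async generator: no thread executor involved. A real
        # implementation would iterate over the provider's async stream.
        for ch in prompt:
            chunk = GenerationChunk(text=ch)
            if run_manager:
                await run_manager.on_llm_new_token(ch, chunk=chunk)
            yield chunk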
@@ -1182,10 +1218,28 @@ class BaseLLM(BaseLanguageModel[str], ABC):
class LLM(BaseLLM):
"""Base LLM abstract class.
"""This class exposes a simple interface for implementing a custom LLM.
The purpose of this class is to expose a simpler interface for working
with LLMs, rather than expect the user to implement the full _generate method.
You should subclass this class and implement the following:
- `_call` method: Run the LLM on the given prompt and input (used by `invoke`).
- `_identifying_params` property: Return a dictionary of the identifying parameters
This is critical for caching and tracing purposes. Identifying parameters
is a dict that identifies the LLM.
It should mostly include a `model_name`.
Optional: Override the following methods to provide more optimizations:
- `_acall`: Provide a native async version of the `_call` method.
If not provided, will delegate to the synchronous version using
`run_in_executor`. (Used by `ainvoke`).
- `_stream`: Stream the LLM on the given prompt and input.
`stream` will use `_stream` if provided, otherwise it
use `_call` and output will arrive in one chunk.
- `_astream`: Override to provide a native async version of the `_stream` method.
`astream` will use `_astream` if provided, otherwise it will implement
a fallback behavior that will use `_stream` if `_stream` is implemented,
and use `_acall` if `_stream` is not implemented.
"""
@abstractmethod
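
Putting the required pieces together, a complete custom LLM can be as small as the
sketch below. The `ParrotLLM` name and its truncating behavior are made up for
illustration:

from typing import Any, Dict, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM


class ParrotLLM(LLM):
    """Toy LLM that returns the first n characters of the prompt."""

    n: int = 10

    @property
    def _llm_type(self) -> str:
        return "parrot"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        # Identifies this model for caching and tracing.
        return {"model_name": "parrot", "n": self.n}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        if stop is not None:
            raise NotImplementedError("stop sequences are not supported.")
        return prompt[: self.n]

With only these members defined, `ParrotLLM(n=5).invoke("hello world")` returns
"hello", and `stream`, `batch`, and the async variants all work through the
inherited fallbacks.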
@@ -1196,7 +1250,22 @@ class LLM(BaseLLM):
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Run the LLM on the given prompt and input."""
"""Run the LLM on the given input.
Override this method to implement the LLM logic.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.
"""
async def _acall(
self,
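
On the stop-word contract described above: when a provider does not support stop
sequences natively, the truncation can be enforced client-side instead of raising.
A self-contained sketch of that logic (the helper name is illustrative, not a
langchain_core API):

from typing import List, Optional


def cut_at_stop_tokens(text: str, stop: Optional[List[str]]) -> str:
    """Truncate text at the first occurrence of any stop substring."""
    if not stop:
        return text
    indices = [text.find(s) for s in stop if s in text]
    return text[: min(indices)] if indices else text


assert cut_at_stop_tokens("Final answer.\nObservation:", ["Observation:"]) == "Final answer.\n"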
@@ -1205,7 +1274,24 @@ class LLM(BaseLLM):
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Run the LLM on the given prompt and input."""
"""Async version of the _call method.
The default implementation delegates to the synchronous _call method using
`run_in_executor`. Subclasses that need to provide a true async implementation
should override this method to reduce the overhead of using `run_in_executor`.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.
"""
return await run_in_executor(
None,
self._call,
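
For models backed by a genuinely asynchronous client, overriding `_acall` avoids
that executor round-trip entirely. A sketch, reusing the hypothetical `ParrotLLM`
from above and simulating provider I/O with `asyncio.sleep`:

import asyncio
from typing import Any, List, Optional

from langchain_core.callbacks import AsyncCallbackManagerForLLMRun


class AsyncParrotLLM(ParrotLLM):
    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # A real implementation would await the provider's async client here.
        await asyncio.sleep(0)  # stands in for network I/O
        return prompt[: self.n]

Called via `asyncio.run(AsyncParrotLLM(n=5).ainvoke("hello world"))`, this returns
"hello" without ever touching a thread executor.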