docs: Update documentation for custom LLMs (#19972)
Update documentation for customizing LLMs
@@ -557,6 +557,25 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
+        """Stream the LLM on the given prompt.
+
+        This method should be overridden by subclasses that support streaming.
+
+        If not implemented, the default behavior of calls to stream will be to
+        fall back to the non-streaming version of the model and return
+        the output as a single chunk.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of these substrings.
+            run_manager: Callback manager for the run.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            An iterator of GenerationChunks.
+        """
         raise NotImplementedError()

     async def _astream(
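To make the streaming contract above concrete, here is a minimal sketch of a subclass that overrides `_stream`. It is not part of this commit; the class name `ParrotLLM` and its word-splitting behavior are purely illustrative.

```python
from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk


class ParrotLLM(LLM):
    """Toy model (illustration only) that repeats the prompt back word by word."""

    @property
    def _llm_type(self) -> str:
        return "parrot"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # Non-streaming path: concatenate the chunks that _stream yields.
        return "".join(c.text for c in self._stream(prompt, stop, run_manager, **kwargs))

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        for word in prompt.split():
            chunk = GenerationChunk(text=word + " ")
            if run_manager:
                # Report each new token so streaming callbacks and tracing fire.
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
            yield chunk
```

With such an override, `ParrotLLM().stream("to be or not to be")` yields one word per chunk instead of falling back to a single `_call` result.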
@@ -566,6 +585,23 @@ class BaseLLM(BaseLanguageModel[str], ABC):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[GenerationChunk]:
+        """An async version of the _stream method.
+
+        The default implementation uses the synchronous _stream method and wraps it in
+        an async iterator. Subclasses that need to provide a true async implementation
+        should override this method.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of these substrings.
+            run_manager: Callback manager for the run.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            An async iterator of GenerationChunks.
+        """
         iterator = await run_in_executor(
             None,
             self._stream,
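Subclasses that can reach a provider's async API directly may override `_astream` to avoid that executor round-trip. The sketch below is illustrative only; the class `AsyncParrotLLM` and its `asyncio.sleep` stand-in are not part of this commit.

```python
import asyncio
from typing import Any, AsyncIterator, List, Optional

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk


class AsyncParrotLLM(LLM):
    """Toy model (illustration only) with a native async streaming path."""

    @property
    def _llm_type(self) -> str:
        return "async-parrot"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        return prompt

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        for word in prompt.split():
            # Stand-in for awaiting a provider's async streaming API.
            await asyncio.sleep(0)
            chunk = GenerationChunk(text=word + " ")
            if run_manager:
                await run_manager.on_llm_new_token(chunk.text, chunk=chunk)
            yield chunk
```

`astream` then streams natively through `_astream` instead of wrapping the synchronous `_stream` in an executor.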
@@ -1182,10 +1218,28 @@ class BaseLLM(BaseLanguageModel[str], ABC):


 class LLM(BaseLLM):
-    """Base LLM abstract class.
+    """This class exposes a simple interface for implementing a custom LLM.

-    The purpose of this class is to expose a simpler interface for working
-    with LLMs, rather than expect the user to implement the full _generate method.
+    You should subclass this class and implement the following:
+
+    - `_call` method: Run the LLM on the given prompt and input (used by `invoke`).
+    - `_identifying_params` property: Return a dictionary of the identifying parameters.
+        This is critical for caching and tracing purposes. Identifying parameters
+        is a dict that identifies the LLM.
+        It should mostly include a `model_name`.
+
+    Optional: Override the following methods to provide more optimizations:
+
+    - `_acall`: Provide a native async version of the `_call` method.
+        If not provided, will delegate to the synchronous version using
+        `run_in_executor`. (Used by `ainvoke`).
+    - `_stream`: Stream the LLM on the given prompt and input.
+        `stream` will use `_stream` if provided, otherwise it will
+        use `_call` and the output will arrive in one chunk.
+    - `_astream`: Override to provide a native async version of the `_stream` method.
+        `astream` will use `_astream` if provided, otherwise it will implement
+        a fallback behavior that will use `_stream` if `_stream` is implemented,
+        and use `_acall` if `_stream` is not implemented.
     """

     @abstractmethod
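As a rough sketch of the subclassing pattern this docstring describes, a minimal custom LLM only needs `_llm_type`, `_call`, and (for caching and tracing) `_identifying_params`. The class `EchoLLM` and its `n` field below are hypothetical and not part of the commit.

```python
from typing import Any, List, Mapping, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM


class EchoLLM(LLM):
    """Toy custom LLM (illustration only) that echoes the last n characters of the prompt."""

    n: int = 20  # Declared as a pydantic field, so it can be set at construction time.

    @property
    def _llm_type(self) -> str:
        return "echo"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        if stop is not None:
            # This toy model cannot enforce stop words, so it says so explicitly.
            raise NotImplementedError("stop kwargs are not supported.")
        # Return only the model output, never the prompt itself.
        return prompt[-self.n:]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        # Used for caching and tracing; identifies this model configuration.
        return {"model_name": "echo", "n": self.n}
```

`EchoLLM(n=5).invoke("Hello, world!")` then returns `"orld!"`, while `ainvoke`, `stream`, and `astream` work through the inherited fallbacks described above.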
@@ -1196,7 +1250,22 @@ class LLM(BaseLLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> str:
-        """Run the LLM on the given prompt and input."""
+        """Run the LLM on the given input.
+
+        Override this method to implement the LLM logic.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of the stop substrings.
+                If stop tokens are not supported consider raising NotImplementedError.
+            run_manager: Callback manager for the run.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            The model output as a string. SHOULD NOT include the prompt.
+        """

     async def _acall(
         self,
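One way a `_call` implementation might honor the stop-word contract described above is sketched here with a hypothetical `StaticLLM` that returns a canned `reply`; neither the class nor the field is part of this commit.

```python
from typing import Any, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM


class StaticLLM(LLM):
    """Toy model (illustration only) returning a canned reply while honoring stop words."""

    reply: str = "first line\nSTOP\nsecond line"

    @property
    def _llm_type(self) -> str:
        return "static"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        text = self.reply
        # Cut the output at the first occurrence of any stop substring,
        # as the docstring requires; the prompt itself is never included.
        for token in stop or []:
            idx = text.find(token)
            if idx != -1:
                text = text[:idx]
        return text
```

`StaticLLM().invoke("hi", stop=["STOP"])` then returns only `"first line\n"`.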
@@ -1205,7 +1274,24 @@ class LLM(BaseLLM):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> str:
-        """Run the LLM on the given prompt and input."""
+        """Async version of the _call method.
+
+        The default implementation delegates to the synchronous _call method using
+        `run_in_executor`. Subclasses that need to provide a true async implementation
+        should override this method to reduce the overhead of using `run_in_executor`.
+
+        Args:
+            prompt: The prompt to generate from.
+            stop: Stop words to use when generating. Model output is cut off at the
+                first occurrence of any of the stop substrings.
+                If stop tokens are not supported consider raising NotImplementedError.
+            run_manager: Callback manager for the run.
+            **kwargs: Arbitrary additional keyword arguments. These are usually passed
+                to the model provider API call.
+
+        Returns:
+            The model output as a string. SHOULD NOT include the prompt.
+        """
         return await run_in_executor(
             None,
             self._call,
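A native `_acall` override might look like the sketch below; the class `SleepyEchoLLM` is hypothetical and the `asyncio.sleep` stands in for a provider's async client call.

```python
import asyncio
from typing import Any, List, Optional

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM


class SleepyEchoLLM(LLM):
    """Toy model (illustration only) with a native async counterpart to _call."""

    @property
    def _llm_type(self) -> str:
        return "sleepy-echo"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        return prompt.upper()

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # A real implementation would await the provider's async client here;
        # overriding _acall avoids the run_in_executor hop used by the default.
        await asyncio.sleep(0)
        return prompt.upper()
```

`await SleepyEchoLLM().ainvoke("hi")` then runs fully async instead of delegating `_call` to a thread executor.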