docs: Update documentation for custom LLMs (#19972)

Update documentation for customizing LLMs
Eugene Yurtsev
2024-04-11 12:21:27 -04:00
committed by GitHub
parent 799714c629
commit 653489a1a9
2 changed files with 429 additions and 58 deletions


@@ -557,6 +557,25 @@ class BaseLLM(BaseLanguageModel[str], ABC):
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[GenerationChunk]:
"""Stream the LLM on the given prompt.
This method should be overridden by subclasses that support streaming.
If not implemented, the default behavior of calls to stream will be to
fallback to the non-streaming version of the model and return
the output as a single chunk.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An iterator of GenerationChunks.
"""
raise NotImplementedError()
async def _astream(
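
For reference, a subclass that supports streaming would override `_stream` roughly
as in the minimal sketch below. The `EchoStreamingLLM` class and its
character-by-character behavior are illustrative, not part of the library:

from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk


class EchoStreamingLLM(LLM):
    """Toy LLM that streams the prompt back one character at a time."""

    @property
    def _llm_type(self) -> str:
        return "echo-streaming"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # Non-streaming fallback: return the whole output in one piece.
        return prompt

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        # Yield one GenerationChunk per character; a real model would yield
        # tokens as they arrive from the provider API, and report each one
        # to the callback manager.
        for ch in prompt:
            chunk = GenerationChunk(text=ch)
            if run_manager:
                run_manager.on_llm_new_token(ch, chunk=chunk)
            yield chunk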
@@ -566,6 +585,23 @@ class BaseLLM(BaseLanguageModel[str], ABC):
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> AsyncIterator[GenerationChunk]:
"""An async version of the _stream method.
The default implementation uses the synchronous _stream method and wraps it in
an async iterator. Subclasses that need to provide a true async implementation
should override this method.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An async iterator of GenerationChunks.
"""
iterator = await run_in_executor(
None,
self._stream,
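
Overriding `_astream` skips that executor round-trip. A sketch of a native async
implementation, extending the hypothetical `EchoStreamingLLM` above:

from typing import Any, AsyncIterator, List, Optional

from langchain_core.callbacks import AsyncCallbackManagerForLLMRun
from langchain_core.outputs import GenerationChunk


class AsyncEchoStreamingLLM(EchoStreamingLLM):
    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        # A native async generator: no thread executor involved. A real
        # implementation would iterate over the provider's async stream.
        for ch in prompt:
            chunk = GenerationChunk(text=ch)
            if run_manager:
                await run_manager.on_llm_new_token(ch, chunk=chunk)
            yield chunk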
@@ -1182,10 +1218,28 @@ class BaseLLM(BaseLanguageModel[str], ABC):
class LLM(BaseLLM):
"""Base LLM abstract class.
"""This class exposes a simple interface for implementing a custom LLM.
The purpose of this class is to expose a simpler interface for working
with LLMs, rather than expect the user to implement the full _generate method.
You should subclass this class and implement the following:
- `_call` method: Run the LLM on the given prompt and input (used by `invoke`).
- `_identifying_params` property: Return a dictionary of the identifying parameters
This is critical for caching and tracing purposes. Identifying parameters
is a dict that identifies the LLM.
It should mostly include a `model_name`.
Optional: Override the following methods to provide more optimizations:
- `_acall`: Provide a native async version of the `_call` method.
If not provided, will delegate to the synchronous version using
`run_in_executor`. (Used by `ainvoke`).
- `_stream`: Stream the LLM on the given prompt and input.
`stream` will use `_stream` if provided, otherwise it
use `_call` and output will arrive in one chunk.
- `_astream`: Override to provide a native async version of the `_stream` method.
`astream` will use `_astream` if provided, otherwise it will implement
a fallback behavior that will use `_stream` if `_stream` is implemented,
and use `_acall` if `_stream` is not implemented.
"""
@abstractmethod
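
Putting the required pieces together, a complete custom LLM can be as small as the
sketch below. The `ParrotLLM` name and its truncating behavior are made up for
illustration:

from typing import Any, Dict, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM


class ParrotLLM(LLM):
    """Toy LLM that returns the first n characters of the prompt."""

    n: int = 10

    @property
    def _llm_type(self) -> str:
        return "parrot"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        # Identifies this model for caching and tracing.
        return {"model_name": "parrot", "n": self.n}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        if stop is not None:
            raise NotImplementedError("stop sequences are not supported.")
        return prompt[: self.n]

With only these members defined, `ParrotLLM(n=5).invoke("hello world")` returns
"hello", and `stream`, `batch`, and the async variants all work through the
inherited fallbacks.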
@@ -1196,7 +1250,22 @@ class LLM(BaseLLM):
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Run the LLM on the given prompt and input."""
"""Run the LLM on the given input.
Override this method to implement the LLM logic.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.
"""
async def _acall(
self,
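
On the stop-word contract described above: when a provider does not support stop
sequences natively, the truncation can be enforced client-side instead of raising.
A self-contained sketch of that logic (the helper name is illustrative, not a
langchain_core API):

from typing import List, Optional


def cut_at_stop_tokens(text: str, stop: Optional[List[str]]) -> str:
    """Truncate text at the first occurrence of any stop substring."""
    if not stop:
        return text
    indices = [text.find(s) for s in stop if s in text]
    return text[: min(indices)] if indices else text


assert cut_at_stop_tokens("Final answer.\nObservation:", ["Observation:"]) == "Final answer.\n"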
@@ -1205,7 +1274,24 @@ class LLM(BaseLLM):
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Run the LLM on the given prompt and input."""
"""Async version of the _call method.
The default implementation delegates to the synchronous _call method using
`run_in_executor`. Subclasses that need to provide a true async implementation
should override this method to reduce the overhead of using `run_in_executor`.
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.
"""
return await run_in_executor(
None,
self._call,
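
For models backed by a genuinely asynchronous client, overriding `_acall` avoids
that executor round-trip entirely. A sketch, reusing the hypothetical `ParrotLLM`
from above and simulating provider I/O with `asyncio.sleep`:

import asyncio
from typing import Any, List, Optional

from langchain_core.callbacks import AsyncCallbackManagerForLLMRun


class AsyncParrotLLM(ParrotLLM):
    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # A real implementation would await the provider's async client here.
        await asyncio.sleep(0)  # stands in for network I/O
        return prompt[: self.n]

Called via `asyncio.run(AsyncParrotLLM(n=5).ainvoke("hello world"))`, this returns
"hello" without ever touching a thread executor.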