From 5336d87c15c49f169e89b00feb4623d1b32e89f3 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Fri, 22 Sep 2023 09:16:40 -0700 Subject: [PATCH] update feat table (#10939) --- docs/_scripts/model_feat_table.py | 17 ++++++++++------- docs/extras/integrations/chat/index.mdx | 9 +++++---- docs/extras/integrations/llms/index.mdx | 8 +++++--- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/docs/_scripts/model_feat_table.py b/docs/_scripts/model_feat_table.py index 61e18091140..33f48881820 100644 --- a/docs/_scripts/model_feat_table.py +++ b/docs/_scripts/model_feat_table.py @@ -34,10 +34,12 @@ sidebar_class_name: hidden import DocCardList from "@theme/DocCardList"; ## Features (natively supported) -All `LLM`s implement the LCEL `Runnable` interface, meaning they all expose functioning `invoke`, `ainvoke`, `batch`, `abatch`, `stream`, and `astream` methods. -*That is, they all have functioning sync, async, streaming, and batch generation methods.* +All LLMs implement the Runnable interface, which comes with default implementations of all methods, i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all LLMs basic support for async, streaming and batch, which by default is implemented as below: +- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread. +- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations. 
+- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`. -This table highlights specifically those integrations that **natively support** batching, streaming, and asynchronous generation (meaning these features are built into the 3rd-party integration). +Each LLM integration optionally can implement native support for async, streaming or batch, which, for providers that support it, can be more efficient. {table} @@ -55,11 +57,12 @@ sidebar_class_name: hidden import DocCardList from "@theme/DocCardList"; ## Features (natively supported) -All `ChatModel`s implement the LCEL `Runnable` interface, meaning they all expose functioning `invoke`, `ainvoke`, `stream`, and `astream` (and `batch`, `abatch`) methods. -*That is, they all have functioning sync, async and streaming generation methods.* - -This table highlights specifically those integrations that **natively support** streaming and asynchronous generation (meaning these features are built into the 3rd-party integration). +All ChatModels implement the Runnable interface, which comes with default implementations of all methods, i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all ChatModels basic support for async, streaming and batch, which by default is implemented as below: +- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread. +- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. 
This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations. +- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`. +Each ChatModel integration optionally can implement native support for async, streaming or batch, which, for providers that support it, can be more efficient. {table} diff --git a/docs/extras/integrations/chat/index.mdx b/docs/extras/integrations/chat/index.mdx index 38e4a81be33..001a606b02b 100644 --- a/docs/extras/integrations/chat/index.mdx +++ b/docs/extras/integrations/chat/index.mdx @@ -8,11 +8,12 @@ sidebar_class_name: hidden import DocCardList from "@theme/DocCardList"; ## Features (natively supported) -All `ChatModel`s implement the LCEL `Runnable` interface, meaning they all expose functioning `invoke`, `ainvoke`, `stream`, and `astream` (and `batch`, `abatch`) methods. -*That is, they all have functioning sync, async and streaming generation methods.* - -This table highlights specifically those integrations that **natively support** streaming and asynchronous generation (meaning these features are built into the 3rd-party integration). +All ChatModels implement the Runnable interface, which comes with default implementations of all methods, i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all ChatModels basic support for async, streaming and batch, which by default is implemented as below: +- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread. 
+- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations. +- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`. +Each ChatModel integration optionally can implement native support for async, streaming or batch, which, for providers that support it, can be more efficient. Model|Generate|Async generate|Stream|Async stream :-|:-:|:-:|:-:|:-: AzureChatOpenAI|✅|✅|✅|✅ diff --git a/docs/extras/integrations/llms/index.mdx b/docs/extras/integrations/llms/index.mdx index 872bd8a6d1b..54a28a52363 100644 --- a/docs/extras/integrations/llms/index.mdx +++ b/docs/extras/integrations/llms/index.mdx @@ -8,10 +8,12 @@ sidebar_class_name: hidden import DocCardList from "@theme/DocCardList"; ## Features (natively supported) -All `LLM`s implement the LCEL `Runnable` interface, meaning they all expose functioning `invoke`, `ainvoke`, `batch`, `abatch`, `stream`, and `astream` methods. -*That is, they all have functioning sync, async, streaming, and batch generation methods.* +All LLMs implement the Runnable interface, which comes with default implementations of all methods, i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all LLMs basic support for async, streaming and batch, which by default is implemented as below: +- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. 
This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread. +- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations. +- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`. -This table highlights specifically those integrations that **natively support** batching, streaming, and asynchronous generation (meaning these features are built into the 3rd-party integration). +Each LLM integration optionally can implement native support for async, streaming or batch, which, for providers that support it, can be more efficient. Model|Generate|Async generate|Stream|Async stream|Batch|Async batch :-|:-:|:-:|:-:|:-:|:-:|:-: