From ce61840e3b25d48a2f5088b0887efc4e6f903439 Mon Sep 17 00:00:00 2001
From: Jiayi Ni <105399924+jiayini1119@users.noreply.github.com>
Date: Mon, 18 Sep 2023 23:36:29 +0800
Subject: [PATCH] ENH: Add `llm_kwargs` for Xinference LLMs (#10354)

- This PR adds `llm_kwargs` to the initialization of Xinference LLMs
  (integrated in #8171).
- With this enhancement, users can provide `generate_config` not only when
  calling the LLMs for generation but also during initialization. This allows
  users to include custom configurations when using LangChain features such as
  LLMChain.
- It also fixes some formatting issues in the docstrings.
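As an illustration of the intended usage (a minimal sketch: the model UID is a
placeholder for the UID returned by `xinference launch`, and `max_tokens` /
`temperature` are example generation settings):

.. code-block:: python

    from langchain.chains import LLMChain
    from langchain.llms import Xinference
    from langchain.prompts import PromptTemplate

    # Generation defaults can now be supplied at construction time; they are
    # merged into generate_config on every call.
    llm = Xinference(
        server_url="http://0.0.0.0:9997",
        model_uid="<model_uid>",  # placeholder
        max_tokens=512,
        temperature=0.7,
    )

    prompt = PromptTemplate(
        template="Q: What can we visit in the capital of {country}? A:",
        input_variables=["country"],
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    print(chain.run(country="France"))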
---
 .../integrations/providers/xinference.mdx     |  4 +-
 .../langchain/embeddings/xinference.py        | 22 +++++--
 libs/langchain/langchain/llms/xinference.py   | 57 +++++++++++++------
 3 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/docs/extras/integrations/providers/xinference.mdx b/docs/extras/integrations/providers/xinference.mdx
index 3b1d57725e4..41a7f44bbc8 100644
--- a/docs/extras/integrations/providers/xinference.mdx
+++ b/docs/extras/integrations/providers/xinference.mdx
@@ -93,10 +93,10 @@ llm(
 ### Usage
 
 For more information and detailed examples, refer to the
-[example notebook for xinference](../modules/models/llms/integrations/xinference.ipynb)
+[example for xinference LLMs](/docs/integrations/llms/xinference.html)
 
 ### Embeddings
 
 Xinference also supports embedding queries and documents. See
-[example notebook for xinference embeddings](../modules/data_connection/text_embedding/integrations/xinference.ipynb)
+[example for xinference embeddings](/docs/integrations/text_embedding/xinference.html)
 for a more detailed demo.
\ No newline at end of file

diff --git a/libs/langchain/langchain/embeddings/xinference.py b/libs/langchain/langchain/embeddings/xinference.py
index 8be92fc64c4..1609f2b537d 100644
--- a/libs/langchain/langchain/embeddings/xinference.py
+++ b/libs/langchain/langchain/embeddings/xinference.py
@@ -8,37 +8,47 @@ class XinferenceEmbeddings(Embeddings):
     """Wrapper around xinference embedding models.
     To use, you should have the xinference library installed:
+
     .. code-block:: bash
         pip install xinference
     Check out: https://github.com/xorbitsai/inference
-    To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers
+    To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers.
+
     Example:
         To start a local instance of Xinference, run
-        .. code-block:: bash
-            $ xinference
+        .. code-block:: bash
+
+            $ xinference
+
         You can also deploy Xinference in a distributed cluster. Here are the steps:
+
         Starting the supervisor:
+
         .. code-block:: bash
-            $ xinference-supervisor
+            $ xinference-supervisor
+
         Starting the worker:
+
         .. code-block:: bash
-            $ xinference-worker
+            $ xinference-worker
     Then, launch a model using command line interface (CLI).
     Example:
+
     .. code-block:: bash
-        $ xinference launch -n orca -s 3 -q q4_0
+        $ xinference launch -n orca -s 3 -q q4_0
     It will return a model UID. Then you can use Xinference Embedding with LangChain.
     Example:
+
     .. code-block:: python
         from langchain.embeddings import XinferenceEmbeddings
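For context, a minimal sketch of using the embedding wrapper touched above
(illustrative only; the server URL and model UID are placeholders, and
`embed_query` / `embed_documents` come from LangChain's standard `Embeddings`
interface):

.. code-block:: python

    from langchain.embeddings import XinferenceEmbeddings

    xinference = XinferenceEmbeddings(
        server_url="http://0.0.0.0:9997",
        model_uid="<model_uid>",  # placeholder: UID returned by `xinference launch`
    )

    query_vector = xinference.embed_query("What is the capital of France?")
    doc_vectors = xinference.embed_documents(["document one", "document two"])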
diff --git a/libs/langchain/langchain/llms/xinference.py b/libs/langchain/langchain/llms/xinference.py
index faf0ef0258e..3b16838da1a 100644
--- a/libs/langchain/langchain/llms/xinference.py
+++ b/libs/langchain/langchain/llms/xinference.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Generator, List, Mapping, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Mapping, Optional, Union
 
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.llms.base import LLM
@@ -11,55 +11,65 @@ if TYPE_CHECKING:
 class Xinference(LLM):
     """Wrapper for accessing Xinference's large-scale model inference service.
     To use, you should have the xinference library installed:
+
     .. code-block:: bash
-        pip install "xinference[all]"
+        pip install "xinference[all]"
     Check out: https://github.com/xorbitsai/inference
     To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers
+
     Example:
         To start a local instance of Xinference, run
-        .. code-block:: bash
-            $ xinference
+        .. code-block:: bash
+
+            $ xinference
         You can also deploy Xinference in a distributed cluster. Here are the steps:
+
         Starting the supervisor:
+
         .. code-block:: bash
-            $ xinference-supervisor
+            $ xinference-supervisor
         Starting the worker:
+
         .. code-block:: bash
-            $ xinference-worker
+            $ xinference-worker
     Then, launch a model using command line interface (CLI).
     Example:
+
     .. code-block:: bash
-        $ xinference launch -n orca -s 3 -q q4_0
+        $ xinference launch -n orca -s 3 -q q4_0
     It will return a model UID. Then, you can use Xinference with LangChain.
     Example:
-    .. code-block:: python
-        from langchain.llms import Xinference
+    .. code-block:: python
 
-        llm = Xinference(
-            server_url="http://0.0.0.0:9997",
-            model_uid = {model_uid} # replace model_uid with the model UID return from launching the model
-        )
+        from langchain.llms import Xinference
 
-        llm(
-            prompt="Q: where can we visit in the capital of France? A:",
-            generate_config={"max_tokens": 1024, "stream": True},
-        )
+        llm = Xinference(
+            server_url="http://0.0.0.0:9997",
+            model_uid = {model_uid}  # replace model_uid with the model UID returned from launching the model
+        )
+
+        llm(
+            prompt="Q: where can we visit in the capital of France? A:",
+            generate_config={"max_tokens": 1024, "stream": True},
+        )
 
     To view all the supported builtin models, run:
+
     .. code-block:: bash
+
         $ xinference list --all
     """  # noqa: E501
@@ -69,9 +79,14 @@ class Xinference(LLM):
     """URL of the xinference server"""
     model_uid: Optional[str]
     """UID of the launched model"""
+    model_kwargs: Dict[str, Any]
+    """Keyword arguments to be passed to xinference.LLM"""
 
     def __init__(
-        self, server_url: Optional[str] = None, model_uid: Optional[str] = None
+        self,
+        server_url: Optional[str] = None,
+        model_uid: Optional[str] = None,
+        **model_kwargs: Any,
     ):
         try:
             from xinference.client import RESTfulClient
@@ -81,10 +96,13 @@ class Xinference(LLM):
                 " with `pip install xinference`."
             ) from e
 
+        model_kwargs = model_kwargs or {}
+
         super().__init__(
             **{
                 "server_url": server_url,
                 "model_uid": model_uid,
+                "model_kwargs": model_kwargs,
             }
         )
 
@@ -107,6 +125,7 @@ class Xinference(LLM):
         return {
             **{"server_url": self.server_url},
             **{"model_uid": self.model_uid},
+            **{"model_kwargs": self.model_kwargs},
         }
 
     def _call(
@@ -131,6 +150,8 @@ class Xinference(LLM):
         generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})
 
+        generate_config = {**self.model_kwargs, **generate_config}
+
         if stop:
             generate_config["stop"] = stop
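Note on the merge above (illustrative sketch; the model UID is a placeholder):
because the call-time `generate_config` is unpacked last, its keys override any
defaults supplied at construction time.

.. code-block:: python

    from langchain.llms import Xinference

    llm = Xinference(
        server_url="http://0.0.0.0:9997",
        model_uid="<model_uid>",  # placeholder
        max_tokens=256,           # constructor-level default, stored in model_kwargs
    )

    # model_kwargs is merged first, then the call-time generate_config,
    # so this call runs with max_tokens=1024.
    llm(
        prompt="Q: where can we visit in the capital of France? A:",
        generate_config={"max_tokens": 1024},
    )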