ENH: Add llm_kwargs for Xinference LLMs (#10354)
- This PR adds `llm_kwargs` to the initialization of Xinference LLMs (integrated in #8171).
- With this enhancement, users can provide `generate_configs` not only when calling the LLMs for generation but also at initialization. This lets users bake custom configurations into the model when using LangChain features such as LLMChain (a usage sketch follows below).
- It also fixes some formatting issues in the docstrings.
This commit is contained in: parent 1eefb9052b, commit ce61840e3b
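To illustrate the enhancement, here is a minimal usage sketch of what the new constructor kwargs enable. The server URL and model UID are placeholders for your own deployment, and the specific generation options (`temperature`, `max_tokens`) are illustrative assumptions, not values taken from this PR:

    from langchain.chains import LLMChain
    from langchain.llms import Xinference
    from langchain.prompts import PromptTemplate

    # Generation options can now be fixed at construction time via **model_kwargs
    # instead of being passed on every call.
    llm = Xinference(
        server_url="http://0.0.0.0:9997",   # placeholder: your Xinference endpoint
        model_uid="<model_uid>",            # placeholder: UID from `xinference launch`
        temperature=0.3,
        max_tokens=128,
    )

    prompt = PromptTemplate(
        input_variables=["country"],
        template="Q: What is the capital of {country}? A:",
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    print(chain.run(country="France"))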
@@ -93,10 +93,10 @@ llm(
### Usage

For more information and detailed examples, refer to the
-[example notebook for xinference](../modules/models/llms/integrations/xinference.ipynb)
+[example for xinference LLMs](/docs/integrations/llms/xinference.html)

### Embeddings

Xinference also supports embedding queries and documents. See
-[example notebook for xinference embeddings](../modules/data_connection/text_embedding/integrations/xinference.ipynb)
+[example for xinference embeddings](/docs/integrations/text_embedding/xinference.html)
for a more detailed demo.
@@ -8,23 +8,31 @@ class XinferenceEmbeddings(Embeddings):

"""Wrapper around xinference embedding models.
To use, you should have the xinference library installed:

.. code-block:: bash

   pip install xinference

Check out: https://github.com/xorbitsai/inference
-To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers
+To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers.

Example:
To start a local instance of Xinference, run

.. code-block:: bash

   $ xinference

You can also deploy Xinference in a distributed cluster. Here are the steps:

Starting the supervisor:

.. code-block:: bash

   $ xinference-supervisor

Starting the worker:

.. code-block:: bash

   $ xinference-worker
@@ -32,6 +40,7 @@ class XinferenceEmbeddings(Embeddings):
Then, launch a model using command line interface (CLI).

Example:

.. code-block:: bash

   $ xinference launch -n orca -s 3 -q q4_0
@@ -39,6 +48,7 @@ class XinferenceEmbeddings(Embeddings):
It will return a model UID. Then you can use Xinference Embedding with LangChain.

Example:

.. code-block:: python

   from langchain.embeddings import XinferenceEmbeddings
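The docstring example above stops at the import in this view; for context, a short sketch of how the embeddings wrapper is typically wired up. The server URL and model UID are placeholders, and this usage is not part of the diff itself:

    from langchain.embeddings import XinferenceEmbeddings

    xinference = XinferenceEmbeddings(
        server_url="http://0.0.0.0:9997",   # placeholder: your Xinference endpoint
        model_uid="<model_uid>",            # placeholder: UID from `xinference launch`
    )
    query_vector = xinference.embed_query("What is Xinference?")
    doc_vectors = xinference.embed_documents(
        ["Xinference serves LLMs and embedding models."]
    )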
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Any, Generator, List, Mapping, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Mapping, Optional, Union

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
@@ -11,25 +11,31 @@ if TYPE_CHECKING:
class Xinference(LLM):
"""Wrapper for accessing Xinference's large-scale model inference service.
To use, you should have the xinference library installed:

.. code-block:: bash

   pip install "xinference[all]"

Check out: https://github.com/xorbitsai/inference
To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers

Example:
To start a local instance of Xinference, run

.. code-block:: bash

   $ xinference

You can also deploy Xinference in a distributed cluster. Here are the steps:

Starting the supervisor:

.. code-block:: bash

   $ xinference-supervisor

Starting the worker:

.. code-block:: bash

   $ xinference-worker
@@ -37,6 +43,7 @@ class Xinference(LLM):
Then, launch a model using command line interface (CLI).

Example:

.. code-block:: bash

   $ xinference launch -n orca -s 3 -q q4_0
@@ -44,6 +51,7 @@ class Xinference(LLM):
It will return a model UID. Then, you can use Xinference with LangChain.

Example:

.. code-block:: python

   from langchain.llms import Xinference
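Likewise, the LLM docstring example is truncated at the import in this view; a hedged completion showing the pre-existing call-time configuration path might look like this (server URL and model UID are placeholders):

    from langchain.llms import Xinference

    llm = Xinference(
        server_url="http://0.0.0.0:9997",   # placeholder: your Xinference endpoint
        model_uid="<model_uid>",            # placeholder: UID from `xinference launch`
    )
    # generate_config can still be supplied per call, as it could before this PR.
    print(llm("Q: What is the largest animal? A:", generate_config={"max_tokens": 1024}))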
@@ -59,7 +67,9 @@ class Xinference(LLM):
    )

To view all the supported builtin models, run:

.. code-block:: bash

   $ xinference list --all

""" # noqa: E501
@@ -69,9 +79,14 @@ class Xinference(LLM):
    """URL of the xinference server"""
    model_uid: Optional[str]
    """UID of the launched model"""
+   model_kwargs: Dict[str, Any]
+   """Key word arguments to be passed to xinference.LLM"""

    def __init__(
-       self, server_url: Optional[str] = None, model_uid: Optional[str] = None
+       self,
+       server_url: Optional[str] = None,
+       model_uid: Optional[str] = None,
+       **model_kwargs: Any,
    ):
        try:
            from xinference.client import RESTfulClient
@@ -81,10 +96,13 @@ class Xinference(LLM):
                " with `pip install xinference`."
            ) from e

+       model_kwargs = model_kwargs or {}
+
        super().__init__(
            **{
                "server_url": server_url,
                "model_uid": model_uid,
+               "model_kwargs": model_kwargs,
            }
        )

@@ -107,6 +125,7 @@ class Xinference(LLM):
        return {
            **{"server_url": self.server_url},
            **{"model_uid": self.model_uid},
+           **{"model_kwargs": self.model_kwargs},
        }

    def _call(
@@ -131,6 +150,8 @@ class Xinference(LLM):

        generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})

+       generate_config = {**self.model_kwargs, **generate_config}
+
        if stop:
            generate_config["stop"] = stop

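The merge on the added line gives call-time `generate_config` precedence over the constructor's `model_kwargs`; a tiny self-contained sketch of that dict-merge behavior (values made up for illustration):

    # Defaults captured at __init__(**model_kwargs) time (illustrative values).
    model_kwargs = {"temperature": 0.3, "max_tokens": 128}
    # Options passed for a single call.
    generate_config = {"temperature": 0.9}

    # Same merge as in _call: later keys win, so per-call settings override defaults.
    merged = {**model_kwargs, **generate_config}
    assert merged == {"temperature": 0.9, "max_tokens": 128}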