Mirror of https://github.com/hwchase17/langchain.git
Synced 2025-08-09 21:08:59 +00:00
feat(llms): support vLLM's OpenAI-compatible server (#9179)
This PR adds support for [vLLM's OpenAI-compatible server feature](https://vllm.readthedocs.io/en/latest/getting_started/quickstart.html#openai-compatible-server), i.e. it allows vLLM-served LLMs to be called as if they were OpenAI's. I've also updated the related notebook with an example usage. At the moment, vLLM only supports the `Completion` API.
This commit is contained in:
parent 621da3c164
commit d95eeaedbe
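To make the drop-in claim concrete, here is a minimal sketch (not part of this commit) of querying a locally running vLLM server with the stock pre-1.0 `openai` Python client, reusing the endpoint and model name from the notebook example in the diff below:

```python
import openai

# Point the stock OpenAI client at the local vLLM server.
# vLLM ignores the API key, so any placeholder works.
openai.api_key = "EMPTY"
openai.api_base = "http://localhost:8000/v1"

# vLLM only supports the Completion API at the moment, so no ChatCompletion.
completion = openai.Completion.create(
    model="tiiuae/falcon-7b",
    prompt="Rome is",
    stop=["."],
)
print(completion.choices[0].text)
```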
```diff
@@ -170,6 +170,51 @@
     "\n",
     "llm(\"What is the future of AI?\")"
    ]
-  }
+  },
+  {
+   "cell_type": "markdown",
+   "id": "64e89be0-6ad7-43a8-9dac-1324dcd4e851",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## OpenAI-Compatible Server\n",
+    "\n",
+    "vLLM can be deployed as a server that mimics the OpenAI API protocol. This allows vLLM to be used as a drop-in replacement for applications using OpenAI API.\n",
+    "\n",
+    "This server can be queried in the same format as OpenAI API.\n",
+    "\n",
+    "### OpenAI-Compatible Completion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c3cbc428-0bb8-422a-913e-1c6fef8b89d4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " a city that is filled with history, ancient buildings, and art around every corner\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.llms import VLLMOpenAI\n",
+    "\n",
+    "\n",
+    "llm = VLLMOpenAI(\n",
+    "    openai_api_key=\"EMPTY\",\n",
+    "    openai_api_base=\"http://localhost:8000/v1\",\n",
+    "    model_name=\"tiiuae/falcon-7b\",\n",
+    "    model_kwargs={\"stop\": [\".\"]}\n",
+    ")\n",
+    "print(llm(\"Rome is\"))"
+   ]
+  }
  ],
  "metadata": {
```
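Since the server mimics the OpenAI wire protocol, the notebook cell added above boils down to a plain HTTP request. A hedged sketch with `requests` (illustrative only, not part of the commit; the payload follows the OpenAI Completion schema that vLLM emulates):

```python
import requests

# The same completion request the notebook cell issues, sent as raw HTTP
# against vLLM's OpenAI-compatible /v1/completions endpoint.
response = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "tiiuae/falcon-7b",
        "prompt": "Rome is",
        "stop": ["."],
    },
)
print(response.json()["choices"][0]["text"])
```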
```diff
@@ -80,7 +80,7 @@ from langchain.llms.textgen import TextGen
 from langchain.llms.titan_takeoff import TitanTakeoff
 from langchain.llms.tongyi import Tongyi
 from langchain.llms.vertexai import VertexAI
-from langchain.llms.vllm import VLLM
+from langchain.llms.vllm import VLLM, VLLMOpenAI
 from langchain.llms.writer import Writer
 from langchain.llms.xinference import Xinference

@@ -149,6 +149,7 @@ __all__ = [
     "Tongyi",
     "VertexAI",
     "VLLM",
+    "VLLMOpenAI",
     "Writer",
     "OctoAIEndpoint",
     "Xinference",

@@ -213,6 +214,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "openllm": OpenLLM,
     "openllm_client": OpenLLM,
     "vllm": VLLM,
+    "vllm_openai": VLLMOpenAI,
     "writer": Writer,
     "xinference": Xinference,
 }
```
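The new `type_to_cls_dict` entry makes the class resolvable by its string key, which is what LangChain's config-driven loading relies on. A rough sketch of that lookup (illustrative only; the constructor arguments mirror the notebook example):

```python
from langchain.llms import type_to_cls_dict

# Resolve the class from the registry key added above and instantiate it.
llm_cls = type_to_cls_dict["vllm_openai"]
llm = llm_cls(
    openai_api_key="EMPTY",
    openai_api_base="http://localhost:8000/v1",
    model_name="tiiuae/falcon-7b",
)
```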
```diff
@@ -4,6 +4,7 @@ from pydantic import root_validator

 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.llms.base import BaseLLM
+from langchain.llms.openai import BaseOpenAI
 from langchain.schema.output import Generation, LLMResult


@@ -127,3 +128,27 @@ class VLLM(BaseLLM):
     def _llm_type(self) -> str:
         """Return type of llm."""
         return "vllm"
+
+
+class VLLMOpenAI(BaseOpenAI):
+    """vLLM OpenAI-compatible API client"""
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        """Get the parameters used to invoke the model."""
+        openai_creds: Dict[str, Any] = {
+            "api_key": self.openai_api_key,
+            "api_base": self.openai_api_base,
+        }
+
+        return {
+            "model": self.model_name,
+            **openai_creds,
+            **self._default_params,
+            "logit_bias": None,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "vllm-openai"
```
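Overriding `_invocation_params` is the whole trick: `VLLMOpenAI` inherits all of `BaseOpenAI`'s request handling and only swaps in the custom `api_base`, while forcing `logit_bias` to `None` since vLLM's server does not support it. A quick sketch of what the override produces (illustrative, not part of the commit):

```python
from langchain.llms import VLLMOpenAI

llm = VLLMOpenAI(
    openai_api_key="EMPTY",
    openai_api_base="http://localhost:8000/v1",
    model_name="tiiuae/falcon-7b",
)

params = llm._invocation_params
# The override pins requests to the configured server and disables logit_bias.
assert params["api_base"] == "http://localhost:8000/v1"
assert params["model"] == "tiiuae/falcon-7b"
assert params["logit_bias"] is None
```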