refactor: Refactor proxy LLM (#1064)

This commit is contained in:
Fangyin Cheng
2024-01-14 21:01:37 +08:00
committed by GitHub
parent a035433170
commit 22bfd01c4b
95 changed files with 2049 additions and 1294 deletions


@@ -1,259 +1,231 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import importlib.metadata as metadata
import logging
import os
from typing import List
from concurrent.futures import Executor
from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Optional, Union
import httpx
from dbgpt.core.interface.message import ModelMessage, ModelMessageRoleType
from dbgpt.core import (
MessageConverter,
ModelMetadata,
ModelOutput,
ModelRequest,
ModelRequestContext,
)
from dbgpt.model.parameter import ProxyModelParameters
from dbgpt.model.proxy.base import ProxyLLMClient
from dbgpt.model.proxy.llms.proxy_model import ProxyModel
from dbgpt.model.utils.chatgpt_utils import OpenAIParameters
if TYPE_CHECKING:
from httpx._types import ProxiesTypes
from openai import AsyncAzureOpenAI, AsyncOpenAI
ClientType = Union[AsyncAzureOpenAI, AsyncOpenAI]
logger = logging.getLogger(__name__)
def _initialize_openai(params: ProxyModelParameters):
try:
import openai
except ImportError as exc:
raise ValueError(
"Could not import python package: openai "
"Please install openai by command `pip install openai` "
) from exc
api_type = params.proxy_api_type or os.getenv("OPENAI_API_TYPE", "open_ai")
api_base = params.proxy_api_base or os.getenv(
"OPENAI_API_BASE",
os.getenv("AZURE_OPENAI_ENDPOINT") if api_type == "azure" else None,
)
api_key = params.proxy_api_key or os.getenv(
"OPENAI_API_KEY",
os.getenv("AZURE_OPENAI_KEY") if api_type == "azure" else None,
)
api_version = params.proxy_api_version or os.getenv("OPENAI_API_VERSION")
if not api_base and params.proxy_server_url:
# Adapt previous proxy_server_url configuration
api_base = params.proxy_server_url.split("/chat/completions")[0]
if api_type:
openai.api_type = api_type
if api_base:
openai.api_base = api_base
if api_key:
openai.api_key = api_key
if api_version:
openai.api_version = api_version
if params.http_proxy:
openai.proxy = params.http_proxy
openai_params = {
"api_type": api_type,
"api_base": api_base,
"api_version": api_version,
"proxy": params.http_proxy,
}
return openai_params
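For context, the pre-1.0 openai SDK is configured through module-level globals, which is what the dict above feeds. A minimal sketch of how those settings are consumed (endpoint and key are placeholders, not values from this repo):

import openai

# Placeholder configuration; mirrors what _initialize_openai sets globally.
openai.api_type = "open_ai"
openai.api_base = "https://api.openai.com/v1"
openai.api_key = "sk-..."  # placeholder; never hard-code real keys

res = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
print(res.choices[0].message.content)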
def _initialize_openai_v1(params: ProxyModelParameters):
try:
from openai import OpenAI
except ImportError as exc:
raise ValueError(
"Could not import python package: openai. "
"Please install it with `pip install openai`."
) from exc
api_type = params.proxy_api_type or os.getenv("OPENAI_API_TYPE", "open_ai")
base_url = params.proxy_api_base or os.getenv(
"OPENAI_API_BASE",
os.getenv("AZURE_OPENAI_ENDPOINT") if api_type == "azure" else None,
)
api_key = params.proxy_api_key or os.getenv(
"OPENAI_API_KEY",
os.getenv("AZURE_OPENAI_KEY") if api_type == "azure" else None,
)
api_version = params.proxy_api_version or os.getenv("OPENAI_API_VERSION")
if not base_url and params.proxy_server_url:
# Adapt previous proxy_server_url configuration
base_url = params.proxy_server_url.split("/chat/completions")[0]
proxies = params.http_proxy
openai_params = {
"api_key": api_key,
"base_url": base_url,
}
return openai_params, api_type, api_version, proxies
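Unlike the legacy path, the v1 initializer returns everything the caller needs to build an explicit client object. A hedged sketch of consuming the returned tuple, mirroring the Azure/non-Azure branch used further down in this file (model_params is assumed to be a populated ProxyModelParameters):

import httpx
from openai import AsyncAzureOpenAI, AsyncOpenAI

openai_params, api_type, api_version, proxies = _initialize_openai_v1(model_params)
if api_type == "azure":
    client = AsyncAzureOpenAI(
        api_key=openai_params["api_key"],
        api_version=api_version,
        azure_endpoint=openai_params["base_url"],
        http_client=httpx.AsyncClient(proxies=proxies),
    )
else:
    client = AsyncOpenAI(
        **openai_params, http_client=httpx.AsyncClient(proxies=proxies)
    )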
def __convert_2_gpt_messages(messages: List[ModelMessage]):
gpt_messages = []
last_usr_message = ""
system_messages = []
# TODO: We can't change the message order at this low level
for message in messages:
if message.role == ModelMessageRoleType.HUMAN or message.role == "user":
last_usr_message = message.content
elif message.role == ModelMessageRoleType.SYSTEM:
system_messages.append(message.content)
elif message.role == ModelMessageRoleType.AI or message.role == "assistant":
last_ai_message = message.content
gpt_messages.append({"role": "user", "content": last_usr_message})
gpt_messages.append({"role": "assistant", "content": last_ai_message})
if len(system_messages) > 0:
if len(system_messages) < 2:
gpt_messages.insert(0, {"role": "system", "content": system_messages[0]})
gpt_messages.append({"role": "user", "content": last_usr_message})
else:
gpt_messages.append({"role": "user", "content": system_messages[1]})
else:
last_message = messages[-1]
if last_message.role == ModelMessageRoleType.HUMAN:
gpt_messages.append({"role": "user", "content": last_message.content})
return gpt_messages
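A worked example of the conversion above, with illustrative message contents:

# Illustrative input (role: content):
#   SYSTEM: "You are a helpful assistant."
#   HUMAN:  "Hello"
#   AI:     "Hi there!"
#   HUMAN:  "What is DB-GPT?"
# __convert_2_gpt_messages returns:
#   [{"role": "system", "content": "You are a helpful assistant."},
#    {"role": "user", "content": "Hello"},
#    {"role": "assistant", "content": "Hi there!"},
#    {"role": "user", "content": "What is DB-GPT?"}]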
def _build_request(model: ProxyModel, params):
model_params = model.get_params()
logger.info(f"Model: {model}, model_params: {model_params}")
messages: List[ModelMessage] = params["messages"]
# history = __convert_2_gpt_messages(messages)
convert_to_compatible_format = params.get("convert_to_compatible_format", False)
history = ModelMessage.to_openai_messages(
messages, convert_to_compatible_format=convert_to_compatible_format
)
payloads = {
"temperature": params.get("temperature"),
"max_tokens": params.get("max_new_tokens"),
"stream": True,
}
proxyllm_backend = model_params.proxyllm_backend
if metadata.version("openai") >= "1.0.0":
openai_params, api_type, api_version, proxies = _initialize_openai_v1(
model_params
)
proxyllm_backend = proxyllm_backend or "gpt-3.5-turbo"
payloads["model"] = proxyllm_backend
else:
openai_params = _initialize_openai(model_params)
if openai_params["api_type"] == "azure":
# engine = "deployment_name".
proxyllm_backend = proxyllm_backend or "gpt-35-turbo"
payloads["engine"] = proxyllm_backend
else:
proxyllm_backend = proxyllm_backend or "gpt-3.5-turbo"
payloads["model"] = proxyllm_backend
logger.info(f"Send request to real model {proxyllm_backend}")
return history, payloads
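To make the version gate concrete, here is the shape of the payload for each branch (temperature and token values are illustrative):

# openai >= 1.0.0:
#   {"temperature": 0.7, "max_tokens": 1024, "stream": True,
#    "model": "gpt-3.5-turbo"}
# openai < 1.0.0 with api_type == "azure":
#   {"temperature": 0.7, "max_tokens": 1024, "stream": True,
#    "engine": "gpt-35-turbo"}  # Azure addresses deployments by engine name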
async def chatgpt_generate_stream(
model: ProxyModel, tokenizer, params, device, context_len=2048
):
client: OpenAILLMClient = model.proxy_llm_client
context = ModelRequestContext(stream=True, user_name=params.get("user_name"))
request = ModelRequest.build_request(
client.default_model,
messages=params["messages"],
temperature=params.get("temperature"),
context=context,
max_new_tokens=params.get("max_new_tokens"),
)
async for r in client.generate_stream(request):
yield r
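A hedged usage sketch of the refactored entry point; model is assumed to be an initialized ProxyModel whose proxy_llm_client is an OpenAILLMClient, and messages a list of ModelMessage:

import asyncio

async def main():
    params = {"messages": messages, "temperature": 0.7, "max_new_tokens": 512}
    # tokenizer and device are unused by the proxy path, so None is fine here.
    async for output in chatgpt_generate_stream(model, None, params, None):
        print(output.text)

asyncio.run(main())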
class OpenAILLMClient(ProxyLLMClient):
def __init__(
self,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_type: Optional[str] = None,
api_version: Optional[str] = None,
model: Optional[str] = None,
proxies: Optional["ProxiesTypes"] = None,
timeout: Optional[int] = 240,
model_alias: Optional[str] = "chatgpt_proxyllm",
context_length: Optional[int] = 8192,
openai_client: Optional["ClientType"] = None,
openai_kwargs: Optional[Dict[str, Any]] = None,
**kwargs,
):
try:
import openai
except ImportError as exc:
raise ValueError(
"Could not import python package: openai "
"Please install openai by command `pip install openai"
) from exc
self._openai_version = metadata.version("openai")
self._openai_less_then_v1 = not self._openai_version >= "1.0.0"
self._init_params = OpenAIParameters(
api_type=api_type,
api_base=api_base,
api_key=api_key,
api_version=api_version,
proxies=proxies,
full_url=kwargs.get("full_url"),
)
self._model = model
self._proxies = proxies
self._timeout = timeout
self._model_alias = model_alias
self._context_length = context_length
self._api_type = api_type
self._client = openai_client
self._openai_kwargs = openai_kwargs or {}
super().__init__(model_names=[model_alias], context_length=context_length)
@classmethod
def new_client(
cls,
model_params: ProxyModelParameters,
default_executor: Optional[Executor] = None,
) -> "OpenAILLMClient":
return cls(
api_key=model_params.proxy_api_key,
api_base=model_params.proxy_api_base,
api_type=model_params.proxy_api_type,
api_version=model_params.proxy_api_version,
model=model_params.proxyllm_backend,
proxies=model_params.http_proxy,
model_alias=model_params.model_name,
context_length=max(model_params.max_context_size, 8192),
full_url=model_params.proxy_server_url,
)
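A sketch of bootstrapping the client from the legacy parameter object (field values are assumed for illustration):

model_params = ProxyModelParameters(
    model_name="chatgpt_proxyllm",
    model_path="chatgpt_proxyllm",
    proxy_api_key="sk-...",  # placeholder
    proxy_api_base="https://api.openai.com/v1",
    proxyllm_backend="gpt-3.5-turbo",
    max_context_size=4096,
)
client = OpenAILLMClient.new_client(model_params)
# context_length is clamped to at least 8192 by new_client above.
assert client.default_model == "gpt-3.5-turbo"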
@property
def client(self) -> ClientType:
if self._openai_less_then_v1:
raise ValueError(
"Current model (loaded by OpenAILLMClient) requires openai.__version__ >= 1.0.0"
)
if self._client is None:
from dbgpt.model.utils.chatgpt_utils import _build_openai_client

self._api_type, self._client = _build_openai_client(
init_params=self._init_params
)
return self._client
@property
def default_model(self) -> str:
model = self._model
if not model:
model = "gpt-35-turbo" if self._api_type == "azure" else "gpt-3.5-turbo"
return model
def _build_request(
self, request: ModelRequest, stream: Optional[bool] = False
) -> Dict[str, Any]:
payload = {"stream": stream}
model = request.model or self.default_model
if self._openai_less_then_v1 and self._api_type == "azure":
payload["engine"] = model
else:
payload["model"] = model
# Apply openai kwargs
for k, v in self._openai_kwargs.items():
payload[k] = v
if request.temperature:
payload["temperature"] = request.temperature
if request.max_new_tokens:
payload["max_tokens"] = request.max_new_tokens
return payload
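For a streaming request against openai >= 1.0, the method above produces a payload along these lines (values illustrative):

# {"stream": True, "model": "gpt-3.5-turbo",
#  "temperature": 0.7, "max_tokens": 512}
# ...plus any entries supplied via openai_kwargs, which are applied before
# the per-request temperature/max_tokens overrides.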
async def generate(
self,
request: ModelRequest,
message_converter: Optional[MessageConverter] = None,
) -> ModelOutput:
request = self.local_covert_message(request, message_converter)
messages = request.to_common_messages()
payload = self._build_request(request)
logger.info(
f"Send request to openai({self._openai_version}), payload: {payload}\n\n messages:\n{messages}"
)
try:
if self._openai_less_then_v1:
return await self.generate_less_then_v1(messages, payload)
else:
return await self.generate_v1(messages, payload)
except Exception as e:
return ModelOutput(
text=f"**LLMServer Generate Error, Please CheckErrorInfo.**: {e}",
error_code=1,
)
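A hedged example of the non-streaming path (client is assumed to be a constructed OpenAILLMClient; ModelMessage comes from dbgpt.core.interface.message):

request = ModelRequest.build_request(
    client.default_model,
    messages=[ModelMessage(role=ModelMessageRoleType.HUMAN, content="Hello")],
    temperature=0.7,
)
output = await client.generate(request)
print(output.text, output.usage)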
async def generate_stream(
self,
request: ModelRequest,
message_converter: Optional[MessageConverter] = None,
) -> AsyncIterator[ModelOutput]:
request = self.local_covert_message(request, message_converter)
messages = request.to_common_messages()
payload = self._build_request(request, stream=True)
logger.info(
f"Send request to openai({self._openai_version}), payload: {payload}\n\n messages:\n{messages}"
)
if self._openai_less_then_v1:
async for r in self.generate_stream_less_then_v1(messages, payload):
yield r
else:
async for r in self.generate_stream_v1(messages, payload):
yield r
async def generate_v1(
self, messages: List[Dict[str, Any]], payload: Dict[str, Any]
) -> ModelOutput:
chat_completion = await self.client.chat.completions.create(
messages=messages, **payload
)
text = chat_completion.choices[0].message.content
usage = chat_completion.usage.dict()
return ModelOutput(text=text, error_code=0, usage=usage)
async def generate_less_then_v1(
self, messages: List[Dict[str, Any]], payload: Dict[str, Any]
) -> ModelOutput:
import openai
chat_completion = await openai.ChatCompletion.acreate(
messages=messages, **payload
)
text = chat_completion.choices[0].message.content
usage = chat_completion.usage.to_dict()
return ModelOutput(text=text, error_code=0, usage=usage)
async def generate_stream_v1(
self, messages: List[Dict[str, Any]], payload: Dict[str, Any]
) -> AsyncIterator[ModelOutput]:
chat_completion = await self.client.chat.completions.create(
messages=messages, **payload
)
text = ""
# Azure OpenAI responses may have an empty choices list in the first
# chunk; skip such chunks to avoid an index-out-of-range error.
async for r in chat_completion:
if len(r.choices) == 0:
continue
if r.choices[0].delta.content is not None:
content = r.choices[0].delta.content
text += content
yield ModelOutput(text=text, error_code=0)
async def generate_stream_less_then_v1(
self, messages: List[Dict[str, Any]], payload: Dict[str, Any]
) -> AsyncIterator[ModelOutput]:
import openai

res = await openai.ChatCompletion.acreate(messages=messages, **payload)
text = ""
async for r in res:
if not r.get("choices"):
continue
if r["choices"][0]["delta"].get("content") is not None:
content = r["choices"][0]["delta"]["content"]
text += content
yield ModelOutput(text=text, error_code=0)
async def async_chatgpt_generate_stream(
model: ProxyModel, tokenizer, params, device, context_len=2048
):
if metadata.version("openai") >= "1.0.0":
model_params = model.get_params()
openai_params, api_type, api_version, proxies = _initialize_openai_v1(
model_params
)
history, payloads = _build_request(model, params)
if api_type == "azure":
from openai import AsyncAzureOpenAI
client = AsyncAzureOpenAI(
api_key=openai_params["api_key"],
api_version=api_version,
azure_endpoint=openai_params["base_url"],
http_client=httpx.AsyncClient(proxies=proxies),
)
else:
from openai import AsyncOpenAI
client = AsyncOpenAI(
**openai_params, http_client=httpx.AsyncClient(proxies=proxies)
)
res = await client.chat.completions.create(messages=history, **payloads)
text = ""
for r in res:
if not r.get("choices"):
continue
if r.choices[0].delta.content is not None:
content = r.choices[0].delta.content
text += content
yield text
else:
import openai
history, payloads = _build_request(model, params)
res = await openai.ChatCompletion.acreate(messages=history, **payloads)
text = ""
async for r in res:
if not r.get("choices"):
continue
if r["choices"][0]["delta"].get("content") is not None:
content = r["choices"][0]["delta"]["content"]
text += content
yield text
async def models(self) -> List[ModelMetadata]:
model_metadata = ModelMetadata(
model=self._model_alias,
context_length=await self.get_context_length(),
)
return [model_metadata]
async def get_context_length(self) -> int:
"""Get the context length of the model.

Returns:
int: The context length.
"""
# TODO: This is a temporary solution; we should instead query the real
# context length from the OpenAI API.
return self._context_length
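Putting the pieces together, a hedged end-to-end sketch of streaming through the refactored client (assumes OPENAI_API_KEY is set in the environment; the import path is assumed from this file's location in the repo):

import asyncio

from dbgpt.core import ModelRequest, ModelRequestContext
from dbgpt.core.interface.message import ModelMessage, ModelMessageRoleType
from dbgpt.model.proxy.llms.chatgpt import OpenAILLMClient

async def demo():
    client = OpenAILLMClient(model="gpt-3.5-turbo")
    context = ModelRequestContext(stream=True)
    request = ModelRequest.build_request(
        client.default_model,
        messages=[ModelMessage(role=ModelMessageRoleType.HUMAN, content="Hi")],
        context=context,
    )
    async for output in client.generate_stream(request):
        print(output.text)
    print(await client.models())

asyncio.run(demo())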