Native data AI application framework based on AWEL+AGENT (#1152)

Co-authored-by: Fangyin Cheng <staneyffer@gmail.com>
Co-authored-by: lcx01800250 <lcx01800250@alibaba-inc.com>
Co-authored-by: licunxing <864255598@qq.com>
Co-authored-by: Aralhi <xiaoping0501@gmail.com>
Co-authored-by: xuyuan23 <643854343@qq.com>
Co-authored-by: aries_ckt <916701291@qq.com>
Co-authored-by: hzh97 <2976151305@qq.com>
Author: 明天
Date: 2024-02-07 17:43:27 +08:00
Committed by: GitHub
Parent: dbb9ac83b1
Commit: d5afa6e206
328 changed files with 22606 additions and 3282 deletions

View File

@@ -1,6 +1,8 @@
import asyncio
from typing import AsyncIterator, List, Optional
from dbgpt.core.awel import DAGVar
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.core.interface.llm import (
DefaultMessageConverter,
LLMClient,
@@ -13,6 +15,23 @@ from dbgpt.model.cluster.manager_base import WorkerManager
from dbgpt.model.parameter import WorkerType
@register_resource(
label="Default LLM Client",
name="default_llm_client",
category=ResourceCategory.LLM_CLIENT,
description="Default LLM client(Connect to your DB-GPT model serving)",
parameters=[
Parameter.build_from(
"Auto Convert Message",
name="auto_convert_message",
type=bool,
optional=True,
default=False,
description="Whether to auto convert the messages that are not supported "
"by the LLM to a compatible format",
)
],
)
class DefaultLLMClient(LLMClient):
"""Default LLM client implementation.
@@ -24,11 +43,28 @@ class DefaultLLMClient(LLMClient):
"""
def __init__(
- self, worker_manager: WorkerManager, auto_convert_message: bool = False
+ self,
+ worker_manager: Optional[WorkerManager] = None,
+ auto_convert_message: bool = False,
):
self._worker_manager = worker_manager
self._auto_covert_message = auto_convert_message
@property
def worker_manager(self) -> WorkerManager:
"""Get the worker manager instance.
If not set, get the worker manager from the system app; if the system app
is not initialized, raise ValueError.
"""
if not self._worker_manager:
system_app = DAGVar.get_current_system_app()
if not system_app:
raise ValueError("System app is not initialized")
from dbgpt.model.cluster import WorkerManagerFactory
return WorkerManagerFactory.get_instance(system_app).create()
return self._worker_manager
async def generate(
self,
request: ModelRequest,
@@ -37,7 +73,7 @@ class DefaultLLMClient(LLMClient):
if not message_converter and self._auto_covert_message:
message_converter = DefaultMessageConverter()
request = await self.covert_message(request, message_converter)
- return await self._worker_manager.generate(request.to_dict())
+ return await self.worker_manager.generate(request.to_dict())
async def generate_stream(
self,
@@ -47,18 +83,18 @@ class DefaultLLMClient(LLMClient):
if not message_converter and self._auto_covert_message:
message_converter = DefaultMessageConverter()
request = await self.covert_message(request, message_converter)
- async for output in self._worker_manager.generate_stream(request.to_dict()):
+ async for output in self.worker_manager.generate_stream(request.to_dict()):
yield output
async def models(self) -> List[ModelMetadata]:
- instances = await self._worker_manager.get_all_model_instances(
+ instances = await self.worker_manager.get_all_model_instances(
WorkerType.LLM.value, healthy_only=True
)
query_metadata_task = []
for instance in instances:
worker_name, _ = WorkerType.parse_worker_key(instance.worker_key)
query_metadata_task.append(
- self._worker_manager.get_model_metadata({"model": worker_name})
+ self.worker_manager.get_model_metadata({"model": worker_name})
)
models: List[ModelMetadata] = await asyncio.gather(*query_metadata_task)
model_map = {}
@@ -67,6 +103,4 @@ class DefaultLLMClient(LLMClient):
return [model_map[model_name] for model_name in sorted(model_map.keys())]
async def count_token(self, model: str, prompt: str) -> int:
- return await self._worker_manager.count_token(
-     {"model": model, "prompt": prompt}
- )
+ return await self.worker_manager.count_token({"model": model, "prompt": prompt})
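
To make the new lazy worker-manager resolution concrete, here is a minimal usage sketch (illustrative, not part of this commit). It assumes a deployed DB-GPT process whose system app is attached to the current DAG; the model name is a placeholder, and ModelRequest.build_request / ModelMessage are assumed from dbgpt.core as used elsewhere in the codebase.

import asyncio

from dbgpt.core import ModelMessage, ModelRequest
from dbgpt.model.cluster.client import DefaultLLMClient


async def main():
    # No worker_manager passed: the new property resolves one from the
    # system app bound to the current DAG and raises ValueError when no
    # system app is initialized.
    client = DefaultLLMClient(auto_convert_message=True)
    request = ModelRequest.build_request(
        "chatglm2-6b",  # placeholder model name
        messages=[ModelMessage(role="human", content="Hello")],
    )
    output = await client.generate(request)
    print(output.text)


asyncio.run(main())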

View File

@@ -3,8 +3,16 @@ from abc import ABC
from typing import Optional
from dbgpt.component import ComponentType
- from dbgpt.core import LLMClient
+ from dbgpt.core import LLMClient, ModelOutput, ModelRequest
from dbgpt.core.awel import BaseOperator
from dbgpt.core.awel.flow import (
IOField,
OperatorCategory,
OperatorType,
Parameter,
ResourceCategory,
ViewMetadata,
)
from dbgpt.core.operators import BaseLLM, BaseLLMOperator, BaseStreamingLLMOperator
logger = logging.getLogger(__name__)
@@ -54,6 +62,36 @@ class LLMOperator(MixinLLMOperator, BaseLLMOperator):
and if we can't connect to the model serving cluster, we will use the :class:`OpenAILLMClient` as the llm_client.
"""
metadata = ViewMetadata(
label="LLM Operator",
name="llm_operator",
category=OperatorCategory.LLM,
description="The LLM operator.",
parameters=[
Parameter.build_from(
"LLM Client",
"llm_client",
LLMClient,
optional=True,
default=None,
description="The LLM Client.",
),
],
inputs=[
IOField.build_from(
"Model Request", "model_request", ModelRequest, "The model request."
)
],
outputs=[
IOField.build_from(
"Model Output",
"model_output",
ModelOutput,
description="The model output.",
)
],
)
def __init__(self, llm_client: Optional[LLMClient] = None, **kwargs):
super().__init__(llm_client)
BaseLLMOperator.__init__(self, llm_client, **kwargs)
@@ -68,6 +106,38 @@ class StreamingLLMOperator(MixinLLMOperator, BaseStreamingLLMOperator):
and if we can't connect to the model serving cluster, we will use the :class:`OpenAILLMClient` as the llm_client.
"""
metadata = ViewMetadata(
label="Streaming LLM Operator",
name="streaming_llm_operator",
operator_type=OperatorType.STREAMIFY,
category=OperatorCategory.LLM,
description="The streaming LLM operator.",
parameters=[
Parameter.build_from(
"LLM Client",
"llm_client",
LLMClient,
optional=True,
default=None,
description="The LLM Client.",
),
],
inputs=[
IOField.build_from(
"Model Request", "model_request", ModelRequest, "The model request."
)
],
outputs=[
IOField.build_from(
"Model Output",
"model_output",
ModelOutput,
description="The model output.",
is_list=True,
)
],
)
def __init__(self, llm_client: Optional[LLMClient] = None, **kwargs):
super().__init__(llm_client)
BaseStreamingLLMOperator.__init__(self, llm_client, **kwargs)
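
As a wiring sketch (again illustrative, not part of this commit), the operator can be placed in an AWEL DAG. InputOperator and SimpleCallDataInputSource are taken from dbgpt.core.awel, and omitting llm_client relies on the fallback described in the docstrings above.

from dbgpt.core.awel import DAG, InputOperator, SimpleCallDataInputSource
from dbgpt.model.operators import LLMOperator

with DAG("llm_operator_example") as _dag:
    input_task = InputOperator(input_source=SimpleCallDataInputSource())
    # llm_client omitted: MixinLLMOperator falls back to the default client,
    # or to OpenAILLMClient when the model serving cluster is unreachable.
    llm_task = LLMOperator()
    input_task >> llm_task

# Inside an event loop, the DAG is driven with a ModelRequest, e.g.:
#     output = await llm_task.call(call_data=some_model_request)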

View File

@@ -12,6 +12,7 @@ from dbgpt.core import (
ModelRequest,
ModelRequestContext,
)
from dbgpt.core.awel.flow import Parameter, ResourceCategory, register_resource
from dbgpt.model.parameter import ProxyModelParameters
from dbgpt.model.proxy.base import ProxyLLMClient
from dbgpt.model.proxy.llms.proxy_model import ProxyModel
@@ -42,6 +43,32 @@ async def chatgpt_generate_stream(
yield r
@register_resource(
label="OpenAI LLM Client",
name="openai_llm_client",
category=ResourceCategory.LLM_CLIENT,
parameters=[
Parameter.build_from(
label="OpenAI API Key",
name="apk_key",
type=str,
optional=True,
default=None,
description="OpenAI API Key, not required if you have set OPENAI_API_KEY "
"environment variable.",
),
Parameter.build_from(
label="OpenAI API Base",
name="api_base",
type=str,
optional=True,
default=None,
description="OpenAI API Base, not required if you have set OPENAI_API_BASE "
"environment variable.",
),
],
documentation_url="https://github.com/openai/openai-python",
)
class OpenAILLMClient(ProxyLLMClient):
def __init__(
self,
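
The two registered parameters mirror the constructor keywords. A hedged sketch follows (the __init__ above is truncated, so treat the api_key / api_base keywords as an assumption based on the parameter names):

from dbgpt.model.proxy.llms.chatgpt import OpenAILLMClient

# Both values are placeholders; omit them to fall back to the
# OPENAI_API_KEY / OPENAI_API_BASE environment variables.
client = OpenAILLMClient(
    api_key="sk-...",
    api_base="https://api.openai.com/v1",
)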

View File

@@ -16,6 +16,14 @@ from typing import (
from dbgpt._private.pydantic import model_to_json
from dbgpt.core.awel import TransformStreamAbsOperator
from dbgpt.core.awel.flow import (
IOField,
OperatorCategory,
OperatorType,
Parameter,
ResourceCategory,
ViewMetadata,
)
from dbgpt.core.interface.llm import ModelOutput
from dbgpt.core.operators import BaseLLM
@@ -152,7 +160,34 @@ def _build_openai_client(init_params: OpenAIParameters) -> Tuple[str, ClientType
class OpenAIStreamingOutputOperator(TransformStreamAbsOperator[ModelOutput, str]):
"""Transform ModelOutput to openai stream format."""
- async def transform_stream(self, input_value: AsyncIterator[ModelOutput]):
metadata = ViewMetadata(
label="OpenAI Streaming Output Operator",
name="openai_streaming_output_operator",
operator_type=OperatorType.TRANSFORM_STREAM,
category=OperatorCategory.OUTPUT_PARSER,
description="The OpenAI streaming LLM operator.",
parameters=[],
inputs=[
IOField.build_from(
"Upstream Model Output",
"model_output",
ModelOutput,
is_list=True,
description="The model output of upstream.",
)
],
outputs=[
IOField.build_from(
"Model Output",
"model_output",
str,
is_list=True,
description="The model output after transform to openai stream format",
)
],
)
+ async def transform_stream(self, model_output: AsyncIterator[ModelOutput]):
async def model_caller() -> str:
"""Read model name from share data.
In streaming mode, this transform_stream function will be executed
@@ -162,7 +197,7 @@ class OpenAIStreamingOutputOperator(TransformStreamAbsOperator[ModelOutput, str]
BaseLLM.SHARE_DATA_KEY_MODEL_NAME
)
- async for output in _to_openai_stream(input_value, None, model_caller):
+ async for output in _to_openai_stream(model_output, None, model_caller):
yield output
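
Putting the streaming pieces together, a sketch of the intended chaining (module paths inferred from the imports shown in this diff):

from dbgpt.core.awel import DAG
from dbgpt.model.operators import StreamingLLMOperator
from dbgpt.model.utils.chatgpt_utils import OpenAIStreamingOutputOperator

with DAG("openai_stream_example") as _dag:
    # The streaming LLM output (is_list=True) feeds the TRANSFORM_STREAM
    # operator, which re-emits each chunk in the OpenAI stream format.
    llm_task = StreamingLLMOperator()
    out_task = OpenAIStreamingOutputOperator()
    llm_task >> out_task

Each transformed chunk is then a plain string in the OpenAI stream format, as produced by the _to_openai_stream helper above.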