feat: Multi-model command line

commit dd86fb86b1 (parent d467092766)
pilot/model/cli.py (new file, 151 lines)

@@ -0,0 +1,151 @@
+import click
+import functools
+
+from pilot.model.controller.registry import ModelRegistryClient
+from pilot.model.worker.manager import (
+    RemoteWorkerManager,
+    WorkerApplyRequest,
+    WorkerApplyType,
+)
+from pilot.utils import get_or_create_event_loop
+
+
+@click.group("model")
+def model_cli_group():
+    pass
+
+
+@model_cli_group.command()
+@click.option(
+    "--address",
+    type=str,
+    default="http://127.0.0.1:8000",
+    required=False,
+    help=(
+        "Address of the Model Controller to connect to. "
+        "Only the lightweight deployment mode is supported."
+    ),
+)
+@click.option(
+    "--model-name", type=str, default=None, required=False, help="The name of the model"
+)
+@click.option(
+    "--model-type", type=str, default="llm", required=False, help="The type of the model"
+)
+def list(address: str, model_name: str, model_type: str):
+    """List model instances"""
+    from prettytable import PrettyTable
+
+    loop = get_or_create_event_loop()
+    registry = ModelRegistryClient(address)
+
+    if not model_name:
+        instances = loop.run_until_complete(registry.get_all_model_instances())
+    else:
+        if not model_type:
+            model_type = "llm"
+        register_model_name = f"{model_name}@{model_type}"
+        instances = loop.run_until_complete(
+            registry.get_all_instances(register_model_name)
+        )
+    table = PrettyTable()
+
+    table.field_names = [
+        "Model Name",
+        "Model Type",
+        "Host",
+        "Port",
+        "Healthy",
+        "Enabled",
+        "Prompt Template",
+        "Last Heartbeat",
+    ]
+    for instance in instances:
+        model_name, model_type = instance.model_name.split("@")
+        table.add_row(
+            [
+                model_name,
+                model_type,
+                instance.host,
+                instance.port,
+                instance.healthy,
+                instance.enabled,
+                instance.prompt_template,
+                instance.last_heartbeat,
+            ]
+        )
+
+    print(table)
+
+
+def add_model_options(func):
+    @click.option(
+        "--address",
+        type=str,
+        default="http://127.0.0.1:8000",
+        required=False,
+        help=(
+            "Address of the Model Controller to connect to. "
+            "Only the lightweight deployment mode is supported."
+        ),
+    )
+    @click.option(
+        "--model-name",
+        type=str,
+        default=None,
+        required=True,
+        help="The name of the model",
+    )
+    @click.option(
+        "--model-type",
+        type=str,
+        default="llm",
+        required=False,
+        help="The type of the model",
+    )
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+@model_cli_group.command()
+@add_model_options
+def stop(address: str, model_name: str, model_type: str):
+    """Stop model instances"""
+    worker_apply(address, model_name, model_type, WorkerApplyType.STOP)
+
+
+@model_cli_group.command()
+@add_model_options
+def start(address: str, model_name: str, model_type: str):
+    """Start model instances"""
+    worker_apply(address, model_name, model_type, WorkerApplyType.START)
+
+
+@model_cli_group.command()
+@add_model_options
+def restart(address: str, model_name: str, model_type: str):
+    """Restart model instances"""
+    worker_apply(address, model_name, model_type, WorkerApplyType.RESTART)
+
+
+# @model_cli_group.command()
+# @add_model_options
+# def modify(address: str, model_name: str, model_type: str):
+#     """Modify model instances"""
+#     worker_apply(address, model_name, model_type, WorkerApplyType.UPDATE_PARAMS)
+
+
+def worker_apply(
+    address: str, model_name: str, model_type: str, apply_type: WorkerApplyType
+):
+    loop = get_or_create_event_loop()
+    registry = ModelRegistryClient(address)
+    worker_manager = RemoteWorkerManager(registry)
+    apply_req = WorkerApplyRequest(
+        model=model_name, worker_type=model_type, apply_type=apply_type
+    )
+    res = loop.run_until_complete(worker_manager.worker_apply(apply_req))
+    print(res)
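For orientation, a hedged usage sketch: once the `dbgpt` console script added in setup.py (end of this commit) is installed, `dbgpt model list` drives this group. The same commands can also be exercised in-process with Click's test runner; the model name below is a hypothetical example.

    # Usage sketch (assumes a Model Controller running at the default address)
    from click.testing import CliRunner
    from pilot.model.cli import model_cli_group

    runner = CliRunner()
    # Equivalent to `dbgpt model list` on the command line
    result = runner.invoke(model_cli_group, ["list"])
    print(result.output)
    # Equivalent to `dbgpt model stop --model-name vicuna-13b-v1.5` (hypothetical name)
    result = runner.invoke(model_cli_group, ["stop", "--model-name", "vicuna-13b-v1.5"])
    print(result.output)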
@@ -27,6 +27,9 @@ class ModelController:
         )
         return await self.registry.get_all_instances(model_name, healthy_only)
 
+    async def get_all_model_instances(self) -> List[ModelInstance]:
+        return await self.registry.get_all_model_instances()
+
     async def send_heartbeat(self, instance: ModelInstance) -> bool:
         return await self.registry.send_heartbeat(instance)

@@ -51,10 +54,12 @@ async def api_deregister_instance(request: ModelInstance):
 
 
 @router.get("/controller/models")
-async def api_get_all_instances(model_name: str, healthy_only: bool = False):
+async def api_get_all_instances(model_name: str = None, healthy_only: bool = False):
+    if not model_name:
+        return await controller.get_all_model_instances()
     return await controller.get_all_instances(model_name, healthy_only=healthy_only)
 
 
 @router.post("/controller/heartbeat")
-async def api_get_all_instances(request: ModelInstance):
+async def api_model_heartbeat(request: ModelInstance):
     return await controller.send_heartbeat(request)
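Since model_name is now optional on the listing endpoint, a minimal sketch of calling the controller's REST API directly (the default light-deploy address from the CLI is assumed; the /api prefix matches the paths used by ModelRegistryClient below):

    import httpx

    # No model_name -> the new get_all_model_instances() path
    print(httpx.get("http://127.0.0.1:8000/api/controller/models").json())
    # With a registered model name -> per-model instances, optionally healthy only
    print(
        httpx.get(
            "http://127.0.0.1:8000/api/controller/models",
            params={"model_name": "vicuna-13b-v1.5@llm", "healthy_only": True},  # hypothetical name
        ).json()
    )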
@@ -5,6 +5,7 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from datetime import datetime, timedelta
 from typing import Dict, List, Tuple
+import itertools
 
 from pilot.model.base import ModelInstance
 

@@ -57,6 +58,15 @@ class ModelRegistry(ABC):
         - List[ModelInstance]: A list of instances for the given model.
         """
 
+    @abstractmethod
+    async def get_all_model_instances(self) -> List[ModelInstance]:
+        """
+        Fetch all instances of all models.
+
+        Returns:
+        - List[ModelInstance]: A list of instances for all models.
+        """
+
     async def select_one_health_instance(self, model_name: str) -> ModelInstance:
         """
         Selects one healthy and enabled instance for a given model.

@@ -154,12 +164,15 @@ class EmbeddedModelRegistry(ModelRegistry):
     async def get_all_instances(
         self, model_name: str, healthy_only: bool = False
     ) -> List[ModelInstance]:
-        print(self.registry)
         instances = self.registry[model_name]
         if healthy_only:
             instances = [ins for ins in instances if ins.healthy == True]
         return instances
 
+    async def get_all_model_instances(self) -> List[ModelInstance]:
+        print(self.registry)
+        return list(itertools.chain(*self.registry.values()))
+
     async def send_heartbeat(self, instance: ModelInstance) -> bool:
         _, exist_ins = self._get_instances(
             instance.model_name, instance.host, instance.port, healthy_only=False

@@ -194,6 +207,10 @@ class ModelRegistryClient(ModelRegistry):
     ) -> List[ModelInstance]:
         pass
 
+    @api_remote(path="/api/controller/models")
+    async def get_all_model_instances(self) -> List[ModelInstance]:
+        pass
+
     @api_remote(path="/api/controller/models")
     async def select_one_health_instance(self, model_name: str) -> ModelInstance:
         instances = await self.get_all_instances(model_name, healthy_only=True)
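The embedded registry maps registered model names to lists of instances, so flattening every value list yields all instances. A toy sketch of the itertools.chain expression above (keys and values hypothetical):

    import itertools

    # Toy stand-in for EmbeddedModelRegistry.registry
    registry = {
        "vicuna-13b-v1.5@llm": ["instance-a"],
        "text2vec@text2vec": ["instance-b", "instance-c"],
    }
    # Same expression as get_all_model_instances() above
    print(list(itertools.chain(*registry.values())))  # ['instance-a', 'instance-b', 'instance-c']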
|
@ -118,7 +118,6 @@ class ModelLoader:
|
|||||||
def loader_with_params(self, model_params: ModelParameters):
|
def loader_with_params(self, model_params: ModelParameters):
|
||||||
llm_adapter = get_llm_model_adapter(self.model_name, self.model_path)
|
llm_adapter = get_llm_model_adapter(self.model_name, self.model_path)
|
||||||
model_type = llm_adapter.model_type()
|
model_type = llm_adapter.model_type()
|
||||||
param_cls = llm_adapter.model_param_class(model_type)
|
|
||||||
self.prompt_template = model_params.prompt_template
|
self.prompt_template = model_params.prompt_template
|
||||||
logger.info(f"model_params:\n{model_params}")
|
logger.info(f"model_params:\n{model_params}")
|
||||||
if model_type == ModelType.HF:
|
if model_type == ModelType.HF:
|
||||||
|
@@ -184,6 +184,19 @@ class BaseParameters:
 
         return updated
 
+    def __str__(self) -> str:
+        class_name = self.__class__.__name__
+        parameters = [
+            f"\n\n=========================== {class_name} ===========================\n"
+        ]
+        for field_info in fields(self):
+            value = getattr(self, field_info.name)
+            parameters.append(f"{field_info.name}: {value}")
+        parameters.append(
+            "\n======================================================================\n\n"
+        )
+        return "\n".join(parameters)
+
 
 @dataclass
 class ModelWorkerParameters(BaseParameters):
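A quick sketch of what the new __str__ walks over, using a hypothetical stand-in dataclass (real parameter classes live in pilot.model.parameter):

    from dataclasses import dataclass, fields

    @dataclass
    class DemoParameters:  # hypothetical stand-in for a BaseParameters subclass
        model_name: str = "demo-model"
        device: str = "cpu"

    # The same field loop as __str__ above: one "name: value" line per
    # dataclass field, framed by the banner lines
    demo = DemoParameters()
    for f in fields(demo):
        print(f"{f.name}: {getattr(demo, f.name)}")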
@@ -4,12 +4,12 @@ from typing import Dict, Iterator, List
 
 import torch
 from pilot.configs.model_config import DEVICE
-from pilot.model.adapter import get_llm_model_adapter
+from pilot.model.adapter import get_llm_model_adapter, BaseLLMAdaper
 from pilot.model.base import ModelOutput
 from pilot.model.loader import ModelLoader, _get_model_real_path
 from pilot.model.parameter import EnvArgumentParser, ModelParameters
 from pilot.model.worker.base import ModelWorker
-from pilot.server.chat_adapter import get_llm_chat_adapter
+from pilot.server.chat_adapter import get_llm_chat_adapter, BaseChatAdpter
 from pilot.utils.model_utils import _clear_torch_cache
 
 logger = logging.getLogger("model_worker")

@@ -20,6 +20,8 @@ class DefaultModelWorker(ModelWorker):
         self.model = None
         self.tokenizer = None
         self._model_params = None
+        self.llm_adapter: BaseLLMAdaper = None
+        self.llm_chat_adapter: BaseChatAdpter = None
 
     def load_worker(self, model_name: str, model_path: str, **kwargs) -> None:
         if model_path.endswith("/"):

@@ -28,9 +30,9 @@ class DefaultModelWorker(ModelWorker):
         self.model_name = model_name
         self.model_path = model_path
 
-        llm_adapter = get_llm_model_adapter(self.model_name, self.model_path)
-        model_type = llm_adapter.model_type()
-        self.param_cls = llm_adapter.model_param_class(model_type)
+        self.llm_adapter = get_llm_model_adapter(self.model_name, self.model_path)
+        model_type = self.llm_adapter.model_type()
+        self.param_cls = self.llm_adapter.model_param_class(model_type)
 
         self.llm_chat_adapter = get_llm_chat_adapter(self.model_name, self.model_path)
         self.generate_stream_func = self.llm_chat_adapter.get_generate_stream_func(

@@ -50,12 +52,14 @@ class DefaultModelWorker(ModelWorker):
         param_cls = self.model_param_class()
         model_args = EnvArgumentParser()
         env_prefix = EnvArgumentParser.get_env_prefix(self.model_name)
+        model_type = self.llm_adapter.model_type()
         model_params: ModelParameters = model_args.parse_args_into_dataclass(
             param_cls,
             env_prefix=env_prefix,
             command_args=command_args,
             model_name=self.model_name,
             model_path=self.model_path,
+            model_type=model_type,
         )
         if not model_params.device:
             model_params.device = DEVICE
@@ -1,4 +1,5 @@
 import asyncio
+import httpx
 import itertools
 import json
 import os

@@ -147,7 +148,7 @@ class LocalWorkerManager(WorkerManager):
         self.model_registry = model_registry
 
     def _worker_key(self, worker_type: str, model_name: str) -> str:
-        return f"$${worker_type}_$$_{model_name}"
+        return f"{model_name}@{worker_type}"
 
     def add_worker(
         self,

@@ -311,7 +312,9 @@ class LocalWorkerManager(WorkerManager):
             # Apply to all workers
             worker_instances = list(itertools.chain(*self.workers.values()))
             logger.info(f"Apply to all workers: {worker_instances}")
-            await asyncio.gather(*(apply_func(worker) for worker in worker_instances))
+            return await asyncio.gather(
+                *(apply_func(worker) for worker in worker_instances)
+            )
 
     async def _start_all_worker(
         self, apply_req: WorkerApplyRequest

@@ -423,6 +426,28 @@ class RemoteWorkerManager(LocalWorkerManager):
             worker_instances.append(wr)
         return worker_instances
 
+    async def worker_apply(self, apply_req: WorkerApplyRequest) -> WorkerApplyOutput:
+        async def _remote_apply_func(worker_run_data: WorkerRunData):
+            worker_addr = worker_run_data.worker.worker_addr
+            async with httpx.AsyncClient() as client:
+                response = await client.post(
+                    worker_addr + "/apply",
+                    headers=worker_run_data.worker.headers,
+                    json=apply_req.dict(),
+                    timeout=worker_run_data.worker.timeout,
+                )
+                if response.status_code == 200:
+                    output = WorkerApplyOutput(**response.json())
+                    logger.info(f"worker_apply success: {output}")
+                else:
+                    output = WorkerApplyOutput(message=response.text)
+                    logger.warn(f"worker_apply failed: {output}")
+                return output
+
+        results = await self._apply_worker(apply_req, _remote_apply_func)
+        if results:
+            return results[0]
+
 
 class WorkerManagerAdapter(WorkerManager):
     def __init__(self, worker_manager: WorkerManager = None) -> None:
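The new worker key format deliberately matches the registered model name that the CLI's list command splits apart. A toy round-trip sketch (model name hypothetical):

    # Same format as LocalWorkerManager._worker_key above
    def worker_key(worker_type: str, model_name: str) -> str:
        return f"{model_name}@{worker_type}"

    key = worker_key("llm", "vicuna-13b-v1.5")  # 'vicuna-13b-v1.5@llm'
    # The inverse operation used in `dbgpt model list`
    model_name, worker_type = key.split("@")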
@@ -28,10 +28,7 @@ from pilot.memory.chat_history.mem_history import MemHistoryMemory
 from pilot.memory.chat_history.duckdb_history import DuckdbHistoryMemory
 
 from pilot.configs.model_config import LOGDIR, DATASETS_DIR
-from pilot.utils import (
-    build_logger,
-    server_error_msg,
-)
+from pilot.utils import build_logger, server_error_msg, get_or_create_event_loop
 from pilot.scene.base_message import (
     BaseMessage,
     SystemMessage,

@@ -222,13 +219,10 @@ class BaseChat(ABC):
         return self.current_ai_response()
 
     def _blocking_stream_call(self):
-        import asyncio
-
         logger.warn(
             "_blocking_stream_call is only temporarily used in webserver and will be deleted soon, please use stream_call to replace it for higher performance"
         )
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
+        loop = get_or_create_event_loop()
         async_gen = self.stream_call()
         while True:
             try:

@@ -238,13 +232,10 @@ class BaseChat(ABC):
                 break
 
     def _blocking_nostream_call(self):
-        import asyncio
-
         logger.warn(
             "_blocking_nostream_call is only temporarily used in webserver and will be deleted soon, please use nostream_call to replace it for higher performance"
        )
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
+        loop = get_or_create_event_loop()
         return loop.run_until_complete(self.nostream_call())
 
     def call(self):
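For context, the blocking wrappers drain an async generator from synchronous code; a self-contained toy sketch of that loop shape (the chunks here are hypothetical stand-ins for model output):

    from pilot.utils import get_or_create_event_loop

    async def stream():  # hypothetical stand-in for BaseChat.stream_call()
        for chunk in ("Hello", ", ", "world"):
            yield chunk

    loop = get_or_create_event_loop()
    gen = stream()
    while True:
        try:
            print(loop.run_until_complete(gen.__anext__()), end="")
        except StopAsyncIteration:
            break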
pilot/scripts/__init__.py (new file, empty)
pilot/scripts/cli_scripts.py (new file, 45 lines)

@@ -0,0 +1,45 @@
+import sys
+import click
+import os
+import copy
+import logging
+
+sys.path.append(
+    os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+)
+
+
+@click.group()
+@click.option(
+    "--log-level",
+    required=False,
+    type=str,
+    default="warn",
+    help="Log level",
+)
+@click.version_option()
+def cli(log_level: str):
+    # TODO not working now
+    logging.basicConfig(level=log_level, encoding="utf-8")
+
+
+def add_command_alias(command, name: str, hidden: bool = False):
+    new_command = copy.deepcopy(command)
+    new_command.hidden = hidden
+    cli.add_command(new_command, name=name)
+
+
+try:
+    from pilot.model.cli import model_cli_group
+
+    add_command_alias(model_cli_group, name="model")
+except ImportError as e:
+    logging.warning(f"Integrating dbgpt model command line tool failed: {e}")
+
+
+def main():
+    return cli()
+
+
+if __name__ == "__main__":
+    main()
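add_command_alias deep-copies a command group so it can be mounted under another name, optionally hidden from help output. A sketch of registering a hypothetical extra alias the same way:

    from pilot.model.cli import model_cli_group
    from pilot.scripts.cli_scripts import add_command_alias

    # Hypothetical second alias: invokable as `dbgpt models ...` but
    # hidden from `dbgpt --help`
    add_command_alias(model_cli_group, name="models", hidden=True)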
@@ -75,14 +75,20 @@ app.include_router(api_editor_route_v1, prefix="/api")
 app.include_router(knowledge_router)
 # app.include_router(api_editor_route_v1)
+
+
 def mount_static_files(app):
     os.makedirs(static_message_img_path, exist_ok=True)
     app.mount(
-        "/images", StaticFiles(directory=static_message_img_path, html=True), name="static2"
+        "/images",
+        StaticFiles(directory=static_message_img_path, html=True),
+        name="static2",
+    )
+    app.mount(
+        "/_next/static", StaticFiles(directory=static_file_path + "/_next/static")
     )
-    app.mount("/_next/static", StaticFiles(directory=static_file_path + "/_next/static"))
     app.mount("/", StaticFiles(directory=static_file_path, html=True), name="static")
 
 
 app.add_exception_handler(RequestValidationError, validation_exception_handler)
 
 if __name__ == "__main__":
@@ -2,7 +2,6 @@ import logging
 import os
 
 import requests
-from playsound import playsound
 
 from pilot.speech.base import VoiceBase
 

@@ -23,6 +22,8 @@ class BrianSpeech(VoiceBase):
         Returns:
             bool: True if the request was successful, False otherwise
         """
+        from playsound import playsound
+
         tts_url = (
             f"https://api.streamelements.com/kappa/v2/speech?voice=Brian&text={text}"
         )
@@ -2,7 +2,6 @@
 import os
 
 import requests
-from playsound import playsound
 
 from pilot.configs.config import Config
 from pilot.speech.base import VoiceBase

@@ -70,6 +69,7 @@ class ElevenLabsSpeech(VoiceBase):
             bool: True if the request was successful, False otherwise
         """
         from pilot.logs import logger
+        from playsound import playsound
 
         tts_url = (
             f"https://api.elevenlabs.io/v1/text-to-speech/{self._voices[voice_index]}"
         )
@@ -2,7 +2,6 @@
 import os
 
 import gtts
-from playsound import playsound
 
 from pilot.speech.base import VoiceBase
 

@@ -15,6 +14,8 @@ class GTTSVoice(VoiceBase):
 
     def _speech(self, text: str, _: int = 0) -> bool:
         """Play the given text."""
+        from playsound import playsound
+
         tts = gtts.gTTS(text)
         tts.save("speech.mp3")
         playsound("speech.mp3", True)
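All three speech backends get the same treatment: playsound moves from a module-level import to a function-local one, so importing the speech modules no longer requires the optional dependency at load time. The idiom in isolation (function name hypothetical):

    def play_file(path: str) -> None:
        # Deferred import: the optional dependency is resolved on first call,
        # so the module imports cleanly even without playsound installed
        from playsound import playsound

        playsound(path, True)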
@@ -5,4 +5,5 @@ from .utils import (
     disable_torch_init,
     pretty_print_semaphore,
     server_error_msg,
+    get_or_create_event_loop,
 )
@@ -1,6 +1,7 @@
 import httpx
 from inspect import signature
 import typing_inspect
+import logging
 from typing import get_type_hints, List, Type, TypeVar, Union, Optional, Tuple
 from dataclasses import is_dataclass, asdict
 

@@ -21,7 +22,9 @@ def _api_remote(path, method="GET"):
     raise TypeError("Return type must be annotated in the decorated function.")
 
     actual_dataclass = _extract_dataclass_from_generic(return_type)
-    print(f"return_type: {return_type}, actual_dataclass: {actual_dataclass}")
+    logging.debug(
+        f"return_type: {return_type}, actual_dataclass: {actual_dataclass}"
+    )
     if not actual_dataclass:
         actual_dataclass = return_type
     sig = signature(func)

@@ -57,7 +60,9 @@ def _api_remote(path, method="GET"):
         else:  # For GET, DELETE, etc.
             request_params["params"] = request_data
 
-        print(f"request_params: {request_params}, args: {args}, kwargs: {kwargs}")
+        logging.info(
+            f"request_params: {request_params}, args: {args}, kwargs: {kwargs}"
+        )
 
         async with httpx.AsyncClient() as client:
             response = await client.request(**request_params)
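Replacing print with logging makes the request tracing opt-in; a two-line sketch of surfacing it while debugging:

    import logging

    # Surfaces the logging.debug/logging.info lines emitted by _api_remote above
    logging.basicConfig(level=logging.DEBUG)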
@@ -5,8 +5,8 @@ import logging
 import logging.handlers
 import os
 import sys
+import asyncio
 
-import torch
 from pilot.configs.model_config import LOGDIR
 
 server_error_msg = (

@@ -17,6 +17,8 @@ handler = None
 
 
 def get_gpu_memory(max_gpus=None):
+    import torch
+
     gpu_memory = []
     num_gpus = (
         torch.cuda.device_count()

@@ -130,3 +132,15 @@ def pretty_print_semaphore(semaphore):
     if semaphore is None:
         return "None"
     return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})"
+
+
+def get_or_create_event_loop() -> asyncio.BaseEventLoop:
+    try:
+        loop = asyncio.get_event_loop()
+    except Exception as e:
+        if not "no running event loop" in str(e):
+            raise e
+        logging.warning("Cannot get a running event loop, creating a new one now")
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+    return loop
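A brief usage sketch of the new helper from synchronous code (assumes a Python version where asyncio.get_event_loop() still returns the main-thread loop outside a running coroutine):

    from pilot.utils import get_or_create_event_loop

    async def ping():
        return "pong"

    loop = get_or_create_event_loop()
    print(loop.run_until_complete(ping()))  # 'pong'
    # Subsequent callers reuse the same loop rather than creating one per call
    assert loop is get_or_create_event_loop()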
@@ -77,3 +77,6 @@ bardapi==0.1.29
 pymysql
 duckdb
 duckdb-engine
+
+# cli
+prettytable
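prettytable is the new CLI's only extra dependency; a minimal sketch of the table rendering that `dbgpt model list` relies on (row values hypothetical):

    from prettytable import PrettyTable

    table = PrettyTable()
    table.field_names = ["Model Name", "Model Type", "Host", "Port"]
    table.add_row(["vicuna-13b-v1.5", "llm", "127.0.0.1", 8001])
    print(table)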
setup.py (4 lines changed)

@@ -267,7 +267,7 @@ def llama_cpp_python_cuda_requires():
     llama_cpp_version = "0.1.77"
     py_version = "cp310"
     os_pkg_name = "linux_x86_64" if os_type == OSType.LINUX else "win_amd64"
-    extra_index_url = f"{base_url}/llama_cpp_python_cuda-{llama_cpp_version}+{device}{cpu_avx}-{py_version}-{py_version}-{os_pkg_name}.whl"
+    extra_index_url = f"{base_url}/llama_cpp_python_cuda-{llama_cpp_version}+{device}-{py_version}-{py_version}-{os_pkg_name}.whl"
     extra_index_url, _ = encode_url(extra_index_url)
     print(f"Install llama_cpp_python_cuda from {extra_index_url}")

@@ -361,7 +361,7 @@ setuptools.setup(
     extras_require=setup_spec.extras,
     entry_points={
         "console_scripts": [
-            "dbgpt_server=pilot.server:webserver",
+            "dbgpt=pilot.scripts.cli_scripts:main",
         ],
     },
 )
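The console-script change is what makes the whole commit usable from a shell: after installation, `dbgpt` resolves to cli_scripts.main. The equivalent invocation in Python, for reference:

    # What the `dbgpt` console script runs after the package is installed
    from pilot.scripts.cli_scripts import main

    main()  # same as typing `dbgpt` on the command line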
@@ -25,7 +25,6 @@ sys.path.append(
 
 from pilot.configs.model_config import DATASETS_DIR
 
-from tools.cli.knowledge_client import knowledge_init
 
 API_ADDRESS: str = "http://127.0.0.1:5000"
 

@@ -97,6 +96,8 @@ def knowledge(
     verbose: bool,
 ):
     """Knowledge command line tool"""
+    from tools.cli.knowledge_client import knowledge_init
+
     knowledge_init(
         API_ADDRESS,
         vector_name,