feat:llm manage

This commit is contained in:
aries_ckt 2023-09-22 00:09:57 +08:00
parent 598619080d
commit 55d037ac4f

View File

@ -82,12 +82,6 @@ async def model_stop(request: WorkerStartupRequest):
try: try:
from pilot.model.cluster.controller.controller import BaseModelController from pilot.model.cluster.controller.controller import BaseModelController
# controller = CFG.SYSTEM_APP.get_component(
# ComponentType.MODEL_CONTROLLER, BaseModelController
# )
# instances = await controller.get_all_instances(
# model_name="WorkerManager@service", healthy_only=True
# )
worker_manager = CFG.SYSTEM_APP.get_component( worker_manager = CFG.SYSTEM_APP.get_component(
ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory
).create() ).create()
@ -95,17 +89,6 @@ async def model_stop(request: WorkerStartupRequest):
return Result.faild(code="E000X", msg=f"can not find worker manager") return Result.faild(code="E000X", msg=f"can not find worker manager")
request.params = {} request.params = {}
return Result.succ(await worker_manager.model_shutdown(request)) return Result.succ(await worker_manager.model_shutdown(request))
# worker_instance = None
# for instance in instances:
# if instance.host == request.host and instance.port == request.port:
# from pilot.model.cluster import ModelRegistryClient
# from pilot.model.cluster import RemoteWorkerManager
#
# registry = ModelRegistryClient(f"http://{request.host}:{request.port}")
# worker_manager = RemoteWorkerManager(registry)
# return Result.succ(await worker_manager.model_shutdown(request))
# if not worker_instance:
# return Result.faild(code="E000X", msg=f"can not find worker manager")
except Exception as e: except Exception as e:
return Result.faild(code="E000X", msg=f"model stop failed {e}") return Result.faild(code="E000X", msg=f"model stop failed {e}")
@ -114,30 +97,11 @@ async def model_stop(request: WorkerStartupRequest):
async def model_start(request: WorkerStartupRequest):
    """Start a model via the worker manager built by the registered factory.

    The worker manager is obtained from the ``WORKER_MANAGER_FACTORY``
    component of ``CFG.SYSTEM_APP`` and the request is passed to its
    ``model_startup`` coroutine unchanged.

    Args:
        request: Startup request forwarded as-is to
            ``worker_manager.model_startup``.

    Returns:
        ``Result.succ`` wrapping whatever ``model_startup`` returns, or
        ``Result.faild`` with code ``E000X`` when the factory yields a falsy
        worker manager or when any exception is raised along the way.
    """
    # Trace line; presumably the route this handler is mounted on (the route
    # decorator, if any, is outside the visible source).
    print("/v1/worker/model/start:")
    try:
        worker_manager = CFG.SYSTEM_APP.get_component(
            ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory
        ).create()
        if not worker_manager:
            return Result.faild(code="E000X", msg="can not find worker manager")
        return Result.succ(await worker_manager.model_startup(request))
    except Exception as e:
        # Failures are reported to the caller as a Result rather than
        # propagated, so every error surfaces with the same E000X code.
        return Result.faild(code="E000X", msg=f"model start failed {e}")