feat: 系统监控添加 Core/Celery Terminal; 修改检测终端状态逻辑; (#6570)

* feat: 系统监控添加 Core Terminal; 修改检测终端状态逻辑;

* feat: 添加management包

* feat: 添加management包

* feat: 添加 start 模块

* feat: 修改 start 模块

* feat: 修改启动命令目录结构

* feat: 修改启动命令目录结构

* feat: 修改启动命令目录结构

* feat: 修改启动命令目录结构

* feat: 修改启动命令目录结构

* feat: 修改启动命令目录结构

* feat: 修改启动命令目录结构

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

* feat: 修改启动脚本

Co-authored-by: Bai <bugatti_it@163.com>
This commit is contained in:
fit2bot
2021-08-06 19:16:18 +08:00
committed by GitHub
parent 8ad78ffef8
commit 39ce60c93a
30 changed files with 912 additions and 556 deletions

View File

@@ -0,0 +1,6 @@
from .services.command import BaseActionCommand, Action
class Command(BaseActionCommand):
help = 'Restart services'
action = Action.restart.value

View File

@@ -0,0 +1,139 @@
from django.core.management.base import BaseCommand, CommandError
from django.db.models import TextChoices
from .utils import ServicesUtil
from .hands import *
class Services(TextChoices):
gunicorn = 'gunicorn', 'gunicorn'
daphne = 'daphne', 'daphne'
celery_ansible = 'celery_ansible', 'celery_ansible'
celery_default = 'celery_default', 'celery_default'
beat = 'beat', 'beat'
flower = 'flower', 'flower'
ws = 'ws', 'ws'
web = 'web', 'web'
celery = 'celery', 'celery'
task = 'task', 'task'
all = 'all', 'all'
@classmethod
def get_service_object_class(cls, name):
from . import services
services_map = {
cls.gunicorn.value: services.GunicornService,
cls.daphne: services.DaphneService,
cls.flower: services.FlowerService,
cls.celery_default: services.CeleryDefaultService,
cls.celery_ansible: services.CeleryAnsibleService,
cls.beat: services.BeatService
}
return services_map.get(name)
@classmethod
def ws_services(cls):
return [cls.daphne]
@classmethod
def web_services(cls):
return [cls.gunicorn, cls.daphne]
@classmethod
def celery_services(cls):
return [cls.celery_ansible, cls.celery_default]
@classmethod
def task_services(cls):
return cls.celery_services() + [cls.beat]
@classmethod
def all_services(cls):
return cls.web_services() + cls.task_services()
@classmethod
def export_services_values(cls):
return [cls.all.value, cls.web.value, cls.task.value]
@classmethod
def get_service_objects(cls, service_names, **kwargs):
services = set()
for name in service_names:
method_name = f'{name}_services'
if hasattr(cls, method_name):
_services = getattr(cls, method_name)()
elif hasattr(cls, name):
_services = [getattr(cls, name)]
else:
continue
services.update(set(_services))
service_objects = []
for s in services:
service_class = cls.get_service_object_class(s.value)
if not service_class:
continue
kwargs.update({
'name': s.value
})
service_object = service_class(**kwargs)
service_objects.append(service_object)
return service_objects
class Action(TextChoices):
start = 'start', 'start'
status = 'status', 'status'
stop = 'stop', 'stop'
restart = 'restart', 'restart'
class BaseActionCommand(BaseCommand):
help = 'Service Base Command'
action = None
util = None
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def add_arguments(self, parser):
parser.add_argument(
'services', nargs='+', choices=Services.export_services_values(), help='Service',
)
parser.add_argument('-d', '--daemon', nargs="?", const=True)
parser.add_argument('-w', '--worker', type=int, nargs="?", default=4)
parser.add_argument('-f', '--force', nargs="?", const=True)
def initial_util(self, *args, **options):
service_names = options.get('services')
service_kwargs = {
'worker_gunicorn': options.get('worker')
}
services = Services.get_service_objects(service_names=service_names, **service_kwargs)
kwargs = {
'services': services,
'daemon_run': options.get('daemon', False),
'daemon_stop': Services.all.value in service_names,
'force_stop': options.get('force', False),
}
self.util = ServicesUtil(**kwargs)
def handle(self, *args, **options):
self.initial_util(*args, **options)
assert self.action in Action.values, f'The action {self.action} is not in the optional list'
_handle = getattr(self, f'_handle_{self.action}', lambda: None)
_handle()
def _handle_start(self):
self.util.start_and_watch()
os._exit(0)
def _handle_stop(self):
self.util.stop()
def _handle_restart(self):
self.util.restart()
def _handle_status(self):
self.util.show_status()

View File

@@ -0,0 +1,26 @@
import os
import sys
import logging
from django.conf import settings
from apps.jumpserver.const import CONFIG
try:
from apps.jumpserver import const
__version__ = const.VERSION
except ImportError as e:
print("Not found __version__: {}".format(e))
print("Python is: ")
logging.info(sys.executable)
__version__ = 'Unknown'
sys.exit(1)
HTTP_HOST = CONFIG.HTTP_BIND_HOST or '127.0.0.1'
HTTP_PORT = CONFIG.HTTP_LISTEN_PORT or 8080
WS_PORT = CONFIG.WS_LISTEN_PORT or 8082
DEBUG = CONFIG.DEBUG or False
BASE_DIR = os.path.dirname(settings.BASE_DIR)
LOG_DIR = os.path.join(BASE_DIR, 'logs')
APPS_DIR = os.path.join(BASE_DIR, 'apps')
TMP_DIR = os.path.join(BASE_DIR, 'tmp')

View File

@@ -0,0 +1,6 @@
from .beat import *
from .celery_ansible import *
from .celery_default import *
from .daphne import *
from .flower import *
from .gunicorn import *

View File

@@ -0,0 +1,188 @@
import abc
import time
import shutil
import datetime
import threading
import subprocess
from ..hands import *
class BaseService(object):
def __init__(self, **kwargs):
self.name = kwargs['name']
self.process = None
self.STOP_TIMEOUT = 10
self.max_retry = 0
self.retry = 3
self.LOG_KEEP_DAYS = 7
self.EXIT_EVENT = threading.Event()
self.LOCK = threading.Lock()
@property
@abc.abstractmethod
def cmd(self):
return []
@property
@abc.abstractmethod
def cwd(self):
return ''
@property
def is_running(self):
if self.pid == 0:
return False
try:
os.kill(self.pid, 0)
except (OSError, ProcessLookupError):
return False
else:
return True
def show_status(self):
if self.is_running:
msg = f'{self.name} is running: {self.pid}.'
else:
msg = f'{self.name} is stopped.'
print(msg)
# -- log --
@property
def log_filename(self):
return f'{self.name}.log'
@property
def log_filepath(self):
return os.path.join(LOG_DIR, self.log_filename)
@property
def log_file(self):
return open(self.log_filepath, 'a')
@property
def log_dir(self):
return os.path.dirname(self.log_filepath)
# -- end log --
# -- pid --
@property
def pid_filepath(self):
return os.path.join(TMP_DIR, f'{self.name}.pid')
@property
def pid(self):
if not os.path.isfile(self.pid_filepath):
return 0
with open(self.pid_filepath) as f:
try:
pid = int(f.read().strip())
except ValueError:
pid = 0
return pid
def write_pid(self):
with open(self.pid_filepath, 'w') as f:
f.write(str(self.process.pid))
def remove_pid(self):
if os.path.isfile(self.pid_filepath):
os.unlink(self.pid_filepath)
# -- end pid --
# -- action --
def open_subprocess(self):
kwargs = {'cwd': self.cwd, 'stderr': self.log_file, 'stdout': self.log_file}
self.process = subprocess.Popen(self.cmd, **kwargs)
def start(self):
if self.is_running:
self.show_status()
return
self.remove_pid()
self.open_subprocess()
self.write_pid()
self.start_other()
def start_other(self):
pass
def stop(self, force=True):
if not self.is_running:
self.show_status()
# self.remove_pid()
return
print(f'Stop service: {self.name}', end='')
sig = 9 if force else 15
os.kill(self.pid, sig)
try:
self.process.wait(2)
except:
pass
for i in range(self.STOP_TIMEOUT):
if i == self.STOP_TIMEOUT - 1:
print("\033[31m Error\033[0m")
if not self.is_running:
print("\033[32m Ok\033[0m")
self.remove_pid()
break
else:
time.sleep(1)
continue
def watch(self):
with self.LOCK:
self._check()
if not self.is_running:
self._restart()
self._rotate_log()
def _check(self):
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"{now} Check service status: {self.name} -> ", end='')
try:
self.process.wait(timeout=1) # 不wait子进程可能无法回收
except subprocess.TimeoutExpired:
pass
if self.is_running:
print(f'running at {self.process.pid}')
else:
print(f'stopped with code: {self.process.returncode}({self.process.pid})')
def _restart(self):
if self.retry > self.max_retry:
logging.info("Service start failed, exit: ", self.name)
self.EXIT_EVENT.set()
return
self.retry += 1
logging.info(f'> Find {self.name} stopped, retry {self.retry}, {self.process.pid}')
self.start()
def _rotate_log(self):
now = datetime.datetime.now()
_time = now.strftime('%H:%M')
if _time != '23:59':
return
backup_date = now.strftime('%Y-%m-%d')
backup_log_dir = os.path.join(self.log_dir, backup_date)
if not os.path.exists(backup_log_dir):
os.mkdir(backup_log_dir)
backup_log_path = os.path.join(backup_log_dir, self.log_filename)
if os.path.isfile(self.log_filepath) and not os.path.isfile(backup_log_path):
logging.info(f'Rotate log file: {self.log_filepath} => {backup_log_path}')
shutil.copy(self.log_filepath, backup_log_path)
with open(self.log_filepath, 'w') as f:
pass
to_delete_date = now - datetime.timedelta(days=self.LOG_KEEP_DAYS)
to_delete_dir = os.path.join(LOG_DIR, to_delete_date.strftime('%Y-%m-%d'))
if os.path.exists(to_delete_dir):
logging.info(f'Remove old log: {to_delete_dir}')
shutil.rmtree(to_delete_dir, ignore_errors=True)
# -- end action --

View File

@@ -0,0 +1,25 @@
from ..hands import *
from .base import BaseService
from django.core.cache import cache
__all__ = ['BeatService']
class BeatService(BaseService):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.lock = cache.lock('beat-distribute-start-lock', expire=60)
@property
def cmd(self):
print("\n- Start Beat as Periodic Task Scheduler")
cmd = [
sys.executable, 'start_celery_beat.py',
]
return cmd
@property
def cwd(self):
return os.path.join(BASE_DIR, 'utils')

View File

@@ -0,0 +1,11 @@
from .celery_base import CeleryBaseService
__all__ = ['CeleryAnsibleService']
class CeleryAnsibleService(CeleryBaseService):
def __init__(self, **kwargs):
kwargs['queue'] = 'ansible'
super().__init__(**kwargs)

View File

@@ -0,0 +1,38 @@
from ..hands import *
from .base import BaseService
class CeleryBaseService(BaseService):
def __init__(self, queue, num=10, **kwargs):
super().__init__(**kwargs)
self.queue = queue
self.num = num
@property
def cmd(self):
print('\n- Start Celery as Distributed Task Queue: {}'.format(self.queue.capitalize()))
os.environ.setdefault('PYTHONOPTIMIZE', '1')
os.environ.setdefault('ANSIBLE_FORCE_COLOR', 'True')
if os.getuid() == 0:
os.environ.setdefault('C_FORCE_ROOT', '1')
server_hostname = os.environ.get("SERVER_HOSTNAME")
if not server_hostname:
server_hostname = '%h'
cmd = [
'celery', 'worker',
'-P', 'threads',
'-A', 'ops',
'-l', 'INFO',
'-c', str(self.num),
'-Q', self.queue,
'-n', f'{self.queue}@{server_hostname}'
]
return cmd
@property
def cwd(self):
return APPS_DIR

View File

@@ -0,0 +1,16 @@
from .celery_base import CeleryBaseService
__all__ = ['CeleryDefaultService']
class CeleryDefaultService(CeleryBaseService):
def __init__(self, **kwargs):
kwargs['queue'] = 'celery'
super().__init__(**kwargs)
def start_other(self):
from terminal.startup import CeleryTerminal
celery_terminal = CeleryTerminal()
celery_terminal.start_heartbeat_thread()

View File

@@ -0,0 +1,25 @@
from ..hands import *
from .base import BaseService
__all__ = ['DaphneService']
class DaphneService(BaseService):
def __init__(self, **kwargs):
super().__init__(**kwargs)
@property
def cmd(self):
print("\n- Start Daphne ASGI WS Server")
cmd = [
'daphne', 'jumpserver.asgi:application',
'-b', HTTP_HOST,
'-p', str(WS_PORT),
]
return cmd
@property
def cwd(self):
return APPS_DIR

View File

@@ -0,0 +1,31 @@
from ..hands import *
from .base import BaseService
__all__ = ['FlowerService']
class FlowerService(BaseService):
def __init__(self, **kwargs):
super().__init__(**kwargs)
@property
def cmd(self):
print("\n- Start Flower as Task Monitor")
if os.getuid() == 0:
os.environ.setdefault('C_FORCE_ROOT', '1')
cmd = [
'celery', 'flower',
'-A', 'ops',
'-l', 'INFO',
'--url_prefix=/core/flower',
'--auto_refresh=False',
'--max_tasks=1000',
'--tasks_columns=uuid,name,args,state,received,started,runtime,worker'
]
return cmd
@property
def cwd(self):
return APPS_DIR

View File

@@ -0,0 +1,40 @@
from ..hands import *
from .base import BaseService
__all__ = ['GunicornService']
class GunicornService(BaseService):
def __init__(self, **kwargs):
self.worker = kwargs['worker_gunicorn']
super().__init__(**kwargs)
@property
def cmd(self):
print("\n- Start Gunicorn WSGI HTTP Server")
log_format = '%(h)s %(t)s %(L)ss "%(r)s" %(s)s %(b)s '
bind = f'{HTTP_HOST}:{HTTP_PORT}'
cmd = [
'gunicorn', 'jumpserver.wsgi',
'-b', bind,
'-k', 'gthread',
'--threads', '10',
'-w', str(self.worker),
'--max-requests', '4096',
'--access-logformat', log_format,
'--access-logfile', '-'
]
if DEBUG:
cmd.append('--reload')
return cmd
@property
def cwd(self):
return APPS_DIR
def start_other(self):
from terminal.startup import CoreTerminal
core_terminal = CoreTerminal()
core_terminal.start_heartbeat_thread()

View File

@@ -0,0 +1,141 @@
import threading
import signal
import time
import daemon
from daemon import pidfile
from .hands import *
from .hands import __version__
from .services.base import BaseService
class ServicesUtil(object):
def __init__(self, services, daemon_run=False, force_stop=True, daemon_stop=False):
self._services = services
self.daemon_run = daemon_run
self.force_stop = force_stop
self.daemon_stop = daemon_stop
self.EXIT_EVENT = threading.Event()
self.check_interval = 30
self.files_preserve_map = {}
def restart(self):
self.stop()
time.sleep(5)
self.start_and_watch()
def start_and_watch(self):
logging.info(time.ctime())
logging.info(f'JumpServer version {__version__}, more see https://www.jumpserver.org')
self.start()
if self.daemon_run:
self.show_status()
with self.daemon_context:
self.watch()
else:
self.watch()
def start(self):
for service in self._services:
service: BaseService
service.start()
self.files_preserve_map[service.name] = service.log_file
time.sleep(1)
def stop(self):
for service in self._services:
service: BaseService
service.stop(force=self.force_stop)
if self.daemon_stop:
self.stop_daemon()
# -- watch --
def watch(self):
while not self.EXIT_EVENT.is_set():
try:
_exit = self._watch()
if _exit:
break
time.sleep(self.check_interval)
except KeyboardInterrupt:
print('Start stop services')
break
self.clean_up()
def _watch(self):
for service in self._services:
service: BaseService
service.watch()
if service.EXIT_EVENT.is_set():
self.EXIT_EVENT.set()
return True
return False
# -- end watch --
def clean_up(self):
if not self.EXIT_EVENT.is_set():
self.EXIT_EVENT.set()
for service in self._services:
service: BaseService
service.stop(force=self.force_stop)
def show_status(self):
for service in self._services:
service: BaseService
service.show_status()
# -- daemon --
def stop_daemon(self):
if self.daemon_pid and self.daemon_is_running:
os.kill(self.daemon_pid, 15)
self.remove_daemon_pid()
def remove_daemon_pid(self):
if os.path.isfile(self.daemon_pid_filepath):
os.unlink(self.daemon_pid_filepath)
@property
def daemon_pid(self):
if not os.path.isfile(self.daemon_pid_filepath):
return 0
with open(self.daemon_pid_filepath) as f:
try:
pid = int(f.read().strip())
except ValueError:
pid = 0
return pid
@property
def daemon_is_running(self):
try:
os.kill(self.daemon_pid, 0)
except (OSError, ProcessLookupError):
return False
else:
return True
@property
def daemon_pid_filepath(self):
return os.path.join(TMP_DIR, 'jms.pid')
@property
def daemon_log_filepath(self):
return os.path.join(LOG_DIR, 'jms.log')
@property
def daemon_context(self):
daemon_log_file = open(self.daemon_log_filepath, 'a')
context = daemon.DaemonContext(
pidfile=pidfile.TimeoutPIDLockFile(self.daemon_pid_filepath),
signal_map={
signal.SIGTERM: lambda x, y: self.clean_up(),
signal.SIGHUP: 'terminate',
},
stdout=daemon_log_file,
stderr=daemon_log_file,
files_preserve=list(self.files_preserve_map.values()),
detach_process=True,
)
return context
# -- end daemon --

View File

@@ -0,0 +1,6 @@
from .services.command import BaseActionCommand, Action
class Command(BaseActionCommand):
help = 'Start services'
action = Action.start.value

View File

@@ -0,0 +1,6 @@
from .services.command import BaseActionCommand, Action
class Command(BaseActionCommand):
help = 'Show services status'
action = Action.status.value

View File

@@ -0,0 +1,6 @@
from .services.command import BaseActionCommand, Action
class Command(BaseActionCommand):
help = 'Stop services'
action = Action.stop.value