feat: 添加组件监控;TerminalModel添加type字段; (#5206)

* feat: 添加组件监控;TerminalModel添加type字段;

* feat: Terminal序列类添加type字段

* feat: Terminal序列类添加type字段为只读

* feat: 修改组件status文案

* feat: 取消上传组件状态序列类count字段

* refactor: 修改terminal/models目录结构

* feat: 修改ComponentTypeChoices

* feat: 取消考虑CoreComponent类型

* feat: 修改Terminal status判断逻辑

* feat: 终端列表添加status过滤; 组件状态序列类添加default值

* feat: 添加PrometheusMetricsAPI

* feat: 修改PrometheusMetricsAPI

Co-authored-by: Bai <bugatti_it@163.com>
This commit is contained in:
fit2bot
2020-12-10 20:50:22 +08:00
committed by GitHub
parent d4feaf1e08
commit 856e7c16e5
20 changed files with 902 additions and 495 deletions

View File

@@ -11,6 +11,7 @@ import jms_storage
from common.tasks import send_mail_async
from common.utils import get_logger, reverse
from settings.models import Setting
from . import const
from .models import ReplayStorage, Session, Command
@@ -101,3 +102,104 @@ def send_command_alert_mail(command):
logger.debug(message)
send_mail_async.delay(subject, message, recipient_list, html_message=message)
class ComponentsMetricsUtil(object):
    """Aggregate status and active-session counters over Terminal components.

    The components are loaded from the ``Terminal`` model on construction,
    optionally restricted to a single component type.
    """

    def __init__(self, component_type=None):
        """Load the components to report on.

        :param component_type: when truthy, only terminals whose ``type``
            equals this value are loaded; otherwise all terminals are loaded.
        """
        self.type = component_type
        self.components = []
        self.initial_components()

    def initial_components(self):
        """Populate ``self.components`` from the database, ordered by type."""
        from .models import Terminal
        terminals = Terminal.objects.all().order_by('type')
        if self.type:
            terminals = terminals.filter(type=self.type)
        self.components = list(terminals)

    def get_metrics(self, components=None):
        """Count components per status and sum their active sessions.

        :param components: iterable of components to aggregate; defaults to
            ``self.components`` so existing callers keep their behaviour.
        :return: dict with the keys ``total``, ``normal``, ``high``,
            ``critical`` and ``session_active``.
        """
        if components is None:
            components = self.components

        total_count = normal_count = high_count = critical_count = session_active_total = 0
        for component in components:
            total_count += 1
            if not component.is_alive:
                # A component that is not alive counts as critical and
                # contributes no sessions.
                critical_count += 1
                continue
            # ``or 0`` tolerates a state entry explicitly stored as None.
            session_active_total += component.state.get('session_active_count', 0) or 0
            if component.is_normal:
                normal_count += 1
            elif component.is_high:
                high_count += 1
            else:
                critical_count += 1
        metrics = {
            'total': total_count,
            'normal': normal_count,
            'high': high_count,
            'critical': critical_count,
            'session_active': session_active_total
        }
        return metrics


class ComponentsPrometheusMetricsUtil(ComponentsMetricsUtil):
    """Render the component metrics in the Prometheus text exposition format."""

    @staticmethod
    def get_status_metrics(metrics):
        """Map aggregated metrics to the ``status`` label values that are exported.

        :param metrics: dict as returned by ``get_metrics``.
        :return: dict keyed by status label (``any`` carries the total).
        """
        return {
            'any': metrics['total'],
            'normal': metrics['normal'],
            'high': metrics['high'],
            'critical': metrics['critical']
        }

    @staticmethod
    def _escape_label_value(value):
        """Escape backslash, double quote and newline inside a label value."""
        return str(value).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    def _get_metrics_by_type(self):
        """Return ``[(component_type, metrics), ...]`` in first-seen type order.

        Each ``metrics`` dict is aggregated only over the components of that
        type, so every type is reported once with its own numbers.
        """
        ordered_types = []
        grouped = {}
        for component in self.components:
            component_type = component.type
            if component_type not in grouped:
                grouped[component_type] = []
                ordered_types.append(component_type)
            grouped[component_type].append(component)
        return [
            (component_type, self.get_metrics(grouped[component_type]))
            for component_type in ordered_types
        ]

    def get_prometheus_metrics_text(self):
        """Build the full metrics payload as a newline-joined string.

        Three sections are emitted: status counts per component type, active
        session totals per component type, and selected state values for each
        alive component.

        :return: the metrics text.
        """
        escape = self._escape_label_value
        # Computed once and shared by the two per-type sections below.
        metrics_by_type = self._get_metrics_by_type()
        prometheus_metrics = []

        prometheus_metrics.append('# JumpServer 各组件状态个数汇总')
        base_status_metric_text = 'jumpserver_components_status_total{component_type="%s", status="%s"} %s'
        for component_type, base_metrics in metrics_by_type:
            prometheus_metrics.append(f'## 组件: {component_type}')
            status_metrics = self.get_status_metrics(base_metrics)
            for status, value in status_metrics.items():
                metric_text = base_status_metric_text % (escape(component_type), status, value)
                prometheus_metrics.append(metric_text)

        prometheus_metrics.append('\n')
        prometheus_metrics.append('# JumpServer 各组件在线会话数汇总')
        base_session_active_metric_text = 'jumpserver_components_session_active_total{component_type="%s"} %s'
        for component_type, base_metrics in metrics_by_type:
            prometheus_metrics.append(f'## 组件: {component_type}')
            metric_text = base_session_active_metric_text % (
                escape(component_type),
                base_metrics['session_active']
            )
            prometheus_metrics.append(metric_text)

        prometheus_metrics.append('\n')
        prometheus_metrics.append('# JumpServer 各组件节点一些指标')
        base_system_state_metric_text = 'jumpserver_components_%s{component_type="%s", component="%s"} %s'
        system_states_name = [
            'system_cpu_load_1', 'system_memory_used_percent',
            'system_disk_used_percent', 'session_active_count'
        ]
        for system_state_name in system_states_name:
            prometheus_metrics.append(f'## 指标: {system_state_name}')
            for component in self.components:
                if not component.is_alive:
                    continue
                value = component.state.get(system_state_name)
                if value is None:
                    # A missing value would be written as the literal "None",
                    # which is not a valid sample value; omit the sample.
                    continue
                metric_text = base_system_state_metric_text % (
                    system_state_name,
                    escape(component.type),
                    escape(component.name),
                    value
                )
                prometheus_metrics.append(metric_text)

        prometheus_metrics_text = '\n'.join(prometheus_metrics)
        return prometheus_metrics_text