From 113987e0db52860aba5925a062bab66cf2dc904f Mon Sep 17 00:00:00 2001
From: Marek Siarkowicz
Date: Tue, 20 Feb 2018 16:26:08 +0100
Subject: [PATCH] Add prometheus addon

---
 .../prometheus/prometheus-configmap.yaml      | 169 ++++++++++++++++++
 .../prometheus/prometheus-deployment.yaml     | 100 +++++++++++
 cluster/addons/prometheus/prometheus-pvc.yaml |  15 ++
 .../addons/prometheus/prometheus-rbac.yaml    |  54 ++++++
 .../addons/prometheus/prometheus-service.yaml |  17 ++
 cluster/gce/config-default.sh                 |   3 +
 cluster/gce/config-test.sh                    |   3 +
 cluster/gce/gci/configure-helper.sh           |   5 +
 8 files changed, 366 insertions(+)
 create mode 100644 cluster/addons/prometheus/prometheus-configmap.yaml
 create mode 100644 cluster/addons/prometheus/prometheus-deployment.yaml
 create mode 100644 cluster/addons/prometheus/prometheus-pvc.yaml
 create mode 100644 cluster/addons/prometheus/prometheus-rbac.yaml
 create mode 100644 cluster/addons/prometheus/prometheus-service.yaml

diff --git a/cluster/addons/prometheus/prometheus-configmap.yaml b/cluster/addons/prometheus/prometheus-configmap.yaml
new file mode 100644
index 00000000000..5848aba8e74
--- /dev/null
+++ b/cluster/addons/prometheus/prometheus-configmap.yaml
@@ -0,0 +1,169 @@
+# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-config
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: EnsureExists
+data:
+  prometheus.yml: |
+    scrape_configs:
+    - job_name: prometheus
+      static_configs:
+      - targets:
+        - localhost:9090
+
+    - job_name: kubernetes-apiservers
+      kubernetes_sd_configs:
+      - role: endpoints
+      relabel_configs:
+      - action: keep
+        regex: default;kubernetes;https
+        source_labels:
+        - __meta_kubernetes_namespace
+        - __meta_kubernetes_service_name
+        - __meta_kubernetes_endpoint_port_name
+      scheme: https
+      tls_config:
+        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecure_skip_verify: true
+      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+    - job_name: kubernetes-nodes-kubelet
+      kubernetes_sd_configs:
+      - role: node
+      relabel_configs:
+      - action: labelmap
+        regex: __meta_kubernetes_node_label_(.+)
+      - source_labels: [__address__]
+        action: replace
+        target_label: __address__
+        regex: ([^:;]+):(\d+)
+        replacement: ${1}:10255
+
+    - job_name: kubernetes-nodes-cadvisor
+      kubernetes_sd_configs:
+      - role: node
+      relabel_configs:
+      - action: labelmap
+        regex: __meta_kubernetes_node_label_(.+)
+      - source_labels: [__address__]
+        action: replace
+        target_label: __address__
+        regex: ([^:;]+):(\d+)
+        replacement: ${1}:4194
+
+    - job_name: kubernetes-service-endpoints
+      kubernetes_sd_configs:
+      - role: endpoints
+      relabel_configs:
+      - action: keep
+        regex: "true"
+        source_labels:
+        - __meta_kubernetes_service_annotation_prometheus_io_scrape
+      - action: replace
+        regex: (https?)
+        source_labels:
+        - __meta_kubernetes_service_annotation_prometheus_io_scheme
+        target_label: __scheme__
+      - action: replace
+        regex: (.+)
+        source_labels:
+        - __meta_kubernetes_service_annotation_prometheus_io_path
+        target_label: __metrics_path__
+      - action: replace
+        regex: ([^:]+)(?::\d+)?;(\d+)
+        replacement: $1:$2
+        source_labels:
+        - __address__
+        - __meta_kubernetes_service_annotation_prometheus_io_port
+        target_label: __address__
+      - action: labelmap
+        regex: __meta_kubernetes_service_label_(.+)
+      - action: replace
+        source_labels:
+        - __meta_kubernetes_namespace
+        target_label: kubernetes_namespace
+      - action: replace
+        source_labels:
+        - __meta_kubernetes_service_name
+        target_label: kubernetes_name
+
+    - job_name: kubernetes-services
+      kubernetes_sd_configs:
+      - role: service
+      metrics_path: /probe
+      params:
+        module:
+        - http_2xx
+      relabel_configs:
+      - action: keep
+        regex: "true"
+        source_labels:
+        - __meta_kubernetes_service_annotation_prometheus_io_probe
+      - source_labels:
+        - __address__
+        target_label: __param_target
+      - replacement: blackbox
+        target_label: __address__
+      - source_labels:
+        - __param_target
+        target_label: instance
+      - action: labelmap
+        regex: __meta_kubernetes_service_label_(.+)
+      - source_labels:
+        - __meta_kubernetes_namespace
+        target_label: kubernetes_namespace
+      - source_labels:
+        - __meta_kubernetes_service_name
+        target_label: kubernetes_name
+
+    - job_name: kubernetes-pods
+      kubernetes_sd_configs:
+      - role: pod
+      relabel_configs:
+      - action: keep
+        regex: "true"
+        source_labels:
+        - __meta_kubernetes_pod_annotation_prometheus_io_scrape
+      - action: replace
+        regex: (.+)
+        source_labels:
+        - __meta_kubernetes_pod_annotation_prometheus_io_path
+        target_label: __metrics_path__
+      - action: replace
+        regex: ([^:]+)(?::\d+)?;(\d+)
+        replacement: $1:$2
+        source_labels:
+        - __address__
+        - __meta_kubernetes_pod_annotation_prometheus_io_port
+        target_label: __address__
+      - action: labelmap
+        regex: __meta_kubernetes_pod_label_(.+)
+      - action: replace
+        source_labels:
+        - __meta_kubernetes_namespace
+        target_label: kubernetes_namespace
+      - action: replace
+        source_labels:
+        - __meta_kubernetes_pod_name
+        target_label: kubernetes_pod_name
+    alerting:
+      alertmanagers:
+      - kubernetes_sd_configs:
+        - role: pod
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+        relabel_configs:
+        - source_labels: [__meta_kubernetes_namespace]
+          regex: kube-system
+          action: keep
+        - source_labels: [__meta_kubernetes_pod_label_k8s_app]
+          regex: alertmanager
+          action: keep
+        - source_labels: [__meta_kubernetes_pod_container_port_number]
+          regex: ""
+          action: drop
diff --git a/cluster/addons/prometheus/prometheus-deployment.yaml b/cluster/addons/prometheus/prometheus-deployment.yaml
new file mode 100644
index 00000000000..dd9635d85cf
--- /dev/null
+++ b/cluster/addons/prometheus/prometheus-deployment.yaml
@@ -0,0 +1,100 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: prometheus
+  namespace: kube-system
+  labels:
+    k8s-app: prometheus
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+    version: v2.2.1
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      k8s-app: prometheus
+      version: v2.2.1
+  template:
+    metadata:
+      labels:
+        k8s-app: prometheus
+        version: v2.2.1
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ''
+    spec:
+      priorityClassName: system-cluster-critical
+      serviceAccountName: prometheus
+      initContainers:
+        - name: "init-chown-data"
+          image: "busybox:latest"
+          imagePullPolicy: "IfNotPresent"
+          command: ["chown", "-R", "65534:65534", "/data"]
+          volumeMounts:
+            - name: storage-volume
+              mountPath: /data
+              subPath: ""
+      containers:
+        - name: prometheus-server-configmap-reload
+          image: "jimmidyson/configmap-reload:v0.1"
+          imagePullPolicy: "IfNotPresent"
+          args:
+            - --volume-dir=/etc/config
+            - --webhook-url=http://localhost:9090/-/reload
+          volumeMounts:
+            - name: config-volume
+              mountPath: /etc/config
+              readOnly: true
+          resources:
+            limits:
+              cpu: 10m
+              memory: 10Mi
+            requests:
+              cpu: 10m
+              memory: 10Mi
+
+        - name: prometheus-server
+          image: "prom/prometheus:v2.2.1"
+          imagePullPolicy: "IfNotPresent"
+          args:
+            - --config.file=/etc/config/prometheus.yml
+            - --storage.tsdb.path=/data
+            - --web.console.libraries=/etc/prometheus/console_libraries
+            - --web.console.templates=/etc/prometheus/consoles
+            - --web.enable-lifecycle
+          ports:
+            - containerPort: 9090
+          readinessProbe:
+            httpGet:
+              path: /-/ready
+              port: 9090
+            initialDelaySeconds: 30
+            timeoutSeconds: 30
+          livenessProbe:
+            httpGet:
+              path: /-/healthy
+              port: 9090
+            initialDelaySeconds: 30
+            timeoutSeconds: 30
+          # based on 10 running nodes with 30 pods each
+          resources:
+            limits:
+              cpu: 200m
+              memory: 1000Mi
+            requests:
+              cpu: 200m
+              memory: 1000Mi
+
+          volumeMounts:
+            - name: config-volume
+              mountPath: /etc/config
+            - name: storage-volume
+              mountPath: /data
+              subPath: ""
+      terminationGracePeriodSeconds: 300
+      volumes:
+        - name: config-volume
+          configMap:
+            name: prometheus-config
+        - name: storage-volume
+          persistentVolumeClaim:
+            claimName: prometheus
diff --git a/cluster/addons/prometheus/prometheus-pvc.yaml b/cluster/addons/prometheus/prometheus-pvc.yaml
new file mode 100644
index 00000000000..0ec1fe1d9cc
--- /dev/null
+++ b/cluster/addons/prometheus/prometheus-pvc.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: prometheus
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+spec:
+  storageClassName: standard
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: "16Gi"
diff --git a/cluster/addons/prometheus/prometheus-rbac.yaml b/cluster/addons/prometheus/prometheus-rbac.yaml
new file mode 100644
index 00000000000..d0f7fb9bc0c
--- /dev/null
+++ b/cluster/addons/prometheus/prometheus-rbac.yaml
@@ -0,0 +1,54 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: prometheus
+  namespace: kube-system
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: prometheus
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+      - services
+      - endpoints
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+    verbs:
+      - get
+  - nonResourceURLs:
+      - "/metrics"
+    verbs:
+      - get
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+  labels:
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+- kind: ServiceAccount
+  name: prometheus
+  namespace: kube-system
diff --git a/cluster/addons/prometheus/prometheus-service.yaml b/cluster/addons/prometheus/prometheus-service.yaml
new file mode 100644
index 00000000000..d97394b0591
--- /dev/null
+++ b/cluster/addons/prometheus/prometheus-service.yaml
@@ -0,0 +1,17 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: prometheus
+  namespace: kube-system
+  labels:
+    kubernetes.io/name: "Prometheus"
+    kubernetes.io/cluster-service: "true"
+    addonmanager.kubernetes.io/mode: Reconcile
+spec:
+  ports:
+    - name: http
+      port: 9090
+      protocol: TCP
+      targetPort: 9090
+  selector:
+    k8s-app: prometheus
diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh
index e9717a27a87..c92e4859ab4 100755
--- a/cluster/gce/config-default.sh
+++ b/cluster/gce/config-default.sh
@@ -144,6 +144,9 @@ ENABLE_L7_LOADBALANCING="${KUBE_ENABLE_L7_LOADBALANCING:-glbc}"
 #   standalone     - Heapster only. Metrics available via Heapster REST API.
 ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-influxdb}"
 
+# Optional: Enable deploying separate prometheus stack for monitoring kubernetes cluster
+ENABLE_PROMETHEUS_MONITORING="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}"
+
 # Optional: Enable Metrics Server. Metrics Server should be enable everywhere,
 # since it's a critical component, but in the first release we need a way to disable
 # this in case of stability issues.
diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh
index 2870234f50b..08db3e86f03 100755
--- a/cluster/gce/config-test.sh
+++ b/cluster/gce/config-test.sh
@@ -138,6 +138,9 @@ ENABLE_L7_LOADBALANCING="${KUBE_ENABLE_L7_LOADBALANCING:-glbc}"
 #   standalone     - Heapster only. Metrics available via Heapster REST API.
 ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-influxdb}"
 
+# Optional: Enable deploying separate prometheus stack for monitoring kubernetes cluster
+ENABLE_PROMETHEUS_MONITORING="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}"
+
 # Optional: Enable Metrics Server. Metrics Server should be enable everywhere,
 # since it's a critical component, but in the first release we need a way to disable
 # this in case of stability issues.
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 0000f325992..c22b3a7940c 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -2104,6 +2104,11 @@ EOF
     prepare-kube-proxy-manifest-variables "$src_dir/kube-proxy/kube-proxy-ds.yaml"
     setup-addon-manifests "addons" "kube-proxy"
   fi
+  # Setup prometheus stack for monitoring kubernetes cluster
+  if [[ "${ENABLE_PROMETHEUS_MONITORING:-}" == "true" ]]; then
+    setup-addon-manifests "addons" "prometheus"
+  fi
+  # Setup cluster monitoring using heapster
   if [[ "${ENABLE_CLUSTER_MONITORING:-}" == "influxdb" ]] || \
      [[ "${ENABLE_CLUSTER_MONITORING:-}" == "google" ]] || \
      [[ "${ENABLE_CLUSTER_MONITORING:-}" == "stackdriver" ]] || \