mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-01 17:29:00 +00:00
Merge pull request #83442 from serathius/remove-prometheus-addon
Remove prometheus addon
This commit is contained in:
@@ -1,13 +0,0 @@
|
|||||||
# See the OWNERS docs at https://go.k8s.io/owners
|
|
||||||
|
|
||||||
approvers:
|
|
||||||
- kawych
|
|
||||||
- piosz
|
|
||||||
- serathius
|
|
||||||
- brancz
|
|
||||||
reviewers:
|
|
||||||
- kawych
|
|
||||||
- piosz
|
|
||||||
- serathius
|
|
||||||
- brancz
|
|
||||||
|
|
@@ -1,5 +0,0 @@
|
|||||||
# Prometheus Add-on
|
|
||||||
|
|
||||||
This add-on is an experimental configuration of k8s monitoring using Prometheus used for e2e tests.
|
|
||||||
|
|
||||||
For production use check out more mature setups like [Prometheus Operator](https://github.com/coreos/prometheus-operator) and [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus).
|
|
@@ -1,18 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: alertmanager-config
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: EnsureExists
|
|
||||||
data:
|
|
||||||
alertmanager.yml: |
|
|
||||||
global: null
|
|
||||||
receivers:
|
|
||||||
- name: default-receiver
|
|
||||||
route:
|
|
||||||
group_interval: 5m
|
|
||||||
group_wait: 10s
|
|
||||||
receiver: default-receiver
|
|
||||||
repeat_interval: 3h
|
|
@@ -1,76 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: alertmanager
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
k8s-app: alertmanager
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
version: v0.14.0
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: alertmanager
|
|
||||||
version: v0.14.0
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: alertmanager
|
|
||||||
version: v0.14.0
|
|
||||||
spec:
|
|
||||||
priorityClassName: system-cluster-critical
|
|
||||||
containers:
|
|
||||||
- name: prometheus-alertmanager
|
|
||||||
image: "prom/alertmanager:v0.14.0"
|
|
||||||
imagePullPolicy: "IfNotPresent"
|
|
||||||
args:
|
|
||||||
- --config.file=/etc/config/alertmanager.yml
|
|
||||||
- --storage.path=/data
|
|
||||||
- --web.external-url=/
|
|
||||||
ports:
|
|
||||||
- containerPort: 9093
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /#/status
|
|
||||||
port: 9093
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
timeoutSeconds: 30
|
|
||||||
volumeMounts:
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /etc/config
|
|
||||||
- name: storage-volume
|
|
||||||
mountPath: "/data"
|
|
||||||
subPath: ""
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 50Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 50Mi
|
|
||||||
- name: prometheus-alertmanager-configmap-reload
|
|
||||||
image: "jimmidyson/configmap-reload:v0.1"
|
|
||||||
imagePullPolicy: "IfNotPresent"
|
|
||||||
args:
|
|
||||||
- --volume-dir=/etc/config
|
|
||||||
- --webhook-url=http://localhost:9093/-/reload
|
|
||||||
volumeMounts:
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /etc/config
|
|
||||||
readOnly: true
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 10Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 10Mi
|
|
||||||
volumes:
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: alertmanager-config
|
|
||||||
- name: storage-volume
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: alertmanager
|
|
@@ -1,15 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: alertmanager
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: EnsureExists
|
|
||||||
spec:
|
|
||||||
storageClassName: standard
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: "2Gi"
|
|
@@ -1,18 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: alertmanager
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
kubernetes.io/name: "Alertmanager"
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 80
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: 9093
|
|
||||||
selector:
|
|
||||||
k8s-app: alertmanager
|
|
||||||
type: "ClusterIP"
|
|
@@ -1,89 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-state-metrics
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
version: v1.3.0
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: kube-state-metrics
|
|
||||||
version: v1.3.0
|
|
||||||
replicas: 1
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-state-metrics
|
|
||||||
version: v1.3.0
|
|
||||||
spec:
|
|
||||||
priorityClassName: system-cluster-critical
|
|
||||||
serviceAccountName: kube-state-metrics
|
|
||||||
containers:
|
|
||||||
- name: kube-state-metrics
|
|
||||||
image: quay.io/coreos/kube-state-metrics:v1.3.0
|
|
||||||
ports:
|
|
||||||
- name: http-metrics
|
|
||||||
containerPort: 8080
|
|
||||||
- name: telemetry
|
|
||||||
containerPort: 8081
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /healthz
|
|
||||||
port: 8080
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
timeoutSeconds: 5
|
|
||||||
- name: addon-resizer
|
|
||||||
image: k8s.gcr.io/addon-resizer:1.8.6
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 30Mi
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 30Mi
|
|
||||||
env:
|
|
||||||
- name: MY_POD_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.name
|
|
||||||
- name: MY_POD_NAMESPACE
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.namespace
|
|
||||||
volumeMounts:
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /etc/config
|
|
||||||
command:
|
|
||||||
- /pod_nanny
|
|
||||||
- --config-dir=/etc/config
|
|
||||||
- --container=kube-state-metrics
|
|
||||||
- --cpu=100m
|
|
||||||
- --extra-cpu=1m
|
|
||||||
- --memory=100Mi
|
|
||||||
- --extra-memory=2Mi
|
|
||||||
- --threshold=5
|
|
||||||
- --deployment=kube-state-metrics
|
|
||||||
volumes:
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: kube-state-metrics-config
|
|
||||||
---
|
|
||||||
# Config map for resource configuration.
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics-config
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
k8s-app: kube-state-metrics
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
data:
|
|
||||||
NannyConfiguration: |-
|
|
||||||
apiVersion: nannyconfig/v1alpha1
|
|
||||||
kind: NannyConfiguration
|
|
||||||
|
|
@@ -1,103 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources:
|
|
||||||
- configmaps
|
|
||||||
- secrets
|
|
||||||
- nodes
|
|
||||||
- pods
|
|
||||||
- services
|
|
||||||
- resourcequotas
|
|
||||||
- replicationcontrollers
|
|
||||||
- limitranges
|
|
||||||
- persistentvolumeclaims
|
|
||||||
- persistentvolumes
|
|
||||||
- namespaces
|
|
||||||
- endpoints
|
|
||||||
verbs: ["list", "watch"]
|
|
||||||
- apiGroups: ["extensions"]
|
|
||||||
resources:
|
|
||||||
- daemonsets
|
|
||||||
- deployments
|
|
||||||
- replicasets
|
|
||||||
verbs: ["list", "watch"]
|
|
||||||
- apiGroups: ["apps"]
|
|
||||||
resources:
|
|
||||||
- statefulsets
|
|
||||||
verbs: ["list", "watch"]
|
|
||||||
- apiGroups: ["batch"]
|
|
||||||
resources:
|
|
||||||
- cronjobs
|
|
||||||
- jobs
|
|
||||||
verbs: ["list", "watch"]
|
|
||||||
- apiGroups: ["autoscaling"]
|
|
||||||
resources:
|
|
||||||
- horizontalpodautoscalers
|
|
||||||
verbs: ["list", "watch"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: Role
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics-resizer
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
rules:
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources:
|
|
||||||
- pods
|
|
||||||
verbs: ["get"]
|
|
||||||
- apiGroups: ["extensions"]
|
|
||||||
resources:
|
|
||||||
- deployments
|
|
||||||
resourceNames: ["kube-state-metrics"]
|
|
||||||
verbs: ["get", "update"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: kube-state-metrics
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: kube-system
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: RoleBinding
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: Role
|
|
||||||
name: kube-state-metrics-resizer
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: kube-system
|
|
@@ -1,23 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: kube-state-metrics
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
kubernetes.io/name: "kube-state-metrics"
|
|
||||||
annotations:
|
|
||||||
prometheus.io/scrape: 'true'
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: http-metrics
|
|
||||||
port: 8080
|
|
||||||
targetPort: http-metrics
|
|
||||||
protocol: TCP
|
|
||||||
- name: telemetry
|
|
||||||
port: 8081
|
|
||||||
targetPort: telemetry
|
|
||||||
protocol: TCP
|
|
||||||
selector:
|
|
||||||
k8s-app: kube-state-metrics
|
|
@@ -1,57 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: DaemonSet
|
|
||||||
metadata:
|
|
||||||
name: node-exporter
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
k8s-app: node-exporter
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
version: v0.15.2
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: node-exporter
|
|
||||||
version: v0.15.2
|
|
||||||
updateStrategy:
|
|
||||||
type: OnDelete
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: node-exporter
|
|
||||||
version: v0.15.2
|
|
||||||
spec:
|
|
||||||
priorityClassName: system-node-critical
|
|
||||||
containers:
|
|
||||||
- name: prometheus-node-exporter
|
|
||||||
image: "prom/node-exporter:v0.15.2"
|
|
||||||
imagePullPolicy: "IfNotPresent"
|
|
||||||
args:
|
|
||||||
- --path.procfs=/host/proc
|
|
||||||
- --path.sysfs=/host/sys
|
|
||||||
ports:
|
|
||||||
- name: metrics
|
|
||||||
containerPort: 9100
|
|
||||||
hostPort: 9100
|
|
||||||
volumeMounts:
|
|
||||||
- name: proc
|
|
||||||
mountPath: /host/proc
|
|
||||||
readOnly: true
|
|
||||||
- name: sys
|
|
||||||
mountPath: /host/sys
|
|
||||||
readOnly: true
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
memory: 50Mi
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 50Mi
|
|
||||||
hostNetwork: true
|
|
||||||
hostPID: true
|
|
||||||
volumes:
|
|
||||||
- name: proc
|
|
||||||
hostPath:
|
|
||||||
path: /proc
|
|
||||||
- name: sys
|
|
||||||
hostPath:
|
|
||||||
path: /sys
|
|
@@ -1,20 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: node-exporter
|
|
||||||
namespace: kube-system
|
|
||||||
annotations:
|
|
||||||
prometheus.io/scrape: "true"
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
kubernetes.io/name: "NodeExporter"
|
|
||||||
spec:
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: metrics
|
|
||||||
port: 9100
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: 9100
|
|
||||||
selector:
|
|
||||||
k8s-app: node-exporter
|
|
@@ -1,171 +0,0 @@
|
|||||||
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ConfigMap
|
|
||||||
metadata:
|
|
||||||
name: prometheus-config
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: EnsureExists
|
|
||||||
data:
|
|
||||||
prometheus.yml: |
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: prometheus
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- localhost:9090
|
|
||||||
|
|
||||||
- job_name: kubernetes-apiservers
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: endpoints
|
|
||||||
relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
regex: default;kubernetes;https
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_namespace
|
|
||||||
- __meta_kubernetes_service_name
|
|
||||||
- __meta_kubernetes_endpoint_port_name
|
|
||||||
scheme: https
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
insecure_skip_verify: true
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
|
|
||||||
- job_name: kubernetes-nodes-kubelet
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: node
|
|
||||||
relabel_configs:
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_node_label_(.+)
|
|
||||||
scheme: https
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
insecure_skip_verify: true
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
|
|
||||||
- job_name: kubernetes-nodes-cadvisor
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: node
|
|
||||||
relabel_configs:
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_node_label_(.+)
|
|
||||||
- target_label: __metrics_path__
|
|
||||||
replacement: /metrics/cadvisor
|
|
||||||
scheme: https
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
insecure_skip_verify: true
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
|
|
||||||
- job_name: kubernetes-service-endpoints
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: endpoints
|
|
||||||
relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
regex: true
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_service_annotation_prometheus_io_scrape
|
|
||||||
- action: replace
|
|
||||||
regex: (https?)
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_service_annotation_prometheus_io_scheme
|
|
||||||
target_label: __scheme__
|
|
||||||
- action: replace
|
|
||||||
regex: (.+)
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_service_annotation_prometheus_io_path
|
|
||||||
target_label: __metrics_path__
|
|
||||||
- action: replace
|
|
||||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
|
||||||
replacement: $1:$2
|
|
||||||
source_labels:
|
|
||||||
- __address__
|
|
||||||
- __meta_kubernetes_service_annotation_prometheus_io_port
|
|
||||||
target_label: __address__
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_label_(.+)
|
|
||||||
- action: replace
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_namespace
|
|
||||||
target_label: kubernetes_namespace
|
|
||||||
- action: replace
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_service_name
|
|
||||||
target_label: kubernetes_name
|
|
||||||
|
|
||||||
- job_name: kubernetes-services
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: service
|
|
||||||
metrics_path: /probe
|
|
||||||
params:
|
|
||||||
module:
|
|
||||||
- http_2xx
|
|
||||||
relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
regex: true
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_service_annotation_prometheus_io_probe
|
|
||||||
- source_labels:
|
|
||||||
- __address__
|
|
||||||
target_label: __param_target
|
|
||||||
- replacement: blackbox
|
|
||||||
target_label: __address__
|
|
||||||
- source_labels:
|
|
||||||
- __param_target
|
|
||||||
target_label: instance
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_service_label_(.+)
|
|
||||||
- source_labels:
|
|
||||||
- __meta_kubernetes_namespace
|
|
||||||
target_label: kubernetes_namespace
|
|
||||||
- source_labels:
|
|
||||||
- __meta_kubernetes_service_name
|
|
||||||
target_label: kubernetes_name
|
|
||||||
|
|
||||||
- job_name: kubernetes-pods
|
|
||||||
kubernetes_sd_configs:
|
|
||||||
- role: pod
|
|
||||||
relabel_configs:
|
|
||||||
- action: keep
|
|
||||||
regex: true
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_pod_annotation_prometheus_io_scrape
|
|
||||||
- action: replace
|
|
||||||
regex: (.+)
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_pod_annotation_prometheus_io_path
|
|
||||||
target_label: __metrics_path__
|
|
||||||
- action: replace
|
|
||||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
|
||||||
replacement: $1:$2
|
|
||||||
source_labels:
|
|
||||||
- __address__
|
|
||||||
- __meta_kubernetes_pod_annotation_prometheus_io_port
|
|
||||||
target_label: __address__
|
|
||||||
- action: labelmap
|
|
||||||
regex: __meta_kubernetes_pod_label_(.+)
|
|
||||||
- action: replace
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_namespace
|
|
||||||
target_label: kubernetes_namespace
|
|
||||||
- action: replace
|
|
||||||
source_labels:
|
|
||||||
- __meta_kubernetes_pod_name
|
|
||||||
target_label: kubernetes_pod_name
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- kubernetes_sd_configs:
|
|
||||||
- role: pod
|
|
||||||
tls_config:
|
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels: [__meta_kubernetes_namespace]
|
|
||||||
regex: kube-system
|
|
||||||
action: keep
|
|
||||||
- source_labels: [__meta_kubernetes_pod_label_k8s_app]
|
|
||||||
regex: alertmanager
|
|
||||||
action: keep
|
|
||||||
- source_labels: [__meta_kubernetes_pod_container_port_number]
|
|
||||||
regex:
|
|
||||||
action: drop
|
|
@@ -1,55 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: prometheus
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: prometheus
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
rules:
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- nodes
|
|
||||||
- nodes/metrics
|
|
||||||
- services
|
|
||||||
- endpoints
|
|
||||||
- pods
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
- apiGroups:
|
|
||||||
- ""
|
|
||||||
resources:
|
|
||||||
- configmaps
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
- nonResourceURLs:
|
|
||||||
- "/metrics"
|
|
||||||
verbs:
|
|
||||||
- get
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: prometheus
|
|
||||||
labels:
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: prometheus
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: prometheus
|
|
||||||
namespace: kube-system
|
|
@@ -1,17 +0,0 @@
|
|||||||
kind: Service
|
|
||||||
apiVersion: v1
|
|
||||||
metadata:
|
|
||||||
name: prometheus
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
kubernetes.io/name: "Prometheus"
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
spec:
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 9090
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: 9090
|
|
||||||
selector:
|
|
||||||
k8s-app: prometheus
|
|
@@ -1,107 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: StatefulSet
|
|
||||||
metadata:
|
|
||||||
name: prometheus
|
|
||||||
namespace: kube-system
|
|
||||||
labels:
|
|
||||||
k8s-app: prometheus
|
|
||||||
kubernetes.io/cluster-service: "true"
|
|
||||||
addonmanager.kubernetes.io/mode: Reconcile
|
|
||||||
version: v2.2.1
|
|
||||||
spec:
|
|
||||||
serviceName: "prometheus"
|
|
||||||
replicas: 1
|
|
||||||
podManagementPolicy: "Parallel"
|
|
||||||
updateStrategy:
|
|
||||||
type: "RollingUpdate"
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: prometheus
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
k8s-app: prometheus
|
|
||||||
spec:
|
|
||||||
priorityClassName: system-cluster-critical
|
|
||||||
serviceAccountName: prometheus
|
|
||||||
initContainers:
|
|
||||||
- name: "init-chown-data"
|
|
||||||
image: "busybox:latest"
|
|
||||||
imagePullPolicy: "IfNotPresent"
|
|
||||||
command: ["chown", "-R", "65534:65534", "/data"]
|
|
||||||
volumeMounts:
|
|
||||||
- name: prometheus-data
|
|
||||||
mountPath: /data
|
|
||||||
subPath: ""
|
|
||||||
containers:
|
|
||||||
- name: prometheus-server-configmap-reload
|
|
||||||
image: "jimmidyson/configmap-reload:v0.1"
|
|
||||||
imagePullPolicy: "IfNotPresent"
|
|
||||||
args:
|
|
||||||
- --volume-dir=/etc/config
|
|
||||||
- --webhook-url=http://localhost:9090/-/reload
|
|
||||||
volumeMounts:
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /etc/config
|
|
||||||
readOnly: true
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 10Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 10Mi
|
|
||||||
|
|
||||||
- name: prometheus-server
|
|
||||||
image: "prom/prometheus:v2.2.1"
|
|
||||||
imagePullPolicy: "IfNotPresent"
|
|
||||||
args:
|
|
||||||
- --config.file=/etc/config/prometheus.yml
|
|
||||||
- --storage.tsdb.path=/data
|
|
||||||
- --web.console.libraries=/etc/prometheus/console_libraries
|
|
||||||
- --web.console.templates=/etc/prometheus/consoles
|
|
||||||
- --web.enable-lifecycle
|
|
||||||
ports:
|
|
||||||
- containerPort: 9090
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /-/ready
|
|
||||||
port: 9090
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
timeoutSeconds: 30
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /-/healthy
|
|
||||||
port: 9090
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
timeoutSeconds: 30
|
|
||||||
# based on 10 running nodes with 30 pods each
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 200m
|
|
||||||
memory: 1000Mi
|
|
||||||
requests:
|
|
||||||
cpu: 200m
|
|
||||||
memory: 1000Mi
|
|
||||||
|
|
||||||
volumeMounts:
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /etc/config
|
|
||||||
- name: prometheus-data
|
|
||||||
mountPath: /data
|
|
||||||
subPath: ""
|
|
||||||
terminationGracePeriodSeconds: 300
|
|
||||||
volumes:
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: prometheus-config
|
|
||||||
volumeClaimTemplates:
|
|
||||||
- metadata:
|
|
||||||
name: prometheus-data
|
|
||||||
spec:
|
|
||||||
storageClassName: standard
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: "16Gi"
|
|
@@ -160,9 +160,6 @@ ENABLE_L7_LOADBALANCING="${KUBE_ENABLE_L7_LOADBALANCING:-glbc}"
|
|||||||
# standalone - Heapster only. Metrics available via Heapster REST API.
|
# standalone - Heapster only. Metrics available via Heapster REST API.
|
||||||
ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}"
|
ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}"
|
||||||
|
|
||||||
# Optional: Enable deploying separate prometheus stack for monitoring kubernetes cluster
|
|
||||||
ENABLE_PROMETHEUS_MONITORING="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}"
|
|
||||||
|
|
||||||
# Optional: Enable Metrics Server. Metrics Server should be enable everywhere,
|
# Optional: Enable Metrics Server. Metrics Server should be enable everywhere,
|
||||||
# since it's a critical component, but in the first release we need a way to disable
|
# since it's a critical component, but in the first release we need a way to disable
|
||||||
# this in case of stability issues.
|
# this in case of stability issues.
|
||||||
|
@@ -172,9 +172,6 @@ ENABLE_L7_LOADBALANCING="${KUBE_ENABLE_L7_LOADBALANCING:-glbc}"
|
|||||||
# standalone - Heapster only. Metrics available via Heapster REST API.
|
# standalone - Heapster only. Metrics available via Heapster REST API.
|
||||||
ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}"
|
ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}"
|
||||||
|
|
||||||
# Optional: Enable deploying separate prometheus stack for monitoring kubernetes cluster
|
|
||||||
ENABLE_PROMETHEUS_MONITORING="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}"
|
|
||||||
|
|
||||||
# Optional: Enable Metrics Server. Metrics Server should be enable everywhere,
|
# Optional: Enable Metrics Server. Metrics Server should be enable everywhere,
|
||||||
# since it's a critical component, but in the first release we need a way to disable
|
# since it's a critical component, but in the first release we need a way to disable
|
||||||
# this in case of stability issues.
|
# this in case of stability issues.
|
||||||
|
@@ -2279,10 +2279,6 @@ EOF
|
|||||||
prepare-kube-proxy-manifest-variables "$src_dir/kube-proxy/kube-proxy-ds.yaml"
|
prepare-kube-proxy-manifest-variables "$src_dir/kube-proxy/kube-proxy-ds.yaml"
|
||||||
setup-addon-manifests "addons" "kube-proxy"
|
setup-addon-manifests "addons" "kube-proxy"
|
||||||
fi
|
fi
|
||||||
# Setup prometheus stack for monitoring kubernetes cluster
|
|
||||||
if [[ "${ENABLE_PROMETHEUS_MONITORING:-}" == "true" ]]; then
|
|
||||||
setup-addon-manifests "addons" "prometheus"
|
|
||||||
fi
|
|
||||||
# Setup cluster monitoring using heapster
|
# Setup cluster monitoring using heapster
|
||||||
if [[ "${ENABLE_CLUSTER_MONITORING:-}" == "influxdb" ]] || \
|
if [[ "${ENABLE_CLUSTER_MONITORING:-}" == "influxdb" ]] || \
|
||||||
[[ "${ENABLE_CLUSTER_MONITORING:-}" == "google" ]] || \
|
[[ "${ENABLE_CLUSTER_MONITORING:-}" == "google" ]] || \
|
||||||
|
@@ -1124,7 +1124,6 @@ SERVICE_CLUSTER_IP_RANGE: $(yaml-quote ${SERVICE_CLUSTER_IP_RANGE})
|
|||||||
KUBERNETES_MASTER_NAME: $(yaml-quote ${KUBERNETES_MASTER_NAME})
|
KUBERNETES_MASTER_NAME: $(yaml-quote ${KUBERNETES_MASTER_NAME})
|
||||||
ALLOCATE_NODE_CIDRS: $(yaml-quote ${ALLOCATE_NODE_CIDRS:-false})
|
ALLOCATE_NODE_CIDRS: $(yaml-quote ${ALLOCATE_NODE_CIDRS:-false})
|
||||||
ENABLE_CLUSTER_MONITORING: $(yaml-quote ${ENABLE_CLUSTER_MONITORING:-none})
|
ENABLE_CLUSTER_MONITORING: $(yaml-quote ${ENABLE_CLUSTER_MONITORING:-none})
|
||||||
ENABLE_PROMETHEUS_MONITORING: $(yaml-quote ${ENABLE_PROMETHEUS_MONITORING:-false})
|
|
||||||
ENABLE_METRICS_SERVER: $(yaml-quote ${ENABLE_METRICS_SERVER:-false})
|
ENABLE_METRICS_SERVER: $(yaml-quote ${ENABLE_METRICS_SERVER:-false})
|
||||||
ENABLE_METADATA_AGENT: $(yaml-quote ${ENABLE_METADATA_AGENT:-none})
|
ENABLE_METADATA_AGENT: $(yaml-quote ${ENABLE_METADATA_AGENT:-none})
|
||||||
METADATA_AGENT_CPU_REQUEST: $(yaml-quote ${METADATA_AGENT_CPU_REQUEST:-})
|
METADATA_AGENT_CPU_REQUEST: $(yaml-quote ${METADATA_AGENT_CPU_REQUEST:-})
|
||||||
|
@@ -167,7 +167,6 @@ export PATH
|
|||||||
--node-tag="${NODE_TAG:-}" \
|
--node-tag="${NODE_TAG:-}" \
|
||||||
--master-tag="${MASTER_TAG:-}" \
|
--master-tag="${MASTER_TAG:-}" \
|
||||||
--cluster-monitoring-mode="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}" \
|
--cluster-monitoring-mode="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}" \
|
||||||
--prometheus-monitoring="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}" \
|
|
||||||
--dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
|
--dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
|
||||||
--ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
|
--ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
|
||||||
${KUBE_CONTAINER_RUNTIME:+"--container-runtime=${KUBE_CONTAINER_RUNTIME}"} \
|
${KUBE_CONTAINER_RUNTIME:+"--container-runtime=${KUBE_CONTAINER_RUNTIME}"} \
|
||||||
|
@@ -108,13 +108,6 @@ func SkipIfMultizone(c clientset.Interface) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SkipUnlessPrometheusMonitoringIsEnabled skips if the prometheus monitoring is not enabled.
|
|
||||||
func SkipUnlessPrometheusMonitoringIsEnabled(supportedMonitoring ...string) {
|
|
||||||
if !TestContext.EnablePrometheusMonitoring {
|
|
||||||
skipInternalf(1, "Skipped because prometheus monitoring is not enabled")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SkipUnlessMasterOSDistroIs skips if the master OS distro is not included in the supportedMasterOsDistros.
|
// SkipUnlessMasterOSDistroIs skips if the master OS distro is not included in the supportedMasterOsDistros.
|
||||||
func SkipUnlessMasterOSDistroIs(supportedMasterOsDistros ...string) {
|
func SkipUnlessMasterOSDistroIs(supportedMasterOsDistros ...string) {
|
||||||
if !MasterOSDistroIs(supportedMasterOsDistros...) {
|
if !MasterOSDistroIs(supportedMasterOsDistros...) {
|
||||||
|
@@ -151,8 +151,6 @@ type TestContextType struct {
|
|||||||
NodeTestContextType
|
NodeTestContextType
|
||||||
// Monitoring solution that is used in current cluster.
|
// Monitoring solution that is used in current cluster.
|
||||||
ClusterMonitoringMode string
|
ClusterMonitoringMode string
|
||||||
// Separate Prometheus monitoring deployed in cluster
|
|
||||||
EnablePrometheusMonitoring bool
|
|
||||||
|
|
||||||
// Indicates what path the kubernetes-anywhere is installed on
|
// Indicates what path the kubernetes-anywhere is installed on
|
||||||
KubernetesAnywherePath string
|
KubernetesAnywherePath string
|
||||||
@@ -313,7 +311,6 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
|
|||||||
flags.StringVar(&TestContext.MasterOSDistro, "master-os-distro", "debian", "The OS distribution of cluster master (debian, ubuntu, gci, coreos, or custom).")
|
flags.StringVar(&TestContext.MasterOSDistro, "master-os-distro", "debian", "The OS distribution of cluster master (debian, ubuntu, gci, coreos, or custom).")
|
||||||
flags.StringVar(&TestContext.NodeOSDistro, "node-os-distro", "debian", "The OS distribution of cluster VM instances (debian, ubuntu, gci, coreos, or custom).")
|
flags.StringVar(&TestContext.NodeOSDistro, "node-os-distro", "debian", "The OS distribution of cluster VM instances (debian, ubuntu, gci, coreos, or custom).")
|
||||||
flags.StringVar(&TestContext.ClusterMonitoringMode, "cluster-monitoring-mode", "standalone", "The monitoring solution that is used in the cluster.")
|
flags.StringVar(&TestContext.ClusterMonitoringMode, "cluster-monitoring-mode", "standalone", "The monitoring solution that is used in the cluster.")
|
||||||
flags.BoolVar(&TestContext.EnablePrometheusMonitoring, "prometheus-monitoring", false, "Separate Prometheus monitoring deployed in cluster.")
|
|
||||||
flags.StringVar(&TestContext.ClusterDNSDomain, "dns-domain", "cluster.local", "The DNS Domain of the cluster.")
|
flags.StringVar(&TestContext.ClusterDNSDomain, "dns-domain", "cluster.local", "The DNS Domain of the cluster.")
|
||||||
|
|
||||||
// TODO: Flags per provider? Rename gce-project/gce-zone?
|
// TODO: Flags per provider? Rename gce-project/gce-zone?
|
||||||
|
@@ -13,7 +13,6 @@ go_library(
|
|||||||
"custom_metrics_deployments.go",
|
"custom_metrics_deployments.go",
|
||||||
"custom_metrics_stackdriver.go",
|
"custom_metrics_stackdriver.go",
|
||||||
"metrics_grabber.go",
|
"metrics_grabber.go",
|
||||||
"prometheus.go",
|
|
||||||
"stackdriver.go",
|
"stackdriver.go",
|
||||||
"stackdriver_metadata_agent.go",
|
"stackdriver_metadata_agent.go",
|
||||||
],
|
],
|
||||||
@@ -46,7 +45,6 @@ go_library(
|
|||||||
"//test/utils/image:go_default_library",
|
"//test/utils/image:go_default_library",
|
||||||
"//vendor/github.com/onsi/ginkgo:go_default_library",
|
"//vendor/github.com/onsi/ginkgo:go_default_library",
|
||||||
"//vendor/github.com/onsi/gomega:go_default_library",
|
"//vendor/github.com/onsi/gomega:go_default_library",
|
||||||
"//vendor/github.com/prometheus/common/model:go_default_library",
|
|
||||||
"//vendor/golang.org/x/oauth2/google:go_default_library",
|
"//vendor/golang.org/x/oauth2/google:go_default_library",
|
||||||
"//vendor/google.golang.org/api/monitoring/v3:go_default_library",
|
"//vendor/google.golang.org/api/monitoring/v3:go_default_library",
|
||||||
],
|
],
|
||||||
|
@@ -1,388 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright 2018 The Kubernetes Authors.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package monitoring
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"math"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/prometheus/common/model"
|
|
||||||
|
|
||||||
"github.com/onsi/ginkgo"
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
|
||||||
"k8s.io/kubernetes/test/e2e/common"
|
|
||||||
"k8s.io/kubernetes/test/e2e/framework"
|
|
||||||
instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Tunables shared by the Prometheus monitoring e2e specs in this file.
const (
	// prometheusService is the name of the kube-system Service that the
	// API-server proxy requests are addressed to.
	prometheusService = "prometheus"

	// prometheusQueryStep is the resolution step sent with every range query.
	prometheusQueryStep = time.Minute
	// prometheusRate is the window handed to prometheusCPUQuery.
	prometheusRate = 2 * time.Minute
	// prometheusMetricValidationDuration is how far back value validation looks.
	prometheusMetricValidationDuration = 2 * time.Minute
	// prometheusRequiredNodesUpDuration is how far back the per-node
	// availability check looks.
	prometheusRequiredNodesUpDuration = 5 * time.Minute

	// prometheusTestTimeout bounds how long a spec keeps retrying a validator.
	prometheusTestTimeout = 5 * time.Minute
	// prometheusSleepBetweenAttempts is the pause between two validator runs.
	prometheusSleepBetweenAttempts = 30 * time.Second

	// prometheusMetricErrorTolerance is the relative error at or above which a
	// sample is rejected.
	prometheusMetricErrorTolerance = 0.25
	// customMetricValue is the value the metric exporters are asked to expose
	// and the value the custom-metric queries are expected to return.
	customMetricValue = 1000
	// targetCPUUsage is the value the CPU query is expected to return; the CPU
	// consumer is started with targetCPUUsage*1000.
	targetCPUUsage = 0.1
)
|
|
||||||
|
|
||||||
var _ = instrumentation.SIGDescribe("[Feature:PrometheusMonitoring] Prometheus", func() {
|
|
||||||
ginkgo.BeforeEach(func() {
|
|
||||||
framework.SkipUnlessPrometheusMonitoringIsEnabled()
|
|
||||||
})
|
|
||||||
|
|
||||||
f := framework.NewDefaultFramework("prometheus-monitoring")
|
|
||||||
ginkgo.It("should scrape container metrics from all nodes.", func() {
|
|
||||||
expectedNodes, err := getAllNodes(f.ClientSet)
|
|
||||||
framework.ExpectNoError(err)
|
|
||||||
retryUntilSucceeds(func() error {
|
|
||||||
return validateMetricAvailableForAllNodes(f.ClientSet, `container_cpu_usage_seconds_total`, expectedNodes)
|
|
||||||
}, prometheusTestTimeout)
|
|
||||||
})
|
|
||||||
ginkgo.It("should successfully scrape all targets", func() {
|
|
||||||
retryUntilSucceeds(func() error {
|
|
||||||
return validateAllActiveTargetsAreHealthy(f.ClientSet)
|
|
||||||
}, prometheusTestTimeout)
|
|
||||||
})
|
|
||||||
ginkgo.It("should contain correct container CPU metric.", func() {
|
|
||||||
query := prometheusCPUQuery(f.Namespace.Name, "prometheus-cpu-consumer", prometheusRate)
|
|
||||||
consumer := consumeCPUResources(f, "prometheus-cpu-consumer", targetCPUUsage*1000)
|
|
||||||
defer consumer.CleanUp()
|
|
||||||
retryUntilSucceeds(func() error {
|
|
||||||
return validateQueryReturnsCorrectValues(f.ClientSet, query, targetCPUUsage, 3, prometheusMetricErrorTolerance)
|
|
||||||
}, prometheusTestTimeout)
|
|
||||||
})
|
|
||||||
ginkgo.It("should scrape metrics from annotated pods.", func() {
|
|
||||||
query := prometheusPodCustomMetricQuery(f.Namespace.Name, "prometheus-custom-pod-metric")
|
|
||||||
consumer := exportCustomMetricFromPod(f, "prometheus-custom-pod-metric", customMetricValue)
|
|
||||||
defer consumer.CleanUp()
|
|
||||||
retryUntilSucceeds(func() error {
|
|
||||||
return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
|
|
||||||
}, prometheusTestTimeout)
|
|
||||||
})
|
|
||||||
ginkgo.It("should scrape metrics from annotated services.", func() {
|
|
||||||
query := prometheusServiceCustomMetricQuery(f.Namespace.Name, "prometheus-custom-service-metric")
|
|
||||||
consumer := exportCustomMetricFromService(f, "prometheus-custom-service-metric", customMetricValue)
|
|
||||||
defer consumer.CleanUp()
|
|
||||||
retryUntilSucceeds(func() error {
|
|
||||||
return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
|
|
||||||
}, prometheusTestTimeout)
|
|
||||||
})
|
|
||||||
})
|
|
||||||
|
|
||||||
// prometheusCPUQuery builds the PromQL expression that sums the irate of
// container_cpu_usage_seconds_total over every container with a non-empty
// image in the given namespace whose pod name starts with podNamePrefix.
//
// rate is the range-selector window. Whole-minute windows are rendered as
// "<N>m". Any other window is rendered in whole seconds, because truncating
// to minutes would silently shorten it (90s -> "1m") or produce the invalid
// "0m" for sub-minute windows.
func prometheusCPUQuery(namespace, podNamePrefix string, rate time.Duration) string {
	window := fmt.Sprintf("%dm", int64(rate/time.Minute))
	if rate%time.Minute != 0 {
		window = fmt.Sprintf("%ds", int64(rate/time.Second))
	}
	return fmt.Sprintf(`sum(irate(container_cpu_usage_seconds_total{namespace="%v",pod=~"%v.*",image!=""}[%v]))`,
		namespace, podNamePrefix, window)
}
|
|
||||||
|
|
||||||
// prometheusServiceCustomMetricQuery builds the PromQL expression that sums
// the QPS series labelled with the given kubernetes_namespace and
// kubernetes_name.
func prometheusServiceCustomMetricQuery(namespace, service string) string {
	const queryFormat = `sum(QPS{kubernetes_namespace="%v",kubernetes_name="%v"})`
	return fmt.Sprintf(queryFormat, namespace, service)
}
|
|
||||||
|
|
||||||
// prometheusPodCustomMetricQuery builds the PromQL expression that sums the
// QPS series in the given kubernetes_namespace whose kubernetes_pod_name
// matches the regular expression "<podNamePrefix>.*".
func prometheusPodCustomMetricQuery(namespace, podNamePrefix string) string {
	const queryFormat = `sum(QPS{kubernetes_namespace="%s",kubernetes_pod_name=~"%s.*"})`
	return fmt.Sprintf(queryFormat, namespace, podNamePrefix)
}
|
|
||||||
|
|
||||||
func consumeCPUResources(f *framework.Framework, consumerName string, cpuUsage int) *common.ResourceConsumer {
|
|
||||||
return common.NewDynamicResourceConsumer(consumerName, f.Namespace.Name, common.KindDeployment, 1, cpuUsage,
|
|
||||||
memoryUsed, 0, int64(cpuUsage), memoryLimit, f.ClientSet, f.ScalesGetter)
|
|
||||||
}
|
|
||||||
|
|
||||||
func exportCustomMetricFromPod(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
|
|
||||||
podAnnotations := map[string]string{
|
|
||||||
"prometheus.io/scrape": "true",
|
|
||||||
"prometheus.io/path": "/metrics",
|
|
||||||
"prometheus.io/port": "8080",
|
|
||||||
}
|
|
||||||
return common.NewMetricExporter(consumerName, f.Namespace.Name, podAnnotations, nil, metricValue, f.ClientSet, f.ScalesGetter)
|
|
||||||
}
|
|
||||||
|
|
||||||
func exportCustomMetricFromService(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
|
|
||||||
serviceAnnotations := map[string]string{
|
|
||||||
"prometheus.io/scrape": "true",
|
|
||||||
"prometheus.io/path": "/metrics",
|
|
||||||
"prometheus.io/port": "8080",
|
|
||||||
}
|
|
||||||
return common.NewMetricExporter(consumerName, f.Namespace.Name, nil, serviceAnnotations, metricValue, f.ClientSet, f.ScalesGetter)
|
|
||||||
}
|
|
||||||
|
|
||||||
func validateMetricAvailableForAllNodes(c clientset.Interface, metric string, expectedNodesNames []string) error {
|
|
||||||
instanceLabels, err := getInstanceLabelsAvailableForMetric(c, prometheusRequiredNodesUpDuration, metric)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
nodesWithMetric := make(map[string]bool)
|
|
||||||
for _, instance := range instanceLabels {
|
|
||||||
nodesWithMetric[instance] = true
|
|
||||||
}
|
|
||||||
missedNodesCount := 0
|
|
||||||
for _, nodeName := range expectedNodesNames {
|
|
||||||
if _, found := nodesWithMetric[nodeName]; !found {
|
|
||||||
missedNodesCount++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if missedNodesCount > 0 {
|
|
||||||
return fmt.Errorf("Metric not found for %v out of %v nodes", missedNodesCount, len(expectedNodesNames))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func validateAllActiveTargetsAreHealthy(c clientset.Interface) error {
|
|
||||||
discovery, err := fetchPrometheusTargetDiscovery(c)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if len(discovery.ActiveTargets) == 0 {
|
|
||||||
return fmt.Errorf("Prometheus is not scraping any targets, at least one target is required")
|
|
||||||
}
|
|
||||||
for _, target := range discovery.ActiveTargets {
|
|
||||||
if target.Health != HealthGood {
|
|
||||||
return fmt.Errorf("Target health not good. Target: %v", target)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func validateQueryReturnsCorrectValues(c clientset.Interface, query string, expectedValue float64, minSamplesCount int, errorTolerance float64) error {
|
|
||||||
samples, err := fetchQueryValues(c, query, prometheusMetricValidationDuration)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if len(samples) < minSamplesCount {
|
|
||||||
return fmt.Errorf("Not enough samples for query '%v', got %v", query, samples)
|
|
||||||
}
|
|
||||||
framework.Logf("Executed query '%v' returned %v", query, samples)
|
|
||||||
for _, value := range samples {
|
|
||||||
error := math.Abs(value-expectedValue) / expectedValue
|
|
||||||
if error >= errorTolerance {
|
|
||||||
return fmt.Errorf("Query result values outside expected value tolerance. Expected error below %v, got %v", errorTolerance, error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func fetchQueryValues(c clientset.Interface, query string, duration time.Duration) ([]float64, error) {
|
|
||||||
now := time.Now()
|
|
||||||
response, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
m, ok := response.(model.Matrix)
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("Expected matric response, got: %T", response)
|
|
||||||
}
|
|
||||||
values := make([]float64, 0)
|
|
||||||
for _, stream := range m {
|
|
||||||
for _, sample := range stream.Values {
|
|
||||||
values = append(values, float64(sample.Value))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func getInstanceLabelsAvailableForMetric(c clientset.Interface, duration time.Duration, metric string) ([]string, error) {
|
|
||||||
var instance model.LabelValue
|
|
||||||
now := time.Now()
|
|
||||||
query := fmt.Sprintf(`sum(%v)by(instance)`, metric)
|
|
||||||
result, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
instanceLabels := make([]string, 0)
|
|
||||||
m, ok := result.(model.Matrix)
|
|
||||||
if !ok {
|
|
||||||
framework.Failf("Expected matrix response for query '%v', got: %T", query, result)
|
|
||||||
return instanceLabels, nil
|
|
||||||
}
|
|
||||||
for _, stream := range m {
|
|
||||||
if instance, ok = stream.Metric["instance"]; !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
instanceLabels = append(instanceLabels, string(instance))
|
|
||||||
}
|
|
||||||
return instanceLabels, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func fetchPrometheusTargetDiscovery(c clientset.Interface) (TargetDiscovery, error) {
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
response, err := c.CoreV1().RESTClient().Get().
|
|
||||||
Context(ctx).
|
|
||||||
Namespace("kube-system").
|
|
||||||
Resource("services").
|
|
||||||
Name(prometheusService+":9090").
|
|
||||||
SubResource("proxy").
|
|
||||||
Suffix("api", "v1", "targets").
|
|
||||||
Do().
|
|
||||||
Raw()
|
|
||||||
var qres promTargetsResponse
|
|
||||||
if err != nil {
|
|
||||||
framework.Logf(string(response))
|
|
||||||
return qres.Data, err
|
|
||||||
}
|
|
||||||
err = json.Unmarshal(response, &qres)
|
|
||||||
|
|
||||||
return qres.Data, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// TargetHealth describes the health state of a target.
type TargetHealth string

// The possible health states of a target based on the last performed scrape.
const (
	HealthUnknown TargetHealth = "unknown"
	HealthGood    TargetHealth = "up"
	HealthBad     TargetHealth = "down"
)

type (
	// promTargetsResponse is the JSON envelope decoded from the Prometheus
	// api/v1/targets endpoint.
	promTargetsResponse struct {
		Status string          `json:"status"`
		Data   TargetDiscovery `json:"data"`
	}

	// TargetDiscovery has all the active targets.
	TargetDiscovery struct {
		ActiveTargets  []*Target        `json:"activeTargets"`
		DroppedTargets []*DroppedTarget `json:"droppedTargets"`
	}

	// Target has the information for one target.
	Target struct {
		DiscoveredLabels map[string]string `json:"discoveredLabels"`
		Labels           map[string]string `json:"labels"`

		ScrapeURL string `json:"scrapeUrl"`

		LastError  string       `json:"lastError"`
		LastScrape time.Time    `json:"lastScrape"`
		Health     TargetHealth `json:"health"`
	}

	// DroppedTarget has the information for one target that was dropped during relabelling.
	DroppedTarget struct {
		// Labels before any processing.
		DiscoveredLabels map[string]string `json:"discoveredLabels"`
	}
)
|
|
||||||
|
|
||||||
func queryPrometheus(c clientset.Interface, query string, start, end time.Time, step time.Duration) (model.Value, error) {
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
response, err := c.CoreV1().RESTClient().Get().
|
|
||||||
Context(ctx).
|
|
||||||
Namespace("kube-system").
|
|
||||||
Resource("services").
|
|
||||||
Name(prometheusService+":9090").
|
|
||||||
SubResource("proxy").
|
|
||||||
Suffix("api", "v1", "query_range").
|
|
||||||
Param("query", query).
|
|
||||||
Param("start", fmt.Sprintf("%v", start.Unix())).
|
|
||||||
Param("end", fmt.Sprintf("%v", end.Unix())).
|
|
||||||
Param("step", fmt.Sprintf("%vs", step.Seconds())).
|
|
||||||
Do().
|
|
||||||
Raw()
|
|
||||||
if err != nil {
|
|
||||||
framework.Logf(string(response))
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
var qres promQueryResponse
|
|
||||||
err = json.Unmarshal(response, &qres)
|
|
||||||
|
|
||||||
return model.Value(qres.Data.v), err
|
|
||||||
}
|
|
||||||
|
|
||||||
type promQueryResponse struct {
|
|
||||||
Status string `json:"status"`
|
|
||||||
Data responseData `json:"data"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type responseData struct {
|
|
||||||
Type model.ValueType `json:"resultType"`
|
|
||||||
Result interface{} `json:"result"`
|
|
||||||
|
|
||||||
// The decoded value.
|
|
||||||
v model.Value
|
|
||||||
}
|
|
||||||
|
|
||||||
func (qr *responseData) UnmarshalJSON(b []byte) error {
|
|
||||||
v := struct {
|
|
||||||
Type model.ValueType `json:"resultType"`
|
|
||||||
Result json.RawMessage `json:"result"`
|
|
||||||
}{}
|
|
||||||
|
|
||||||
err := json.Unmarshal(b, &v)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
switch v.Type {
|
|
||||||
case model.ValScalar:
|
|
||||||
var sv model.Scalar
|
|
||||||
err = json.Unmarshal(v.Result, &sv)
|
|
||||||
qr.v = &sv
|
|
||||||
|
|
||||||
case model.ValVector:
|
|
||||||
var vv model.Vector
|
|
||||||
err = json.Unmarshal(v.Result, &vv)
|
|
||||||
qr.v = vv
|
|
||||||
|
|
||||||
case model.ValMatrix:
|
|
||||||
var mv model.Matrix
|
|
||||||
err = json.Unmarshal(v.Result, &mv)
|
|
||||||
qr.v = mv
|
|
||||||
|
|
||||||
default:
|
|
||||||
err = fmt.Errorf("unexpected value type %q", v.Type)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func retryUntilSucceeds(validator func() error, timeout time.Duration) {
|
|
||||||
startTime := time.Now()
|
|
||||||
var err error
|
|
||||||
for {
|
|
||||||
err = validator()
|
|
||||||
if err == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if time.Since(startTime) >= timeout {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
framework.Logf(err.Error())
|
|
||||||
time.Sleep(prometheusSleepBetweenAttempts)
|
|
||||||
}
|
|
||||||
framework.Failf(err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
func getAllNodes(c clientset.Interface) ([]string, error) {
|
|
||||||
nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
result := []string{}
|
|
||||||
for _, node := range nodeList.Items {
|
|
||||||
result = append(result, node.Name)
|
|
||||||
}
|
|
||||||
return result, nil
|
|
||||||
}
|
|
Reference in New Issue
Block a user