From 017f57a6b0cc052cfb041bf653f1ba984c6ec26b Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Wed, 13 Feb 2019 18:48:45 +0100 Subject: [PATCH] proxy: add some useful metrics This adds some useful metrics around pending changes and last successful sync time. The goal is for administrators to be able to alert on proxies that, for whatever reason, are quite stale. Signed-off-by: Casey Callendrello --- pkg/proxy/BUILD | 1 + pkg/proxy/endpoints.go | 5 ++++ pkg/proxy/iptables/proxier.go | 1 + pkg/proxy/ipvs/proxier.go | 1 + pkg/proxy/metrics/metrics.go | 55 ++++++++++++++++++++++++++++++++++ pkg/proxy/service.go | 4 +++ pkg/proxy/winkernel/metrics.go | 11 +++++++ pkg/proxy/winkernel/proxier.go | 1 + 8 files changed, 79 insertions(+) diff --git a/pkg/proxy/BUILD b/pkg/proxy/BUILD index b43afc322c3..8c69b958350 100644 --- a/pkg/proxy/BUILD +++ b/pkg/proxy/BUILD @@ -18,6 +18,7 @@ go_library( deps = [ "//pkg/api/v1/service:go_default_library", "//pkg/proxy/config:go_default_library", + "//pkg/proxy/metrics:go_default_library", "//pkg/proxy/util:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", diff --git a/pkg/proxy/endpoints.go b/pkg/proxy/endpoints.go index 713f7e17ebe..d81b2cab9e2 100644 --- a/pkg/proxy/endpoints.go +++ b/pkg/proxy/endpoints.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/proxy/metrics" utilproxy "k8s.io/kubernetes/pkg/proxy/util" utilnet "k8s.io/utils/net" ) @@ -127,6 +128,7 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool { if endpoints == nil { return false } + metrics.EndpointChangesTotal.Inc() namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name} ect.lock.Lock() @@ -154,6 +156,8 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool { // should be exported. delete(ect.lastChangeTriggerTimes, namespacedName) } + + metrics.EndpointChangesPending.Set(float64(len(ect.items))) return len(ect.items) > 0 } @@ -295,6 +299,7 @@ func (em EndpointsMap) apply(changes *EndpointChangeTracker, staleEndpoints *[]S detectStaleConnections(change.previous, change.current, staleEndpoints, staleServiceNames) } changes.items = make(map[types.NamespacedName]*endpointsChange) + metrics.EndpointChangesPending.Set(0) for _, lastChangeTriggerTime := range changes.lastChangeTriggerTimes { *lastChangeTriggerTimes = append(*lastChangeTriggerTimes, lastChangeTriggerTime...) } diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index e4b13b17c9f..4d380f3f20f 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -1395,6 +1395,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.UpdateTimestamp() } + metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the healthChecker diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 02452930bb1..433cc5d9f55 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1272,6 +1272,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.UpdateTimestamp() } + metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the healthChecker diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index 38924387ff8..54d7f0a4ca4 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -46,6 +46,16 @@ var ( }, ) + // SyncProxyRulesLastTimestamp is the timestamp proxy rules were last + // successfully synced. + SyncProxyRulesLastTimestamp = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_last_timestamp_seconds", + Help: "The last time proxy rules were successfully synced", + }, + ) + // NetworkProgrammingLatency is defined as the time it took to program the network - from the time // the service or pod has changed to the time the change was propagated and the proper kube-proxy // rules were synced. Exported for each endpoints object that were part of the rules sync. @@ -63,6 +73,46 @@ var ( Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), }, ) + + // EndpointChangesPending is the number of pending endpoint changes that + // have not yet been synced to the proxy. + EndpointChangesPending = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_endpoint_changes_pending", + Help: "Pending proxy rules Endpoint changes", + }, + ) + + // EndpointChangesTotal is the number of endpoint changes that the proxy + // has seen. + EndpointChangesTotal = prometheus.NewCounter( + prometheus.CounterOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_endpoint_changes_total", + Help: "Cumulative proxy rules Endpoint changes", + }, + ) + + // ServiceChangesPending is the number of pending service changes that + // have not yet been synced to the proxy. + ServiceChangesPending = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_service_changes_pending", + Help: "Pending proxy rules Service changes", + }, + ) + + // ServiceChangesTotal is the number of service changes that the proxy has + // seen. + ServiceChangesTotal = prometheus.NewCounter( + prometheus.CounterOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_service_changes_total", + Help: "Cumulative proxy rules Service changes", + }, + ) ) var registerMetricsOnce sync.Once @@ -72,7 +122,12 @@ func RegisterMetrics() { registerMetricsOnce.Do(func() { prometheus.MustRegister(SyncProxyRulesLatency) prometheus.MustRegister(DeprecatedSyncProxyRulesLatency) + prometheus.MustRegister(SyncProxyRulesLastTimestamp) prometheus.MustRegister(NetworkProgrammingLatency) + prometheus.MustRegister(EndpointChangesPending) + prometheus.MustRegister(EndpointChangesTotal) + prometheus.MustRegister(ServiceChangesPending) + prometheus.MustRegister(ServiceChangesTotal) }) } diff --git a/pkg/proxy/service.go b/pkg/proxy/service.go index eab394a9258..eecc643adb2 100644 --- a/pkg/proxy/service.go +++ b/pkg/proxy/service.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" apiservice "k8s.io/kubernetes/pkg/api/v1/service" + "k8s.io/kubernetes/pkg/proxy/metrics" utilproxy "k8s.io/kubernetes/pkg/proxy/util" utilnet "k8s.io/utils/net" ) @@ -198,6 +199,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool { if svc == nil { return false } + metrics.ServiceChangesTotal.Inc() namespacedName := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name} sct.lock.Lock() @@ -214,6 +216,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool { if reflect.DeepEqual(change.previous, change.current) { delete(sct.items, namespacedName) } + metrics.ServiceChangesPending.Set(float64(len(sct.items))) return len(sct.items) > 0 } @@ -296,6 +299,7 @@ func (sm *ServiceMap) apply(changes *ServiceChangeTracker, UDPStaleClusterIP set } // clear changes after applying them to ServiceMap. changes.items = make(map[types.NamespacedName]*serviceChange) + metrics.ServiceChangesPending.Set(0) return } diff --git a/pkg/proxy/winkernel/metrics.go b/pkg/proxy/winkernel/metrics.go index 61cf962ee0e..729cc5e626f 100644 --- a/pkg/proxy/winkernel/metrics.go +++ b/pkg/proxy/winkernel/metrics.go @@ -43,6 +43,16 @@ var ( Buckets: prometheus.ExponentialBuckets(1000, 2, 15), }, ) + + // SyncProxyRulesLastTimestamp is the timestamp proxy rules were last + // successfully synced. + SyncProxyRulesLastTimestamp = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_last_timestamp_seconds", + Help: "The last time proxy rules were successfully synced", + }, + ) ) var registerMetricsOnce sync.Once @@ -51,6 +61,7 @@ func RegisterMetrics() { registerMetricsOnce.Do(func() { prometheus.MustRegister(SyncProxyRulesLatency) prometheus.MustRegister(DeprecatedSyncProxyRulesLatency) + prometheus.MustRegister(SyncProxyRulesLastTimestamp) }) } diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go index 061b8922c0a..4817a4a261f 100644 --- a/pkg/proxy/winkernel/proxier.go +++ b/pkg/proxy/winkernel/proxier.go @@ -1197,6 +1197,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.UpdateTimestamp() } + SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the healthChecker