From 1dda3d8dfc44a2dc1b0f0d3563ce85c677fdb3bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Janek=20=C5=81ukaszewicz?=
Date: Tue, 16 Jul 2019 16:32:34 +0200
Subject: [PATCH 1/2] kube-proxy: change buckets used by NetworkProgrammingLatency

refs https://github.com/kubernetes/perf-tests/issues/640

The current bucket granularity is too fine for lower latencies, at the
cost of the higher latencies (7+ minutes). This causes spikes in the SLI
calculated from this metric.

I don't have a strong opinion about the actual values - these just seemed
to match our needs better. Let's discuss them.

Values:
0.015 s
0.030 s
0.060 s
0.120 s
0.240 s
0.480 s
0.960 s
1.920 s
3.840 s
7.680 s
15.360 s
30.720 s
61.440 s
122.880 s
245.760 s
491.520 s
983.040 s
1966.080 s
3932.160 s
7864.320 s
---
 pkg/proxy/metrics/metrics.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go
index 54d7f0a4ca4..79a08d9da7c 100644
--- a/pkg/proxy/metrics/metrics.go
+++ b/pkg/proxy/metrics/metrics.go
@@ -68,9 +68,7 @@ var (
 			Subsystem: kubeProxySubsystem,
 			Name:      "network_programming_duration_seconds",
 			Help:      "In Cluster Network Programming Latency in seconds",
-			// TODO(mm4tt): Reevaluate buckets before 1.14 release.
-			// The last bucket will be [0.001s*2^20 ~= 17min, +inf)
-			Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
+			Buckets: prometheus.ExponentialBuckets(0.015, 2, 20),
 		},
 	)
 

From f9b683532a45955ddf5cec75b676603c31589588 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Janek=20=C5=81ukaszewicz?=
Date: Fri, 26 Jul 2019 14:44:55 +0200
Subject: [PATCH 2/2] second iteration

---
 pkg/proxy/metrics/metrics.go | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go
index 79a08d9da7c..c12a11d0320 100644
--- a/pkg/proxy/metrics/metrics.go
+++ b/pkg/proxy/metrics/metrics.go
@@ -68,7 +68,12 @@ var (
 			Subsystem: kubeProxySubsystem,
 			Name:      "network_programming_duration_seconds",
 			Help:      "In Cluster Network Programming Latency in seconds",
-			Buckets: prometheus.ExponentialBuckets(0.015, 2, 20),
+			Buckets: merge(
+				prometheus.LinearBuckets(0.25, 0.25, 2), // 0.25s, 0.50s
+				prometheus.LinearBuckets(1, 1, 59),      // 1s, 2s, 3s, ... 59s
+				prometheus.LinearBuckets(60, 5, 12),     // 60s, 65s, 70s, ... 115s
+				prometheus.LinearBuckets(120, 30, 7),    // 2min, 2.5min, 3min, ..., 5min
+			),
 		},
 	)
 
@@ -138,3 +143,11 @@ func SinceInMicroseconds(start time.Time) float64 {
 func SinceInSeconds(start time.Time) float64 {
 	return time.Since(start).Seconds()
 }
+
+func merge(slices ...[]float64) []float64 {
+	result := make([]float64, 1)
+	for _, s := range slices {
+		result = append(result, s...)
+	}
+	return result
+}
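
For reference, here is a minimal standalone sketch (not part of either patch) that copies the merge helper and the bucket definition from PATCH 2/2 and prints the resulting histogram bounds; it assumes the same github.com/prometheus/client_golang/prometheus package that metrics.go already imports.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// merge mirrors the helper added in PATCH 2/2: it concatenates the given
// bucket slices. Note that make([]float64, 1) seeds the result with one
// zero element, so the merged bucket list starts with a 0s bound.
func merge(slices ...[]float64) []float64 {
	result := make([]float64, 1)
	for _, s := range slices {
		result = append(result, s...)
	}
	return result
}

func main() {
	buckets := merge(
		prometheus.LinearBuckets(0.25, 0.25, 2), // 0.25s, 0.50s
		prometheus.LinearBuckets(1, 1, 59),      // 1s, 2s, ..., 59s
		prometheus.LinearBuckets(60, 5, 12),     // 60s, 65s, ..., 115s
		prometheus.LinearBuckets(120, 30, 7),    // 120s, 150s, ..., 300s
	)
	fmt.Println(len(buckets))             // 81: the leading 0 plus 80 bounds
	fmt.Println(buckets[:6])              // [0 0.25 0.5 1 2 3]
	fmt.Println(buckets[len(buckets)-3:]) // [240 270 300]
}

Printing prometheus.ExponentialBuckets(0.015, 2, 20) the same way reproduces the values listed in PATCH 1/2 (0.015 s doubling up to 7864.32 s).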