From ba57fd7c84839591044ab64f51bd683813e23ad8 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 22 May 2023 16:29:01 -0400 Subject: [PATCH 1/5] Merge linux and windows kube-proxy metric registration together Windows proxy metric registration was in a separate file, which had led to some metrics (eg the new ProxyHealthzTotal and ProxyLivezTotal) not being registered for Windows even though they were implemented by platform-generic code. (A few other metrics were neither registered on, nor implemented on Windows, and that's probably a bug.) Also, beyond linux-vs-windows, make it clearer which metrics are specific to individual backends. --- cmd/kube-proxy/app/server.go | 3 ++ cmd/kube-proxy/app/server_linux.go | 2 -- cmd/kube-proxy/app/server_windows.go | 1 - pkg/proxy/healthcheck/healthcheck_test.go | 4 +-- pkg/proxy/iptables/proxier_test.go | 12 +++---- pkg/proxy/ipvs/proxier_test.go | 3 +- pkg/proxy/metrics/metrics.go | 37 +++++++++++++++------ pkg/proxy/nftables/proxier_test.go | 5 +-- pkg/proxy/winkernel/doc.go | 18 +++++++++++ pkg/proxy/winkernel/metrics.go | 39 ----------------------- 10 files changed, 61 insertions(+), 63 deletions(-) create mode 100644 pkg/proxy/winkernel/doc.go delete mode 100644 pkg/proxy/winkernel/metrics.go diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index c2245348415..3e132baf95f 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -80,6 +80,7 @@ import ( "k8s.io/kubernetes/pkg/proxy/apis/config/validation" "k8s.io/kubernetes/pkg/proxy/config" "k8s.io/kubernetes/pkg/proxy/healthcheck" + proxymetrics "k8s.io/kubernetes/pkg/proxy/metrics" proxyutil "k8s.io/kubernetes/pkg/proxy/util" "k8s.io/kubernetes/pkg/util/filesystem" utilflag "k8s.io/kubernetes/pkg/util/flag" @@ -919,6 +920,8 @@ func (s *ProxyServer) Run(ctx context.Context) error { logger.Info("Golang settings", "GOGC", os.Getenv("GOGC"), "GOMAXPROCS", os.Getenv("GOMAXPROCS"), "GOTRACEBACK", os.Getenv("GOTRACEBACK")) + proxymetrics.RegisterMetrics(s.Config.Mode) + // TODO(vmarmol): Use container config for this. var oomAdjuster *oom.OOMAdjuster if s.Config.OOMScoreAdj != nil { diff --git a/cmd/kube-proxy/app/server_linux.go b/cmd/kube-proxy/app/server_linux.go index adeb91c75a6..395578c270d 100644 --- a/cmd/kube-proxy/app/server_linux.go +++ b/cmd/kube-proxy/app/server_linux.go @@ -50,7 +50,6 @@ import ( "k8s.io/kubernetes/pkg/proxy/ipvs" utilipset "k8s.io/kubernetes/pkg/proxy/ipvs/ipset" utilipvs "k8s.io/kubernetes/pkg/proxy/ipvs/util" - proxymetrics "k8s.io/kubernetes/pkg/proxy/metrics" "k8s.io/kubernetes/pkg/proxy/nftables" proxyutil "k8s.io/kubernetes/pkg/proxy/util" proxyutiliptables "k8s.io/kubernetes/pkg/proxy/util/iptables" @@ -101,7 +100,6 @@ func (s *ProxyServer) platformSetup(ctx context.Context) error { return err } - proxymetrics.RegisterMetrics() return nil } diff --git a/cmd/kube-proxy/app/server_windows.go b/cmd/kube-proxy/app/server_windows.go index 7cfb9c3f99f..4f34e7e2947 100644 --- a/cmd/kube-proxy/app/server_windows.go +++ b/cmd/kube-proxy/app/server_windows.go @@ -51,7 +51,6 @@ func (o *Options) platformApplyDefaults(config *proxyconfigapi.KubeProxyConfigur // Proxier. It should fill in any platform-specific fields and perform other // platform-specific setup. func (s *ProxyServer) platformSetup(ctx context.Context) error { - winkernel.RegisterMetrics() // Preserve backward-compatibility with the old secondary IP behavior if s.PrimaryIPFamily == v1.IPv4Protocol { s.NodeIPs[v1.IPv6Protocol] = net.IPv6zero diff --git a/pkg/proxy/healthcheck/healthcheck_test.go b/pkg/proxy/healthcheck/healthcheck_test.go index c1c451ef8c2..a3047a1829d 100644 --- a/pkg/proxy/healthcheck/healthcheck_test.go +++ b/pkg/proxy/healthcheck/healthcheck_test.go @@ -465,7 +465,7 @@ type serverTest struct { } func TestHealthzServer(t *testing.T) { - metrics.RegisterMetrics() + metrics.RegisterMetrics("") listener := newFakeListener() httpFactory := newFakeHTTPServerFactory() fakeClock := testingclock.NewFakeClock(time.Now()) @@ -500,7 +500,7 @@ func TestHealthzServer(t *testing.T) { } func TestLivezServer(t *testing.T) { - metrics.RegisterMetrics() + metrics.RegisterMetrics("") listener := newFakeListener() httpFactory := newFakeHTTPServerFactory() fakeClock := testingclock.NewFakeClock(time.Now()) diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index 24edaf19a2e..52dbf8c7c11 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -49,10 +49,10 @@ import ( klogtesting "k8s.io/klog/v2/ktesting" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/proxy" + kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config" "k8s.io/kubernetes/pkg/proxy/conntrack" - "k8s.io/kubernetes/pkg/proxy/metrics" - "k8s.io/kubernetes/pkg/proxy/healthcheck" + "k8s.io/kubernetes/pkg/proxy/metrics" proxyutil "k8s.io/kubernetes/pkg/proxy/util" proxyutiliptables "k8s.io/kubernetes/pkg/proxy/util/iptables" proxyutiltest "k8s.io/kubernetes/pkg/proxy/util/testing" @@ -1544,7 +1544,7 @@ func TestOverallIPTablesRules(t *testing.T) { logger, _ := klogtesting.NewTestContext(t) ipt := iptablestest.NewFake() fp := NewFakeProxier(ipt) - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeIPTables) makeServiceMap(fp, // create ClusterIP service @@ -4148,7 +4148,7 @@ func TestProxierMetricsIptablesTotalRules(t *testing.T) { ipt := iptablestest.NewFake() fp := NewFakeProxier(ipt) - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeIPTables) svcIP := "172.30.0.41" svcPort := 80 @@ -5828,7 +5828,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { logger, _ := klogtesting.NewTestContext(t) ipt := iptablestest.NewFake() fp := NewFakeProxier(ipt) - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeIPTables) defer legacyregistry.Reset() // Create initial state @@ -6472,7 +6472,7 @@ func TestNoEndpointsMetric(t *testing.T) { hostname string } - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeIPTables) testCases := []struct { name string internalTrafficPolicy *v1.ServiceInternalTrafficPolicy diff --git a/pkg/proxy/ipvs/proxier_test.go b/pkg/proxy/ipvs/proxier_test.go index 96c3306daac..4ee158fe448 100644 --- a/pkg/proxy/ipvs/proxier_test.go +++ b/pkg/proxy/ipvs/proxier_test.go @@ -42,6 +42,7 @@ import ( "k8s.io/component-base/metrics/testutil" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/proxy" + kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config" "k8s.io/kubernetes/pkg/proxy/conntrack" "k8s.io/kubernetes/pkg/proxy/healthcheck" utilipset "k8s.io/kubernetes/pkg/proxy/ipvs/ipset" @@ -5616,7 +5617,7 @@ func TestNoEndpointsMetric(t *testing.T) { hostname string } - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeIPVS) testCases := []struct { name string diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index 19fb8923cf2..43618e1670f 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -22,6 +22,7 @@ import ( "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" + kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config" ) const kubeProxySubsystem = "kubeproxy" @@ -237,26 +238,42 @@ var ( var registerMetricsOnce sync.Once // RegisterMetrics registers kube-proxy metrics. -func RegisterMetrics() { +func RegisterMetrics(mode kubeproxyconfig.ProxyMode) { registerMetricsOnce.Do(func() { + // Core kube-proxy metrics for all backends legacyregistry.MustRegister(SyncProxyRulesLatency) - legacyregistry.MustRegister(SyncFullProxyRulesLatency) - legacyregistry.MustRegister(SyncPartialProxyRulesLatency) + legacyregistry.MustRegister(SyncProxyRulesLastQueuedTimestamp) legacyregistry.MustRegister(SyncProxyRulesLastTimestamp) - legacyregistry.MustRegister(NetworkProgrammingLatency) legacyregistry.MustRegister(EndpointChangesPending) legacyregistry.MustRegister(EndpointChangesTotal) legacyregistry.MustRegister(ServiceChangesPending) legacyregistry.MustRegister(ServiceChangesTotal) - legacyregistry.MustRegister(IptablesRulesTotal) - legacyregistry.MustRegister(IptablesRulesLastSync) - legacyregistry.MustRegister(IptablesRestoreFailuresTotal) - legacyregistry.MustRegister(IptablesPartialRestoreFailuresTotal) - legacyregistry.MustRegister(SyncProxyRulesLastQueuedTimestamp) - legacyregistry.MustRegister(SyncProxyRulesNoLocalEndpointsTotal) legacyregistry.MustRegister(ProxyHealthzTotal) legacyregistry.MustRegister(ProxyLivezTotal) + // FIXME: winkernel does not implement these + legacyregistry.MustRegister(NetworkProgrammingLatency) + legacyregistry.MustRegister(SyncProxyRulesNoLocalEndpointsTotal) + + switch mode { + case kubeproxyconfig.ProxyModeIPTables: + legacyregistry.MustRegister(SyncFullProxyRulesLatency) + legacyregistry.MustRegister(SyncPartialProxyRulesLatency) + legacyregistry.MustRegister(IptablesRestoreFailuresTotal) + legacyregistry.MustRegister(IptablesPartialRestoreFailuresTotal) + legacyregistry.MustRegister(IptablesRulesTotal) + legacyregistry.MustRegister(IptablesRulesLastSync) + + case kubeproxyconfig.ProxyModeIPVS: + legacyregistry.MustRegister(IptablesRestoreFailuresTotal) + + case kubeproxyconfig.ProxyModeNFTables: + // FIXME: should not use the iptables-specific metric + legacyregistry.MustRegister(IptablesRestoreFailuresTotal) + + case kubeproxyconfig.ProxyModeKernelspace: + // currently no winkernel-specific metrics + } }) } diff --git a/pkg/proxy/nftables/proxier_test.go b/pkg/proxy/nftables/proxier_test.go index a4754cb2154..ec063fdbc25 100644 --- a/pkg/proxy/nftables/proxier_test.go +++ b/pkg/proxy/nftables/proxier_test.go @@ -40,6 +40,7 @@ import ( "k8s.io/component-base/metrics/testutil" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/proxy" + kubeproxyconfig "k8s.io/kubernetes/pkg/proxy/apis/config" "k8s.io/kubernetes/pkg/proxy/conntrack" "k8s.io/kubernetes/pkg/proxy/healthcheck" "k8s.io/kubernetes/pkg/proxy/metrics" @@ -141,7 +142,7 @@ func NewFakeProxier(ipFamily v1.IPFamily) (*knftables.Fake, *Proxier) { // rules are exactly as expected. func TestOverallNFTablesRules(t *testing.T) { nft, fp := NewFakeProxier(v1.IPv4Protocol) - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeNFTables) makeServiceMap(fp, // create ClusterIP service @@ -4455,7 +4456,7 @@ func TestNoEndpointsMetric(t *testing.T) { hostname string } - metrics.RegisterMetrics() + metrics.RegisterMetrics(kubeproxyconfig.ProxyModeNFTables) testCases := []struct { name string internalTrafficPolicy *v1.ServiceInternalTrafficPolicy diff --git a/pkg/proxy/winkernel/doc.go b/pkg/proxy/winkernel/doc.go new file mode 100644 index 00000000000..9d4f4d013fe --- /dev/null +++ b/pkg/proxy/winkernel/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package winkernel implements the Windows-kernel-based proxy +package winkernel // import "k8s.io/kubernetes/pkg/proxy/winkernel" diff --git a/pkg/proxy/winkernel/metrics.go b/pkg/proxy/winkernel/metrics.go deleted file mode 100644 index e4357e4eceb..00000000000 --- a/pkg/proxy/winkernel/metrics.go +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package winkernel - -import ( - "sync" - - "k8s.io/component-base/metrics/legacyregistry" - "k8s.io/kubernetes/pkg/proxy/metrics" -) - -var registerMetricsOnce sync.Once - -// RegisterMetrics registers kube-proxy metrics for Windows modes. -func RegisterMetrics() { - registerMetricsOnce.Do(func() { - legacyregistry.MustRegister(metrics.SyncProxyRulesLatency) - legacyregistry.MustRegister(metrics.SyncProxyRulesLastTimestamp) - legacyregistry.MustRegister(metrics.EndpointChangesPending) - legacyregistry.MustRegister(metrics.EndpointChangesTotal) - legacyregistry.MustRegister(metrics.ServiceChangesPending) - legacyregistry.MustRegister(metrics.ServiceChangesTotal) - legacyregistry.MustRegister(metrics.SyncProxyRulesLastQueuedTimestamp) - }) -} From 1823de063b61b7c200d20c7e3e153a6f686c95ca Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 25 Dec 2023 10:01:02 -0500 Subject: [PATCH 2/5] fix "Iptables" -> "IPTables" in metrics variable names --- pkg/proxy/iptables/proxier.go | 16 ++++++++-------- pkg/proxy/iptables/proxier_test.go | 10 +++++----- pkg/proxy/ipvs/proxier.go | 2 +- pkg/proxy/metrics/metrics.go | 28 ++++++++++++++-------------- pkg/proxy/nftables/proxier.go | 2 +- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 4d1c52e8e87..13b49d8d776 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -451,7 +451,7 @@ func CleanupLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounte err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters) if err != nil { logger.Error(err, "Failed to execute iptables-restore", "table", utiliptables.TableNAT) - metrics.IptablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.Inc() encounteredError = true } } @@ -478,7 +478,7 @@ func CleanupLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounte // Write it. if err := ipt.Restore(utiliptables.TableFilter, filterLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil { logger.Error(err, "Failed to execute iptables-restore", "table", utiliptables.TableFilter) - metrics.IptablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.Inc() encounteredError = true } } @@ -818,7 +818,7 @@ func (proxier *Proxier) syncProxyRules() { proxier.logger.Info("Sync failed", "retryingTime", proxier.syncPeriod) proxier.syncRunner.RetryAfter(proxier.syncPeriod) if tryPartialSync { - metrics.IptablesPartialRestoreFailuresTotal.Inc() + metrics.IPTablesPartialRestoreFailuresTotal.Inc() } // proxier.serviceChanges and proxier.endpointChanges have already // been flushed, so we've lost the state needed to be able to do @@ -1483,10 +1483,10 @@ func (proxier *Proxier) syncProxyRules() { "-j", "ACCEPT", ) - metrics.IptablesRulesTotal.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines())) - metrics.IptablesRulesLastSync.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines())) - metrics.IptablesRulesTotal.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() + skippedNatRules.Lines() - deletedChains)) - metrics.IptablesRulesLastSync.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() - deletedChains)) + metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines())) + metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines())) + metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() + skippedNatRules.Lines() - deletedChains)) + metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() - deletedChains)) // Sync rules. proxier.iptablesData.Reset() @@ -1518,7 +1518,7 @@ func (proxier *Proxier) syncProxyRules() { } else { proxier.logger.Error(err, "Failed to execute iptables-restore") } - metrics.IptablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.Inc() return } success = true diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index 52dbf8c7c11..3076504418b 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -409,7 +409,7 @@ func countRules(logger klog.Logger, tableName utiliptables.Table, ruleData strin } func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table) int { - numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IptablesRulesTotal.WithLabelValues(string(tableName))) + numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesTotal.WithLabelValues(string(tableName))) if err != nil { logger.Error(err, "metrics are not registered?") return -1 @@ -418,7 +418,7 @@ func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table) int } func countRulesFromLastSyncMetric(logger klog.Logger, tableName utiliptables.Table) int { - numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IptablesRulesLastSync.WithLabelValues(string(tableName))) + numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesLastSync.WithLabelValues(string(tableName))) if err != nil { logger.Error(err, "metrics are not registered?") return -1 @@ -4143,7 +4143,7 @@ func TestHealthCheckNodePortWhenTerminating(t *testing.T) { } } -func TestProxierMetricsIptablesTotalRules(t *testing.T) { +func TestProxierMetricsIPTablesTotalRules(t *testing.T) { logger, _ := klogtesting.NewTestContext(t) ipt := iptablestest.NewFake() fp := NewFakeProxier(ipt) @@ -6357,7 +6357,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { if fp.needFullSync { t.Fatalf("Proxier unexpectedly already needs a full sync?") } - partialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IptablesPartialRestoreFailuresTotal) + partialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal) if err != nil { t.Fatalf("Could not get partial restore failures metric: %v", err) } @@ -6391,7 +6391,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { if !fp.needFullSync { t.Errorf("Proxier did not fail on previous partial resync?") } - updatedPartialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IptablesPartialRestoreFailuresTotal) + updatedPartialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal) if err != nil { t.Errorf("Could not get partial restore failures metric: %v", err) } diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index c11fbb6ab8d..a987e9b7ad2 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1445,7 +1445,7 @@ func (proxier *Proxier) syncProxyRules() { } else { proxier.logger.Error(err, "Failed to execute iptables-restore", "rules", proxier.iptablesData.Bytes()) } - metrics.IptablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.Inc() return } for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes { diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index 43618e1670f..0a59bd80c80 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -139,9 +139,9 @@ var ( }, ) - // IptablesRestoreFailuresTotal is the number of iptables restore failures that the proxy has + // IPTablesRestoreFailuresTotal is the number of iptables restore failures that the proxy has // seen. - IptablesRestoreFailuresTotal = metrics.NewCounter( + IPTablesRestoreFailuresTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_iptables_restore_failures_total", @@ -150,9 +150,9 @@ var ( }, ) - // IptablesPartialRestoreFailuresTotal is the number of iptables *partial* restore + // IPTablesPartialRestoreFailuresTotal is the number of iptables *partial* restore // failures (resulting in a fall back to a full restore) that the proxy has seen. - IptablesPartialRestoreFailuresTotal = metrics.NewCounter( + IPTablesPartialRestoreFailuresTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_iptables_partial_restore_failures_total", @@ -161,9 +161,9 @@ var ( }, ) - // IptablesRulesTotal is the total number of iptables rules that the iptables + // IPTablesRulesTotal is the total number of iptables rules that the iptables // proxy has installed. - IptablesRulesTotal = metrics.NewGaugeVec( + IPTablesRulesTotal = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_iptables_total", @@ -173,9 +173,9 @@ var ( []string{"table"}, ) - // IptablesRulesLastSync is the number of iptables rules that the iptables proxy + // IPTablesRulesLastSync is the number of iptables rules that the iptables proxy // updated in the last sync. - IptablesRulesLastSync = metrics.NewGaugeVec( + IPTablesRulesLastSync = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_iptables_last", @@ -259,17 +259,17 @@ func RegisterMetrics(mode kubeproxyconfig.ProxyMode) { case kubeproxyconfig.ProxyModeIPTables: legacyregistry.MustRegister(SyncFullProxyRulesLatency) legacyregistry.MustRegister(SyncPartialProxyRulesLatency) - legacyregistry.MustRegister(IptablesRestoreFailuresTotal) - legacyregistry.MustRegister(IptablesPartialRestoreFailuresTotal) - legacyregistry.MustRegister(IptablesRulesTotal) - legacyregistry.MustRegister(IptablesRulesLastSync) + legacyregistry.MustRegister(IPTablesRestoreFailuresTotal) + legacyregistry.MustRegister(IPTablesPartialRestoreFailuresTotal) + legacyregistry.MustRegister(IPTablesRulesTotal) + legacyregistry.MustRegister(IPTablesRulesLastSync) case kubeproxyconfig.ProxyModeIPVS: - legacyregistry.MustRegister(IptablesRestoreFailuresTotal) + legacyregistry.MustRegister(IPTablesRestoreFailuresTotal) case kubeproxyconfig.ProxyModeNFTables: // FIXME: should not use the iptables-specific metric - legacyregistry.MustRegister(IptablesRestoreFailuresTotal) + legacyregistry.MustRegister(IPTablesRestoreFailuresTotal) case kubeproxyconfig.ProxyModeKernelspace: // currently no winkernel-specific metrics diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index 31b7c77e1be..0db33256a04 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -1620,7 +1620,7 @@ func (proxier *Proxier) syncProxyRules() { err = proxier.nftables.Run(context.TODO(), tx) if err != nil { proxier.logger.Error(err, "nftables sync failed") - metrics.IptablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.Inc() return } success = true From fc05a294cca75c1660220a7e1f4c58fdd048880b Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 25 Apr 2024 18:40:58 -0400 Subject: [PATCH 3/5] Rename nftables sync failure metric --- pkg/proxy/metrics/metrics.go | 14 ++++++++++++-- pkg/proxy/nftables/proxier.go | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index 0a59bd80c80..45c196b63e6 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -185,6 +185,17 @@ var ( []string{"table"}, ) + // NFTablesSyncFailuresTotal is the number of nftables sync failures that the + // proxy has seen. + NFTablesSyncFailuresTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_nftables_sync_failures_total", + Help: "Cumulative proxy nftables sync failures", + StabilityLevel: metrics.ALPHA, + }, + ) + // ProxyHealthzTotal is the number of returned HTTP Status for each // healthz probe. ProxyHealthzTotal = metrics.NewCounterVec( @@ -268,8 +279,7 @@ func RegisterMetrics(mode kubeproxyconfig.ProxyMode) { legacyregistry.MustRegister(IPTablesRestoreFailuresTotal) case kubeproxyconfig.ProxyModeNFTables: - // FIXME: should not use the iptables-specific metric - legacyregistry.MustRegister(IPTablesRestoreFailuresTotal) + legacyregistry.MustRegister(NFTablesSyncFailuresTotal) case kubeproxyconfig.ProxyModeKernelspace: // currently no winkernel-specific metrics diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index 0db33256a04..c921ccabccf 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -1620,7 +1620,7 @@ func (proxier *Proxier) syncProxyRules() { err = proxier.nftables.Run(context.TODO(), tx) if err != nil { proxier.logger.Error(err, "nftables sync failed") - metrics.IPTablesRestoreFailuresTotal.Inc() + metrics.NFTablesSyncFailuresTotal.Inc() return } success = true From d4e6e62134aea2112f8ef779da0b60989149e58e Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Fri, 26 Apr 2024 07:19:24 -0400 Subject: [PATCH 4/5] Add nftables cleanup failure metric, fix cleanup bug If the sync fails, don't try to cleanup, since it's guaranteed to fail too. --- pkg/proxy/metrics/metrics.go | 12 ++++++++++++ pkg/proxy/nftables/proxier.go | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index 45c196b63e6..a40cfb9f564 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -196,6 +196,17 @@ var ( }, ) + // NFTablesCleanupFailuresTotal is the number of nftables stale chain cleanup + // failures that the proxy has seen. + NFTablesCleanupFailuresTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_nftables_cleanup_failures_total", + Help: "Cumulative proxy nftables cleanup failures", + StabilityLevel: metrics.ALPHA, + }, + ) + // ProxyHealthzTotal is the number of returned HTTP Status for each // healthz probe. ProxyHealthzTotal = metrics.NewCounterVec( @@ -280,6 +291,7 @@ func RegisterMetrics(mode kubeproxyconfig.ProxyMode) { case kubeproxyconfig.ProxyModeNFTables: legacyregistry.MustRegister(NFTablesSyncFailuresTotal) + legacyregistry.MustRegister(NFTablesCleanupFailuresTotal) case kubeproxyconfig.ProxyModeKernelspace: // currently no winkernel-specific metrics diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index c921ccabccf..b2b7accba21 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -1033,7 +1033,7 @@ func (proxier *Proxier) syncProxyRules() { // the chains still exist, they'll just get added back // (with a later timestamp) at the end of the sync. proxier.logger.Error(err, "Unable to delete stale chains; will retry later") - // FIXME: metric + metrics.NFTablesCleanupFailuresTotal.Inc() } } } @@ -1621,6 +1621,10 @@ func (proxier *Proxier) syncProxyRules() { if err != nil { proxier.logger.Error(err, "nftables sync failed") metrics.NFTablesSyncFailuresTotal.Inc() + + // staleChains is now incorrect since we didn't actually flush the + // chains in it. We can recompute it next time. + clear(proxier.staleChains) return } success = true From c4dd2c5ad71997be162e0a3f0c401d417bf2c25b Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 25 Apr 2024 21:41:52 -0400 Subject: [PATCH 5/5] Re-enable V(9) transaction logging in nftables proxy --- pkg/proxy/nftables/proxier.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index b2b7accba21..e86c77aa555 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -1614,8 +1614,9 @@ func (proxier *Proxier) syncProxyRules() { "numEndpoints", totalEndpoints, ) - // FIXME - // klog.V(9).InfoS("Running nftables transaction", "transaction", tx.Bytes()) + if klogV9 := klog.V(9); klogV9.Enabled() { + klogV9.InfoS("Running nftables transaction", "transaction", tx.String()) + } err = proxier.nftables.Run(context.TODO(), tx) if err != nil {