diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 086c4f30bb4..4feba919afa 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -466,7 +466,7 @@ func CleanupLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounte err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters) if err != nil { logger.Error(err, "Failed to execute iptables-restore", "table", utiliptables.TableNAT) - metrics.IPTablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(ipt.Protocol())).Inc() encounteredError = true } } @@ -493,7 +493,7 @@ func CleanupLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounte // Write it. if err := ipt.Restore(utiliptables.TableFilter, filterLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil { logger.Error(err, "Failed to execute iptables-restore", "table", utiliptables.TableFilter) - metrics.IPTablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(ipt.Protocol())).Inc() encounteredError = true } } @@ -527,7 +527,7 @@ func (proxier *Proxier) Sync() { if proxier.healthzServer != nil { proxier.healthzServer.QueuedUpdate(proxier.ipFamily) } - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Run() } @@ -539,7 +539,7 @@ func (proxier *Proxier) SyncLoop() { } // synthesize "last change queued" time as the informers are syncing. - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Loop(wait.NeverStop) } @@ -813,11 +813,11 @@ func (proxier *Proxier) syncProxyRules() { // Keep track of how long syncs take. start := time.Now() defer func() { - metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) if tryPartialSync { - metrics.SyncPartialProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncPartialProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) } else { - metrics.SyncFullProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncFullProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) } proxier.logger.V(2).Info("SyncProxyRules complete", "elapsed", time.Since(start)) }() @@ -833,7 +833,7 @@ func (proxier *Proxier) syncProxyRules() { proxier.logger.Info("Sync failed", "retryingTime", proxier.syncPeriod) proxier.syncRunner.RetryAfter(proxier.syncPeriod) if tryPartialSync { - metrics.IPTablesPartialRestoreFailuresTotal.Inc() + metrics.IPTablesPartialRestoreFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc() } // proxier.serviceChanges and proxier.endpointChanges have already // been flushed, so we've lost the state needed to be able to do @@ -1528,10 +1528,10 @@ func (proxier *Proxier) syncProxyRules() { "-j", "ACCEPT", ) - metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines())) - metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines())) - metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() + skippedNatRules.Lines() - deletedChains)) - metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() - deletedChains)) + metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableFilter), string(proxier.ipFamily)).Set(float64(proxier.filterRules.Lines())) + metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableFilter), string(proxier.ipFamily)).Set(float64(proxier.filterRules.Lines())) + metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableNAT), string(proxier.ipFamily)).Set(float64(proxier.natRules.Lines() + skippedNatRules.Lines() - deletedChains)) + metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableNAT), string(proxier.ipFamily)).Set(float64(proxier.natRules.Lines() - deletedChains)) // Sync rules. proxier.iptablesData.Reset() @@ -1563,7 +1563,7 @@ func (proxier *Proxier) syncProxyRules() { } else { proxier.logger.Error(err, "Failed to execute iptables-restore") } - metrics.IPTablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc() return } success = true @@ -1572,17 +1572,17 @@ func (proxier *Proxier) syncProxyRules() { for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes { for _, lastChangeTriggerTime := range lastChangeTriggerTimes { latency := metrics.SinceInSeconds(lastChangeTriggerTime) - metrics.NetworkProgrammingLatency.Observe(latency) + metrics.NetworkProgrammingLatency.WithLabelValues(string(proxier.ipFamily)).Observe(latency) proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency) } } - metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(serviceNoLocalEndpointsTotalInternal)) - metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(serviceNoLocalEndpointsTotalExternal)) + metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalInternal)) + metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalExternal)) if proxier.healthzServer != nil { proxier.healthzServer.Updated(proxier.ipFamily) } - metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() // Update service healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the serviceHealthServer diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index b42490764a8..3d7bb161b22 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -413,8 +413,8 @@ func countRules(logger klog.Logger, tableName utiliptables.Table, ruleData strin return rules } -func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table) int { - numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesTotal.WithLabelValues(string(tableName))) +func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table, ipFamily string) int { + numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesTotal.WithLabelValues(string(tableName), ipFamily)) if err != nil { logger.Error(err, "metrics are not registered?") return -1 @@ -422,8 +422,8 @@ func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table) int return int(numRulesFloat) } -func countRulesFromLastSyncMetric(logger klog.Logger, tableName utiliptables.Table) int { - numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesLastSync.WithLabelValues(string(tableName))) +func countRulesFromLastSyncMetric(logger klog.Logger, tableName utiliptables.Table, ipFamily string) int { + numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesLastSync.WithLabelValues(string(tableName), ipFamily)) if err != nil { logger.Error(err, "metrics are not registered?") return -1 @@ -1809,7 +1809,7 @@ func TestOverallIPTablesRules(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String()) - nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT) + nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) expectedNatRules := countRules(logger, utiliptables.TableNAT, fp.iptablesData.String()) if nNatRules != expectedNatRules { @@ -4075,14 +4075,14 @@ func TestProxierMetricsIPTablesTotalRules(t *testing.T) { fp.syncProxyRules() iptablesData := fp.iptablesData.String() - nFilterRules := countRulesFromMetric(logger, utiliptables.TableFilter) + nFilterRules := countRulesFromMetric(logger, utiliptables.TableFilter, string(fp.ipFamily)) expectedFilterRules := countRules(logger, utiliptables.TableFilter, iptablesData) if nFilterRules != expectedFilterRules { t.Fatalf("Wrong number of filter rule: expected %d got %d\n%s", expectedFilterRules, nFilterRules, iptablesData) } - nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT) + nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) expectedNatRules := countRules(logger, utiliptables.TableNAT, iptablesData) if nNatRules != expectedNatRules { @@ -4108,14 +4108,14 @@ func TestProxierMetricsIPTablesTotalRules(t *testing.T) { fp.syncProxyRules() iptablesData = fp.iptablesData.String() - nFilterRules = countRulesFromMetric(logger, utiliptables.TableFilter) + nFilterRules = countRulesFromMetric(logger, utiliptables.TableFilter, string(fp.ipFamily)) expectedFilterRules = countRules(logger, utiliptables.TableFilter, iptablesData) if nFilterRules != expectedFilterRules { t.Fatalf("Wrong number of filter rule: expected %d got %d\n%s", expectedFilterRules, nFilterRules, iptablesData) } - nNatRules = countRulesFromMetric(logger, utiliptables.TableNAT) + nNatRules = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) expectedNatRules = countRules(logger, utiliptables.TableNAT, iptablesData) if nNatRules != expectedNatRules { @@ -5827,13 +5827,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String()) rulesSynced := countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric := countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric := countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } rulesTotal := rulesSynced - rulesTotalMetric := countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric := countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -5905,7 +5905,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } @@ -5913,7 +5913,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { // We added 1 KUBE-SERVICES rule, 2 KUBE-SVC-X27LE4BHSL4DOUIK rules, and 2 // KUBE-SEP-BSWRHOQ77KEXZLNL rules. rulesTotal += 5 - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -5956,7 +5956,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } @@ -5964,7 +5964,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { // We deleted 1 KUBE-SERVICES rule, 2 KUBE-SVC-2VJB64SDSIJUP5T6 rules, and 2 // KUBE-SEP-UHEGFW77JX3KXTOV rules rulesTotal -= 5 - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6016,14 +6016,14 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } // The REJECT rule is in "filter", not NAT, so the number of NAT rules hasn't // changed. - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6079,7 +6079,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } @@ -6087,7 +6087,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { // We added 1 KUBE-SERVICES rule, 2 KUBE-SVC-4SW47YFZTEDKD3PK rules, and // 2 KUBE-SEP-AYCN5HPXMIRJNJXU rules rulesTotal += 5 - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6138,13 +6138,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } // We rewrote existing rules but did not change the overall number of rules. - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6196,7 +6196,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } @@ -6205,7 +6205,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { // jumping to the new SEP chain. The other rules related to svc3 got rewritten, // but that does not change the count of rules. rulesTotal += 3 - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6244,13 +6244,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } // (No changes) - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6259,7 +6259,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { if fp.needFullSync { t.Fatalf("Proxier unexpectedly already needs a full sync?") } - partialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal) + partialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal.WithLabelValues(string(fp.ipFamily))) if err != nil { t.Fatalf("Could not get partial restore failures metric: %v", err) } @@ -6293,7 +6293,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { if !fp.needFullSync { t.Errorf("Proxier did not fail on previous partial resync?") } - updatedPartialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal) + updatedPartialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal.WithLabelValues(string(fp.ipFamily))) if err != nil { t.Errorf("Could not get partial restore failures metric: %v", err) } @@ -6354,7 +6354,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String()) rulesSynced = countRules(logger, utiliptables.TableNAT, expected) - rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT) + rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesSyncedMetric != rulesSynced { t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced) } @@ -6362,7 +6362,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) { // We deleted 1 KUBE-SERVICES rule, 2 KUBE-SVC-4SW47YFZTEDKD3PK rules, and 2 // KUBE-SEP-AYCN5HPXMIRJNJXU rules rulesTotal -= 5 - rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT) + rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily)) if rulesTotalMetric != rulesTotal { t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal) } @@ -6504,7 +6504,7 @@ func TestNoEndpointsMetric(t *testing.T) { fp.OnEndpointSliceAdd(endpointSlice) fp.syncProxyRules() - syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal")) + syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(fp.ipFamily))) if err != nil { t.Errorf("failed to get %s value, err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err) } @@ -6513,7 +6513,7 @@ func TestNoEndpointsMetric(t *testing.T) { t.Errorf("sync_proxy_rules_no_endpoints_total metric mismatch(internal): got=%d, expected %d", int(syncProxyRulesNoLocalEndpointsTotalInternal), tc.expectedSyncProxyRulesNoLocalEndpointsTotalInternal) } - syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external")) + syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(fp.ipFamily))) if err != nil { t.Errorf("failed to get %s value(external), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err) } diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index f7c220e2def..43bb1776387 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -744,7 +744,7 @@ func (proxier *Proxier) Sync() { if proxier.healthzServer != nil { proxier.healthzServer.QueuedUpdate(proxier.ipFamily) } - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Run() } @@ -755,7 +755,7 @@ func (proxier *Proxier) SyncLoop() { proxier.healthzServer.Updated(proxier.ipFamily) } // synthesize "last change queued" time as the informers are syncing. - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Loop(wait.NeverStop) } @@ -925,7 +925,7 @@ func (proxier *Proxier) syncProxyRules() { // Keep track of how long syncs take. start := time.Now() defer func() { - metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) proxier.logger.V(4).Info("syncProxyRules complete", "elapsed", time.Since(start)) }() @@ -1444,13 +1444,13 @@ func (proxier *Proxier) syncProxyRules() { } else { proxier.logger.Error(err, "Failed to execute iptables-restore", "rules", proxier.iptablesData.Bytes()) } - metrics.IPTablesRestoreFailuresTotal.Inc() + metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc() return } for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes { for _, lastChangeTriggerTime := range lastChangeTriggerTimes { latency := metrics.SinceInSeconds(lastChangeTriggerTime) - metrics.NetworkProgrammingLatency.Observe(latency) + metrics.NetworkProgrammingLatency.WithLabelValues(string(proxier.ipFamily)).Observe(latency) proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency) } } @@ -1482,7 +1482,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.Updated(proxier.ipFamily) } - metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() // Update service healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the serviceHealthServer @@ -1494,8 +1494,8 @@ func (proxier *Proxier) syncProxyRules() { proxier.logger.Error(err, "Error syncing healthcheck endpoints") } - metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(proxier.serviceNoLocalEndpointsInternal.Len())) - metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(proxier.serviceNoLocalEndpointsExternal.Len())) + metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(proxier.ipFamily)).Set(float64(proxier.serviceNoLocalEndpointsInternal.Len())) + metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(proxier.ipFamily)).Set(float64(proxier.serviceNoLocalEndpointsExternal.Len())) if endpointUpdateResult.ConntrackCleanupRequired { // Finish housekeeping, clear stale conntrack entries for UDP Services diff --git a/pkg/proxy/ipvs/proxier_test.go b/pkg/proxy/ipvs/proxier_test.go index 1b54f9a285e..004b54f95ba 100644 --- a/pkg/proxy/ipvs/proxier_test.go +++ b/pkg/proxy/ipvs/proxier_test.go @@ -5620,7 +5620,7 @@ func TestNoEndpointsMetric(t *testing.T) { fp.OnEndpointSliceAdd(endpointSlice) fp.syncProxyRules() - syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal")) + syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(fp.ipFamily))) if err != nil { t.Errorf("failed to get %s value(internal), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err) } @@ -5629,7 +5629,7 @@ func TestNoEndpointsMetric(t *testing.T) { t.Errorf("sync_proxy_rules_no_endpoints_total metric mismatch(internal): got=%d, expected %d", int(syncProxyRulesNoLocalEndpointsTotalInternal), tc.expectedSyncProxyRulesNoLocalEndpointsTotalInternal) } - syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external")) + syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(fp.ipFamily))) if err != nil { t.Errorf("failed to get %s value(external), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err) } diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index d65e7c29d0f..8b3136ff057 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -32,7 +32,7 @@ const kubeProxySubsystem = "kubeproxy" var ( // SyncProxyRulesLatency is the latency of one round of kube-proxy syncing proxy // rules. (With the iptables proxy, this includes both full and partial syncs.) - SyncProxyRulesLatency = metrics.NewHistogram( + SyncProxyRulesLatency = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_duration_seconds", @@ -40,10 +40,11 @@ var ( Buckets: metrics.ExponentialBuckets(0.001, 2, 15), StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // SyncFullProxyRulesLatency is the latency of one round of full rule syncing. - SyncFullProxyRulesLatency = metrics.NewHistogram( + SyncFullProxyRulesLatency = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: kubeProxySubsystem, Name: "sync_full_proxy_rules_duration_seconds", @@ -51,10 +52,11 @@ var ( Buckets: metrics.ExponentialBuckets(0.001, 2, 15), StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // SyncPartialProxyRulesLatency is the latency of one round of partial rule syncing. - SyncPartialProxyRulesLatency = metrics.NewHistogram( + SyncPartialProxyRulesLatency = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: kubeProxySubsystem, Name: "sync_partial_proxy_rules_duration_seconds", @@ -62,17 +64,19 @@ var ( Buckets: metrics.ExponentialBuckets(0.001, 2, 15), StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // SyncProxyRulesLastTimestamp is the timestamp proxy rules were last // successfully synced. - SyncProxyRulesLastTimestamp = metrics.NewGauge( + SyncProxyRulesLastTimestamp = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_last_timestamp_seconds", Help: "The last time proxy rules were successfully synced", StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // NetworkProgrammingLatency is defined as the time it took to program the network - from the time @@ -82,7 +86,7 @@ var ( // Note that the metrics is partially based on the time exported by the endpoints controller on // the master machine. The measurement may be inaccurate if there is a clock drift between the // node and master machine. - NetworkProgrammingLatency = metrics.NewHistogram( + NetworkProgrammingLatency = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: kubeProxySubsystem, Name: "network_programming_duration_seconds", @@ -95,6 +99,7 @@ var ( ), StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // EndpointChangesPending is the number of pending endpoint changes that @@ -151,24 +156,26 @@ var ( // IPTablesRestoreFailuresTotal is the number of iptables restore failures that the proxy has // seen. - IPTablesRestoreFailuresTotal = metrics.NewCounter( + IPTablesRestoreFailuresTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_iptables_restore_failures_total", Help: "Cumulative proxy iptables restore failures", StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // IPTablesPartialRestoreFailuresTotal is the number of iptables *partial* restore // failures (resulting in a fall back to a full restore) that the proxy has seen. - IPTablesPartialRestoreFailuresTotal = metrics.NewCounter( + IPTablesPartialRestoreFailuresTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_iptables_partial_restore_failures_total", Help: "Cumulative proxy iptables partial restore failures", StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // IPTablesRulesTotal is the total number of iptables rules that the iptables @@ -180,7 +187,7 @@ var ( Help: "Total number of iptables rules owned by kube-proxy", StabilityLevel: metrics.ALPHA, }, - []string{"table"}, + []string{"table", "ip_family"}, ) // IPTablesRulesLastSync is the number of iptables rules that the iptables proxy @@ -192,29 +199,31 @@ var ( Help: "Number of iptables rules written by kube-proxy in last sync", StabilityLevel: metrics.ALPHA, }, - []string{"table"}, + []string{"table", "ip_family"}, ) // NFTablesSyncFailuresTotal is the number of nftables sync failures that the // proxy has seen. - NFTablesSyncFailuresTotal = metrics.NewCounter( + NFTablesSyncFailuresTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_nftables_sync_failures_total", Help: "Cumulative proxy nftables sync failures", StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // NFTablesCleanupFailuresTotal is the number of nftables stale chain cleanup // failures that the proxy has seen. - NFTablesCleanupFailuresTotal = metrics.NewCounter( + NFTablesCleanupFailuresTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_nftables_cleanup_failures_total", Help: "Cumulative proxy nftables cleanup failures", StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // ProxyHealthzTotal is the number of returned HTTP Status for each @@ -244,13 +253,14 @@ var ( // SyncProxyRulesLastQueuedTimestamp is the last time a proxy sync was // requested. If this is much larger than // kubeproxy_sync_proxy_rules_last_timestamp_seconds, then something is hung. - SyncProxyRulesLastQueuedTimestamp = metrics.NewGauge( + SyncProxyRulesLastQueuedTimestamp = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: kubeProxySubsystem, Name: "sync_proxy_rules_last_queued_timestamp_seconds", Help: "The last time a sync of proxy rules was queued", StabilityLevel: metrics.ALPHA, }, + []string{"ip_family"}, ) // SyncProxyRulesNoLocalEndpointsTotal is the total number of rules that do @@ -263,7 +273,7 @@ var ( Help: "Number of services with a Local traffic policy and no endpoints", StabilityLevel: metrics.ALPHA, }, - []string{"traffic_policy"}, + []string{"traffic_policy", "ip_family"}, ) // localhostNodePortsAcceptedPacketsDescription describe the metrics for the number of packets accepted diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index 538869cb061..06b347bd532 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -743,7 +743,7 @@ func (proxier *Proxier) Sync() { if proxier.healthzServer != nil { proxier.healthzServer.QueuedUpdate(proxier.ipFamily) } - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Run() } @@ -755,7 +755,7 @@ func (proxier *Proxier) SyncLoop() { } // synthesize "last change queued" time as the informers are syncing. - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Loop(wait.NeverStop) } @@ -1159,11 +1159,11 @@ func (proxier *Proxier) syncProxyRules() { // Keep track of how long syncs take. start := time.Now() defer func() { - metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) if tryPartialSync { - metrics.SyncPartialProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncPartialProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) } else { - metrics.SyncFullProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncFullProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) } proxier.logger.V(2).Info("SyncProxyRules complete", "elapsed", time.Since(start)) }() @@ -1208,7 +1208,7 @@ func (proxier *Proxier) syncProxyRules() { // the chains still exist, they'll just get added back // (with a later timestamp) at the end of the sync. proxier.logger.Error(err, "Unable to delete stale chains; will retry later") - metrics.NFTablesCleanupFailuresTotal.Inc() + metrics.NFTablesCleanupFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc() } } } @@ -1803,7 +1803,7 @@ func (proxier *Proxier) syncProxyRules() { err = proxier.nftables.Run(context.TODO(), tx) if err != nil { proxier.logger.Error(err, "nftables sync failed") - metrics.NFTablesSyncFailuresTotal.Inc() + metrics.NFTablesSyncFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc() // staleChains is now incorrect since we didn't actually flush the // chains in it. We can recompute it next time. @@ -1816,17 +1816,17 @@ func (proxier *Proxier) syncProxyRules() { for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes { for _, lastChangeTriggerTime := range lastChangeTriggerTimes { latency := metrics.SinceInSeconds(lastChangeTriggerTime) - metrics.NetworkProgrammingLatency.Observe(latency) + metrics.NetworkProgrammingLatency.WithLabelValues(string(proxier.ipFamily)).Observe(latency) proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency) } } - metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(serviceNoLocalEndpointsTotalInternal)) - metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(serviceNoLocalEndpointsTotalExternal)) + metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalInternal)) + metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalExternal)) if proxier.healthzServer != nil { proxier.healthzServer.Updated(proxier.ipFamily) } - metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() // Update service healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the serviceHealthServer diff --git a/pkg/proxy/nftables/proxier_test.go b/pkg/proxy/nftables/proxier_test.go index d94f8590ed4..183001040e3 100644 --- a/pkg/proxy/nftables/proxier_test.go +++ b/pkg/proxy/nftables/proxier_test.go @@ -4454,7 +4454,7 @@ func TestNoEndpointsMetric(t *testing.T) { fp.OnEndpointSliceAdd(endpointSlice) fp.syncProxyRules() - syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal")) + syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(fp.ipFamily))) if err != nil { t.Errorf("failed to get %s value, err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err) } @@ -4463,7 +4463,7 @@ func TestNoEndpointsMetric(t *testing.T) { t.Errorf("sync_proxy_rules_no_endpoints_total metric mismatch(internal): got=%d, expected %d", int(syncProxyRulesNoLocalEndpointsTotalInternal), tc.expectedSyncProxyRulesNoLocalEndpointsTotalInternal) } - syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external")) + syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(fp.ipFamily))) if err != nil { t.Errorf("failed to get %s value(external), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err) } diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go index 92a8969c98f..d50b4b39291 100644 --- a/pkg/proxy/winkernel/proxier.go +++ b/pkg/proxy/winkernel/proxier.go @@ -933,7 +933,7 @@ func (proxier *Proxier) Sync() { if proxier.healthzServer != nil { proxier.healthzServer.QueuedUpdate(proxier.ipFamily) } - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Run() } @@ -944,7 +944,7 @@ func (proxier *Proxier) SyncLoop() { proxier.healthzServer.Updated(proxier.ipFamily) } // synthesize "last change queued" time as the informers are syncing. - metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() proxier.syncRunner.Loop(wait.NeverStop) } @@ -1140,7 +1140,7 @@ func (proxier *Proxier) syncProxyRules() { // Keep track of how long syncs take. start := time.Now() defer func() { - metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) + metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) klog.V(4).InfoS("Syncing proxy rules complete", "elapsed", time.Since(start)) }() @@ -1695,7 +1695,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.Updated(proxier.ipFamily) } - metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() + metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime() // Update service healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the serviceHealthServer