kube-proxy: implement dual-stack metrics

Signed-off-by: Daman Arora <aroradaman@gmail.com>
Co-authored-by: Antonio Ojea <aojea@google.com>
This commit is contained in:
Antonio Ojea 2024-11-29 15:18:16 +00:00 committed by Daman Arora
parent 3bec2450ef
commit f93e6f3d3a
8 changed files with 98 additions and 88 deletions

View File

@ -466,7 +466,7 @@ func CleanupLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounte
err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
logger.Error(err, "Failed to execute iptables-restore", "table", utiliptables.TableNAT)
metrics.IPTablesRestoreFailuresTotal.Inc()
metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(ipt.Protocol())).Inc()
encounteredError = true
}
}
@ -493,7 +493,7 @@ func CleanupLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounte
// Write it.
if err := ipt.Restore(utiliptables.TableFilter, filterLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil {
logger.Error(err, "Failed to execute iptables-restore", "table", utiliptables.TableFilter)
metrics.IPTablesRestoreFailuresTotal.Inc()
metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(ipt.Protocol())).Inc()
encounteredError = true
}
}
@ -527,7 +527,7 @@ func (proxier *Proxier) Sync() {
if proxier.healthzServer != nil {
proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
}
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Run()
}
@ -539,7 +539,7 @@ func (proxier *Proxier) SyncLoop() {
}
// synthesize "last change queued" time as the informers are syncing.
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Loop(wait.NeverStop)
}
@ -813,11 +813,11 @@ func (proxier *Proxier) syncProxyRules() {
// Keep track of how long syncs take.
start := time.Now()
defer func() {
metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
if tryPartialSync {
metrics.SyncPartialProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncPartialProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
} else {
metrics.SyncFullProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncFullProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
}
proxier.logger.V(2).Info("SyncProxyRules complete", "elapsed", time.Since(start))
}()
@ -833,7 +833,7 @@ func (proxier *Proxier) syncProxyRules() {
proxier.logger.Info("Sync failed", "retryingTime", proxier.syncPeriod)
proxier.syncRunner.RetryAfter(proxier.syncPeriod)
if tryPartialSync {
metrics.IPTablesPartialRestoreFailuresTotal.Inc()
metrics.IPTablesPartialRestoreFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc()
}
// proxier.serviceChanges and proxier.endpointChanges have already
// been flushed, so we've lost the state needed to be able to do
@ -1528,10 +1528,10 @@ func (proxier *Proxier) syncProxyRules() {
"-j", "ACCEPT",
)
metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines()))
metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableFilter)).Set(float64(proxier.filterRules.Lines()))
metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() + skippedNatRules.Lines() - deletedChains))
metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableNAT)).Set(float64(proxier.natRules.Lines() - deletedChains))
metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableFilter), string(proxier.ipFamily)).Set(float64(proxier.filterRules.Lines()))
metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableFilter), string(proxier.ipFamily)).Set(float64(proxier.filterRules.Lines()))
metrics.IPTablesRulesTotal.WithLabelValues(string(utiliptables.TableNAT), string(proxier.ipFamily)).Set(float64(proxier.natRules.Lines() + skippedNatRules.Lines() - deletedChains))
metrics.IPTablesRulesLastSync.WithLabelValues(string(utiliptables.TableNAT), string(proxier.ipFamily)).Set(float64(proxier.natRules.Lines() - deletedChains))
// Sync rules.
proxier.iptablesData.Reset()
@ -1563,7 +1563,7 @@ func (proxier *Proxier) syncProxyRules() {
} else {
proxier.logger.Error(err, "Failed to execute iptables-restore")
}
metrics.IPTablesRestoreFailuresTotal.Inc()
metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc()
return
}
success = true
@ -1572,17 +1572,17 @@ func (proxier *Proxier) syncProxyRules() {
for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes {
for _, lastChangeTriggerTime := range lastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.NetworkProgrammingLatency.WithLabelValues(string(proxier.ipFamily)).Observe(latency)
proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency)
}
}
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(serviceNoLocalEndpointsTotalInternal))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(serviceNoLocalEndpointsTotalExternal))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalInternal))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalExternal))
if proxier.healthzServer != nil {
proxier.healthzServer.Updated(proxier.ipFamily)
}
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
// Update service healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the serviceHealthServer

View File

@ -413,8 +413,8 @@ func countRules(logger klog.Logger, tableName utiliptables.Table, ruleData strin
return rules
}
func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table) int {
numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesTotal.WithLabelValues(string(tableName)))
func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table, ipFamily string) int {
numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesTotal.WithLabelValues(string(tableName), ipFamily))
if err != nil {
logger.Error(err, "metrics are not registered?")
return -1
@ -422,8 +422,8 @@ func countRulesFromMetric(logger klog.Logger, tableName utiliptables.Table) int
return int(numRulesFloat)
}
func countRulesFromLastSyncMetric(logger klog.Logger, tableName utiliptables.Table) int {
numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesLastSync.WithLabelValues(string(tableName)))
func countRulesFromLastSyncMetric(logger klog.Logger, tableName utiliptables.Table, ipFamily string) int {
numRulesFloat, err := testutil.GetGaugeMetricValue(metrics.IPTablesRulesLastSync.WithLabelValues(string(tableName), ipFamily))
if err != nil {
logger.Error(err, "metrics are not registered?")
return -1
@ -1809,7 +1809,7 @@ func TestOverallIPTablesRules(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String())
nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT)
nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
expectedNatRules := countRules(logger, utiliptables.TableNAT, fp.iptablesData.String())
if nNatRules != expectedNatRules {
@ -4075,14 +4075,14 @@ func TestProxierMetricsIPTablesTotalRules(t *testing.T) {
fp.syncProxyRules()
iptablesData := fp.iptablesData.String()
nFilterRules := countRulesFromMetric(logger, utiliptables.TableFilter)
nFilterRules := countRulesFromMetric(logger, utiliptables.TableFilter, string(fp.ipFamily))
expectedFilterRules := countRules(logger, utiliptables.TableFilter, iptablesData)
if nFilterRules != expectedFilterRules {
t.Fatalf("Wrong number of filter rule: expected %d got %d\n%s", expectedFilterRules, nFilterRules, iptablesData)
}
nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT)
nNatRules := countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
expectedNatRules := countRules(logger, utiliptables.TableNAT, iptablesData)
if nNatRules != expectedNatRules {
@ -4108,14 +4108,14 @@ func TestProxierMetricsIPTablesTotalRules(t *testing.T) {
fp.syncProxyRules()
iptablesData = fp.iptablesData.String()
nFilterRules = countRulesFromMetric(logger, utiliptables.TableFilter)
nFilterRules = countRulesFromMetric(logger, utiliptables.TableFilter, string(fp.ipFamily))
expectedFilterRules = countRules(logger, utiliptables.TableFilter, iptablesData)
if nFilterRules != expectedFilterRules {
t.Fatalf("Wrong number of filter rule: expected %d got %d\n%s", expectedFilterRules, nFilterRules, iptablesData)
}
nNatRules = countRulesFromMetric(logger, utiliptables.TableNAT)
nNatRules = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
expectedNatRules = countRules(logger, utiliptables.TableNAT, iptablesData)
if nNatRules != expectedNatRules {
@ -5827,13 +5827,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String())
rulesSynced := countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric := countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric := countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
rulesTotal := rulesSynced
rulesTotalMetric := countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric := countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -5905,7 +5905,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
@ -5913,7 +5913,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
// We added 1 KUBE-SERVICES rule, 2 KUBE-SVC-X27LE4BHSL4DOUIK rules, and 2
// KUBE-SEP-BSWRHOQ77KEXZLNL rules.
rulesTotal += 5
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -5956,7 +5956,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
@ -5964,7 +5964,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
// We deleted 1 KUBE-SERVICES rule, 2 KUBE-SVC-2VJB64SDSIJUP5T6 rules, and 2
// KUBE-SEP-UHEGFW77JX3KXTOV rules
rulesTotal -= 5
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6016,14 +6016,14 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
// The REJECT rule is in "filter", not NAT, so the number of NAT rules hasn't
// changed.
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6079,7 +6079,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
@ -6087,7 +6087,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
// We added 1 KUBE-SERVICES rule, 2 KUBE-SVC-4SW47YFZTEDKD3PK rules, and
// 2 KUBE-SEP-AYCN5HPXMIRJNJXU rules
rulesTotal += 5
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6138,13 +6138,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
// We rewrote existing rules but did not change the overall number of rules.
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6196,7 +6196,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
@ -6205,7 +6205,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
// jumping to the new SEP chain. The other rules related to svc3 got rewritten,
// but that does not change the count of rules.
rulesTotal += 3
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6244,13 +6244,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
// (No changes)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6259,7 +6259,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
if fp.needFullSync {
t.Fatalf("Proxier unexpectedly already needs a full sync?")
}
partialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal)
partialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal.WithLabelValues(string(fp.ipFamily)))
if err != nil {
t.Fatalf("Could not get partial restore failures metric: %v", err)
}
@ -6293,7 +6293,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
if !fp.needFullSync {
t.Errorf("Proxier did not fail on previous partial resync?")
}
updatedPartialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal)
updatedPartialRestoreFailures, err := testutil.GetCounterMetricValue(metrics.IPTablesPartialRestoreFailuresTotal.WithLabelValues(string(fp.ipFamily)))
if err != nil {
t.Errorf("Could not get partial restore failures metric: %v", err)
}
@ -6354,7 +6354,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
assertIPTablesRulesEqual(t, getLine(), false, expected, fp.iptablesData.String())
rulesSynced = countRules(logger, utiliptables.TableNAT, expected)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT)
rulesSyncedMetric = countRulesFromLastSyncMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesSyncedMetric != rulesSynced {
t.Errorf("metric shows %d rules synced but iptables data shows %d", rulesSyncedMetric, rulesSynced)
}
@ -6362,7 +6362,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
// We deleted 1 KUBE-SERVICES rule, 2 KUBE-SVC-4SW47YFZTEDKD3PK rules, and 2
// KUBE-SEP-AYCN5HPXMIRJNJXU rules
rulesTotal -= 5
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT)
rulesTotalMetric = countRulesFromMetric(logger, utiliptables.TableNAT, string(fp.ipFamily))
if rulesTotalMetric != rulesTotal {
t.Errorf("metric shows %d rules total but expected %d", rulesTotalMetric, rulesTotal)
}
@ -6504,7 +6504,7 @@ func TestNoEndpointsMetric(t *testing.T) {
fp.OnEndpointSliceAdd(endpointSlice)
fp.syncProxyRules()
syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal"))
syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(fp.ipFamily)))
if err != nil {
t.Errorf("failed to get %s value, err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err)
}
@ -6513,7 +6513,7 @@ func TestNoEndpointsMetric(t *testing.T) {
t.Errorf("sync_proxy_rules_no_endpoints_total metric mismatch(internal): got=%d, expected %d", int(syncProxyRulesNoLocalEndpointsTotalInternal), tc.expectedSyncProxyRulesNoLocalEndpointsTotalInternal)
}
syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external"))
syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(fp.ipFamily)))
if err != nil {
t.Errorf("failed to get %s value(external), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err)
}

View File

@ -744,7 +744,7 @@ func (proxier *Proxier) Sync() {
if proxier.healthzServer != nil {
proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
}
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Run()
}
@ -755,7 +755,7 @@ func (proxier *Proxier) SyncLoop() {
proxier.healthzServer.Updated(proxier.ipFamily)
}
// synthesize "last change queued" time as the informers are syncing.
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Loop(wait.NeverStop)
}
@ -925,7 +925,7 @@ func (proxier *Proxier) syncProxyRules() {
// Keep track of how long syncs take.
start := time.Now()
defer func() {
metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
proxier.logger.V(4).Info("syncProxyRules complete", "elapsed", time.Since(start))
}()
@ -1444,13 +1444,13 @@ func (proxier *Proxier) syncProxyRules() {
} else {
proxier.logger.Error(err, "Failed to execute iptables-restore", "rules", proxier.iptablesData.Bytes())
}
metrics.IPTablesRestoreFailuresTotal.Inc()
metrics.IPTablesRestoreFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc()
return
}
for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes {
for _, lastChangeTriggerTime := range lastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.NetworkProgrammingLatency.WithLabelValues(string(proxier.ipFamily)).Observe(latency)
proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency)
}
}
@ -1482,7 +1482,7 @@ func (proxier *Proxier) syncProxyRules() {
if proxier.healthzServer != nil {
proxier.healthzServer.Updated(proxier.ipFamily)
}
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
// Update service healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the serviceHealthServer
@ -1494,8 +1494,8 @@ func (proxier *Proxier) syncProxyRules() {
proxier.logger.Error(err, "Error syncing healthcheck endpoints")
}
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(proxier.serviceNoLocalEndpointsInternal.Len()))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(proxier.serviceNoLocalEndpointsExternal.Len()))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(proxier.ipFamily)).Set(float64(proxier.serviceNoLocalEndpointsInternal.Len()))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(proxier.ipFamily)).Set(float64(proxier.serviceNoLocalEndpointsExternal.Len()))
if endpointUpdateResult.ConntrackCleanupRequired {
// Finish housekeeping, clear stale conntrack entries for UDP Services

View File

@ -5620,7 +5620,7 @@ func TestNoEndpointsMetric(t *testing.T) {
fp.OnEndpointSliceAdd(endpointSlice)
fp.syncProxyRules()
syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal"))
syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(fp.ipFamily)))
if err != nil {
t.Errorf("failed to get %s value(internal), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err)
}
@ -5629,7 +5629,7 @@ func TestNoEndpointsMetric(t *testing.T) {
t.Errorf("sync_proxy_rules_no_endpoints_total metric mismatch(internal): got=%d, expected %d", int(syncProxyRulesNoLocalEndpointsTotalInternal), tc.expectedSyncProxyRulesNoLocalEndpointsTotalInternal)
}
syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external"))
syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(fp.ipFamily)))
if err != nil {
t.Errorf("failed to get %s value(external), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err)
}

View File

@ -32,7 +32,7 @@ const kubeProxySubsystem = "kubeproxy"
var (
// SyncProxyRulesLatency is the latency of one round of kube-proxy syncing proxy
// rules. (With the iptables proxy, this includes both full and partial syncs.)
SyncProxyRulesLatency = metrics.NewHistogram(
SyncProxyRulesLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_duration_seconds",
@ -40,10 +40,11 @@ var (
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// SyncFullProxyRulesLatency is the latency of one round of full rule syncing.
SyncFullProxyRulesLatency = metrics.NewHistogram(
SyncFullProxyRulesLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_full_proxy_rules_duration_seconds",
@ -51,10 +52,11 @@ var (
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// SyncPartialProxyRulesLatency is the latency of one round of partial rule syncing.
SyncPartialProxyRulesLatency = metrics.NewHistogram(
SyncPartialProxyRulesLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_partial_proxy_rules_duration_seconds",
@ -62,17 +64,19 @@ var (
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
// successfully synced.
SyncProxyRulesLastTimestamp = metrics.NewGauge(
SyncProxyRulesLastTimestamp = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_last_timestamp_seconds",
Help: "The last time proxy rules were successfully synced",
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// NetworkProgrammingLatency is defined as the time it took to program the network - from the time
@ -82,7 +86,7 @@ var (
// Note that the metric is partially based on the time exported by the endpoints controller on
// the master machine. The measurement may be inaccurate if there is a clock drift between the
// node and master machine.
NetworkProgrammingLatency = metrics.NewHistogram(
NetworkProgrammingLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: kubeProxySubsystem,
Name: "network_programming_duration_seconds",
@ -95,6 +99,7 @@ var (
),
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// EndpointChangesPending is the number of pending endpoint changes that
@ -151,24 +156,26 @@ var (
// IPTablesRestoreFailuresTotal is the number of iptables restore failures that the proxy has
// seen.
IPTablesRestoreFailuresTotal = metrics.NewCounter(
IPTablesRestoreFailuresTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_iptables_restore_failures_total",
Help: "Cumulative proxy iptables restore failures",
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// IPTablesPartialRestoreFailuresTotal is the number of iptables *partial* restore
// failures (resulting in a fall back to a full restore) that the proxy has seen.
IPTablesPartialRestoreFailuresTotal = metrics.NewCounter(
IPTablesPartialRestoreFailuresTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_iptables_partial_restore_failures_total",
Help: "Cumulative proxy iptables partial restore failures",
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// IPTablesRulesTotal is the total number of iptables rules that the iptables
@ -180,7 +187,7 @@ var (
Help: "Total number of iptables rules owned by kube-proxy",
StabilityLevel: metrics.ALPHA,
},
[]string{"table"},
[]string{"table", "ip_family"},
)
// IPTablesRulesLastSync is the number of iptables rules that the iptables proxy
@ -192,29 +199,31 @@ var (
Help: "Number of iptables rules written by kube-proxy in last sync",
StabilityLevel: metrics.ALPHA,
},
[]string{"table"},
[]string{"table", "ip_family"},
)
// NFTablesSyncFailuresTotal is the number of nftables sync failures that the
// proxy has seen.
NFTablesSyncFailuresTotal = metrics.NewCounter(
NFTablesSyncFailuresTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_nftables_sync_failures_total",
Help: "Cumulative proxy nftables sync failures",
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// NFTablesCleanupFailuresTotal is the number of nftables stale chain cleanup
// failures that the proxy has seen.
NFTablesCleanupFailuresTotal = metrics.NewCounter(
NFTablesCleanupFailuresTotal = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_nftables_cleanup_failures_total",
Help: "Cumulative proxy nftables cleanup failures",
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// ProxyHealthzTotal is the number of returned HTTP Status for each
@ -244,13 +253,14 @@ var (
// SyncProxyRulesLastQueuedTimestamp is the last time a proxy sync was
// requested. If this is much larger than
// kubeproxy_sync_proxy_rules_last_timestamp_seconds, then something is hung.
SyncProxyRulesLastQueuedTimestamp = metrics.NewGauge(
SyncProxyRulesLastQueuedTimestamp = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_last_queued_timestamp_seconds",
Help: "The last time a sync of proxy rules was queued",
StabilityLevel: metrics.ALPHA,
},
[]string{"ip_family"},
)
// SyncProxyRulesNoLocalEndpointsTotal is the total number of rules that do
@ -263,7 +273,7 @@ var (
Help: "Number of services with a Local traffic policy and no endpoints",
StabilityLevel: metrics.ALPHA,
},
[]string{"traffic_policy"},
[]string{"traffic_policy", "ip_family"},
)
// localhostNodePortsAcceptedPacketsDescription describe the metrics for the number of packets accepted

View File

@ -743,7 +743,7 @@ func (proxier *Proxier) Sync() {
if proxier.healthzServer != nil {
proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
}
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Run()
}
@ -755,7 +755,7 @@ func (proxier *Proxier) SyncLoop() {
}
// synthesize "last change queued" time as the informers are syncing.
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Loop(wait.NeverStop)
}
@ -1159,11 +1159,11 @@ func (proxier *Proxier) syncProxyRules() {
// Keep track of how long syncs take.
start := time.Now()
defer func() {
metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
if tryPartialSync {
metrics.SyncPartialProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncPartialProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
} else {
metrics.SyncFullProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncFullProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
}
proxier.logger.V(2).Info("SyncProxyRules complete", "elapsed", time.Since(start))
}()
@ -1208,7 +1208,7 @@ func (proxier *Proxier) syncProxyRules() {
// the chains still exist, they'll just get added back
// (with a later timestamp) at the end of the sync.
proxier.logger.Error(err, "Unable to delete stale chains; will retry later")
metrics.NFTablesCleanupFailuresTotal.Inc()
metrics.NFTablesCleanupFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc()
}
}
}
@ -1803,7 +1803,7 @@ func (proxier *Proxier) syncProxyRules() {
err = proxier.nftables.Run(context.TODO(), tx)
if err != nil {
proxier.logger.Error(err, "nftables sync failed")
metrics.NFTablesSyncFailuresTotal.Inc()
metrics.NFTablesSyncFailuresTotal.WithLabelValues(string(proxier.ipFamily)).Inc()
// staleChains is now incorrect since we didn't actually flush the
// chains in it. We can recompute it next time.
@ -1816,17 +1816,17 @@ func (proxier *Proxier) syncProxyRules() {
for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes {
for _, lastChangeTriggerTime := range lastChangeTriggerTimes {
latency := metrics.SinceInSeconds(lastChangeTriggerTime)
metrics.NetworkProgrammingLatency.Observe(latency)
metrics.NetworkProgrammingLatency.WithLabelValues(string(proxier.ipFamily)).Observe(latency)
proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency)
}
}
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(serviceNoLocalEndpointsTotalInternal))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(serviceNoLocalEndpointsTotalExternal))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalInternal))
metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(proxier.ipFamily)).Set(float64(serviceNoLocalEndpointsTotalExternal))
if proxier.healthzServer != nil {
proxier.healthzServer.Updated(proxier.ipFamily)
}
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
// Update service healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the serviceHealthServer

View File

@ -4454,7 +4454,7 @@ func TestNoEndpointsMetric(t *testing.T) {
fp.OnEndpointSliceAdd(endpointSlice)
fp.syncProxyRules()
syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal"))
syncProxyRulesNoLocalEndpointsTotalInternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal", string(fp.ipFamily)))
if err != nil {
t.Errorf("failed to get %s value, err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err)
}
@ -4463,7 +4463,7 @@ func TestNoEndpointsMetric(t *testing.T) {
t.Errorf("sync_proxy_rules_no_endpoints_total metric mismatch(internal): got=%d, expected %d", int(syncProxyRulesNoLocalEndpointsTotalInternal), tc.expectedSyncProxyRulesNoLocalEndpointsTotalInternal)
}
syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external"))
syncProxyRulesNoLocalEndpointsTotalExternal, err := testutil.GetGaugeMetricValue(metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external", string(fp.ipFamily)))
if err != nil {
t.Errorf("failed to get %s value(external), err: %v", metrics.SyncProxyRulesNoLocalEndpointsTotal.Name, err)
}

View File

@ -933,7 +933,7 @@ func (proxier *Proxier) Sync() {
if proxier.healthzServer != nil {
proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
}
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Run()
}
@ -944,7 +944,7 @@ func (proxier *Proxier) SyncLoop() {
proxier.healthzServer.Updated(proxier.ipFamily)
}
// synthesize "last change queued" time as the informers are syncing.
metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastQueuedTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
proxier.syncRunner.Loop(wait.NeverStop)
}
@ -1140,7 +1140,7 @@ func (proxier *Proxier) syncProxyRules() {
// Keep track of how long syncs take.
start := time.Now()
defer func() {
metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start))
klog.V(4).InfoS("Syncing proxy rules complete", "elapsed", time.Since(start))
}()
@ -1695,7 +1695,7 @@ func (proxier *Proxier) syncProxyRules() {
if proxier.healthzServer != nil {
proxier.healthzServer.Updated(proxier.ipFamily)
}
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
metrics.SyncProxyRulesLastTimestamp.WithLabelValues(string(proxier.ipFamily)).SetToCurrentTime()
// Update service healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the serviceHealthServer