mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-07 04:03:20 +00:00
Add iptables restore failure metrics
As mentioned in issue #80061, in iptables lock contention case, we can see increasing rate of iptables restore failures because it need to grab iptables file lock. The failure metric can provide administrators more insight Metrics will be collected in kube-proxy iptables and ipvs modes Signed-off-by: Hui Luo <luoh@vmware.com>
This commit is contained in:
@@ -429,6 +429,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
|
||||
err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableNAT, err)
|
||||
metrics.IptablesRestoreFailuresTotal.Inc()
|
||||
encounteredError = true
|
||||
}
|
||||
}
|
||||
@@ -455,6 +456,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
|
||||
// Write it.
|
||||
if err := ipt.Restore(utiliptables.TableFilter, filterLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil {
|
||||
klog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableFilter, err)
|
||||
metrics.IptablesRestoreFailuresTotal.Inc()
|
||||
encounteredError = true
|
||||
}
|
||||
}
|
||||
@@ -1401,6 +1403,7 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to execute iptables-restore: %v", err)
|
||||
metrics.IptablesRestoreFailuresTotal.Inc()
|
||||
// Revert new local ports.
|
||||
klog.V(2).Infof("Closing local ports after iptables-restore failure")
|
||||
utilproxy.RevertPorts(replacementPortsMap, proxier.portsMap)
|
||||
|
@@ -1310,6 +1310,7 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to execute iptables-restore: %v\nRules:\n%s", err, proxier.iptablesData.Bytes())
|
||||
metrics.IptablesRestoreFailuresTotal.Inc()
|
||||
// Revert new local ports.
|
||||
utilproxy.RevertPorts(replacementPortsMap, proxier.portsMap)
|
||||
return
|
||||
|
@@ -116,6 +116,16 @@ var (
|
||||
Help: "Cumulative proxy rules Service changes",
|
||||
},
|
||||
)
|
||||
|
||||
// IptablesRestoreFailuresTotal is the number of iptables restore failures that the proxy has
|
||||
// seen.
|
||||
IptablesRestoreFailuresTotal = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_iptables_restore_failures_total",
|
||||
Help: "Cumulative proxy iptables restore failures",
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetricsOnce sync.Once
|
||||
@@ -131,6 +141,7 @@ func RegisterMetrics() {
|
||||
prometheus.MustRegister(EndpointChangesTotal)
|
||||
prometheus.MustRegister(ServiceChangesPending)
|
||||
prometheus.MustRegister(ServiceChangesTotal)
|
||||
prometheus.MustRegister(IptablesRestoreFailuresTotal)
|
||||
})
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user