Add iptables restore failure metrics

As mentioned in issue #80061, in iptables lock contention case,
we can see increasing rate of iptables restore failures because it
need to grab iptables file lock.

The failure metric can provide administrators more insight

Metrics will be collected in kube-proxy iptables and ipvs modes

Signed-off-by: Hui Luo <luoh@vmware.com>
This commit is contained in:
hui luo 2019-08-08 20:43:25 -07:00
parent eadf68ebd9
commit a2ef00c1b1
3 changed files with 15 additions and 0 deletions

View File

@ -429,6 +429,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
klog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableNAT, err)
metrics.IptablesRestoreFailuresTotal.Inc()
encounteredError = true
}
}
@ -455,6 +456,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
// Write it.
if err := ipt.Restore(utiliptables.TableFilter, filterLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil {
klog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableFilter, err)
metrics.IptablesRestoreFailuresTotal.Inc()
encounteredError = true
}
}
@ -1401,6 +1403,7 @@ func (proxier *Proxier) syncProxyRules() {
err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
klog.Errorf("Failed to execute iptables-restore: %v", err)
metrics.IptablesRestoreFailuresTotal.Inc()
// Revert new local ports.
klog.V(2).Infof("Closing local ports after iptables-restore failure")
utilproxy.RevertPorts(replacementPortsMap, proxier.portsMap)

View File

@ -1310,6 +1310,7 @@ func (proxier *Proxier) syncProxyRules() {
err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
klog.Errorf("Failed to execute iptables-restore: %v\nRules:\n%s", err, proxier.iptablesData.Bytes())
metrics.IptablesRestoreFailuresTotal.Inc()
// Revert new local ports.
utilproxy.RevertPorts(replacementPortsMap, proxier.portsMap)
return

View File

@ -116,6 +116,16 @@ var (
Help: "Cumulative proxy rules Service changes",
},
)
// IptablesRestoreFailuresTotal is the number of iptables restore failures that the proxy has
// seen.
IptablesRestoreFailuresTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_iptables_restore_failures_total",
Help: "Cumulative proxy iptables restore failures",
},
)
)
var registerMetricsOnce sync.Once
@ -131,6 +141,7 @@ func RegisterMetrics() {
prometheus.MustRegister(EndpointChangesTotal)
prometheus.MustRegister(ServiceChangesPending)
prometheus.MustRegister(ServiceChangesTotal)
prometheus.MustRegister(IptablesRestoreFailuresTotal)
})
}