From 3266136c1d964f992fea00c5c7362b88d86e401c Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Sat, 27 Mar 2021 17:51:07 +0900 Subject: [PATCH 1/2] Fire an event when failing to open NodePort [issue] When creating a NodePort service with the kubectl create command, the NodePort assignment may fail. Failure to assign a NodePort can be simulated with the following malicious command[1]. $ kubectl create service nodeport temp-svc --tcp=`python3 < 1:31335/TCP,2:32367/TCP,3:30263/TCP,(omitted),1023:31821/TCP,1024:32475/TCP,1025:30311/TCP 12s The user does not recognize failure to assign a NodePort because create/get/describe command does not show any error. This is the issue. [solution] Users can notice errors by looking at the kube-proxy logs, but it may be difficult to see the kube-proxy logs of all nodes. E0327 08:50:10.216571 660960 proxier.go:1286] "can't open port, skipping this nodePort" err="listen tcp4 :30641: socket: too many open files" port="\"nodePort for default/temp-svc:744\" (:30641/tcp4)" E0327 08:50:10.216611 660960 proxier.go:1286] "can't open port, skipping this nodePort" err="listen tcp4 :30827: socket: too many open files" port="\"nodePort for default/temp-svc:857\" (:30827/tcp4)" ... E0327 08:50:10.217119 660960 proxier.go:1286] "can't open port, skipping this nodePort" err="listen tcp4 :32484: socket: too many open files" port="\"nodePort for default/temp-svc:805\" (:32484/tcp4)" E0327 08:50:10.217293 660960 proxier.go:1612] "Failed to execute iptables-restore" err="pipe2: too many open files ()" I0327 08:50:10.217341 660960 proxier.go:1615] "Closing local ports after iptables-restore failure" So, this patch will fire an event when NodePort assignment fails. In fact, when the externalIP assignment fails, it is also notified by event. The event will be displayed like this. $ kubectl get event LAST SEEN TYPE REASON OBJECT MESSAGE ... 2s Warning listen tcp4 :31055: socket: too many open files node/127.0.0.1 can't open "nodePort for default/temp-svc:901" (:31055/tcp4), skipping this nodePort: listen tcp4 :31055: socket: too many open files 2s Warning listen tcp4 :31422: socket: too many open files node/127.0.0.1 can't open "nodePort for default/temp-svc:474" (:31422/tcp4), skipping this nodePort: listen tcp4 :31422: socket: too many open files ... This PR fixes iptables and ipvs proxier. Since userspace proxier does not seem to be affected by this issue, it is not fixed. [1] Assume that fd limit is 1024(default). $ ulimit -n 1024 --- pkg/proxy/iptables/proxier.go | 9 +++++++++ pkg/proxy/ipvs/proxier.go | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 1d6b5551036..4b7b44601cb 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -1283,6 +1283,15 @@ func (proxier *Proxier) syncProxyRules() { } else if svcInfo.Protocol() != v1.ProtocolSCTP { socket, err := proxier.portMapper.OpenLocalPort(&lp) if err != nil { + msg := fmt.Sprintf("can't open %s, skipping this nodePort: %v", lp.String(), err) + + proxier.recorder.Eventf( + &v1.ObjectReference{ + Kind: "Node", + Name: proxier.hostname, + UID: types.UID(proxier.hostname), + Namespace: "", + }, v1.EventTypeWarning, err.Error(), msg) klog.ErrorS(err, "can't open port, skipping this nodePort", "port", lp.String()) continue } diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 4961381c8ae..208337940c3 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1466,6 +1466,15 @@ func (proxier *Proxier) syncProxyRules() { } else if svcInfo.Protocol() != v1.ProtocolSCTP { socket, err := proxier.portMapper.OpenLocalPort(&lp) if err != nil { + msg := fmt.Sprintf("can't open %s, skipping this nodePort: %v", lp.String(), err) + + proxier.recorder.Eventf( + &v1.ObjectReference{ + Kind: "Node", + Name: proxier.hostname, + UID: types.UID(proxier.hostname), + Namespace: "", + }, v1.EventTypeWarning, err.Error(), msg) klog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err) continue } From d43b8dbf4e4ea076913317ae6488d278353c482e Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Thu, 1 Apr 2021 09:13:45 +0900 Subject: [PATCH 2/2] Use simpler expressions for error messages 1. Do not describe port type in message because lp.String() already has the information. 2. Remove duplicate error detail from event log. Previous log is like this. 47s Warning listen tcp4 :30764: socket: too many open files node/127.0.0.1 can't open port "nodePort for default/temp-svc:834" (:30764/tcp4), skipping it: listen tcp4 :30764: socket: too many open files --- pkg/proxy/iptables/proxier.go | 8 ++++---- pkg/proxy/ipvs/proxier.go | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 4b7b44601cb..87b907d21ab 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -1110,7 +1110,7 @@ func (proxier *Proxier) syncProxyRules() { } else { socket, err := proxier.portMapper.OpenLocalPort(&lp) if err != nil { - msg := fmt.Sprintf("can't open %s, skipping this externalIP: %v", lp.String(), err) + msg := fmt.Sprintf("can't open port %s, skipping it", lp.String()) proxier.recorder.Eventf( &v1.ObjectReference{ @@ -1119,7 +1119,7 @@ func (proxier *Proxier) syncProxyRules() { UID: types.UID(proxier.hostname), Namespace: "", }, v1.EventTypeWarning, err.Error(), msg) - klog.ErrorS(err, "can't open port, skipping externalIP", "port", lp.String()) + klog.ErrorS(err, "can't open port, skipping it", "port", lp.String()) continue } klog.V(2).InfoS("Opened local port", "port", lp.String()) @@ -1283,7 +1283,7 @@ func (proxier *Proxier) syncProxyRules() { } else if svcInfo.Protocol() != v1.ProtocolSCTP { socket, err := proxier.portMapper.OpenLocalPort(&lp) if err != nil { - msg := fmt.Sprintf("can't open %s, skipping this nodePort: %v", lp.String(), err) + msg := fmt.Sprintf("can't open port %s, skipping it", lp.String()) proxier.recorder.Eventf( &v1.ObjectReference{ @@ -1292,7 +1292,7 @@ func (proxier *Proxier) syncProxyRules() { UID: types.UID(proxier.hostname), Namespace: "", }, v1.EventTypeWarning, err.Error(), msg) - klog.ErrorS(err, "can't open port, skipping this nodePort", "port", lp.String()) + klog.ErrorS(err, "can't open port, skipping it", "port", lp.String()) continue } klog.V(2).InfoS("Opened local port", "port", lp.String()) diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 208337940c3..6618447ec9c 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1263,7 +1263,7 @@ func (proxier *Proxier) syncProxyRules() { } else { socket, err := proxier.portMapper.OpenLocalPort(&lp) if err != nil { - msg := fmt.Sprintf("can't open %s, skipping this externalIP: %v", lp.String(), err) + msg := fmt.Sprintf("can't open port %s, skipping it", lp.String()) proxier.recorder.Eventf( &v1.ObjectReference{ @@ -1272,7 +1272,7 @@ func (proxier *Proxier) syncProxyRules() { UID: types.UID(proxier.hostname), Namespace: "", }, v1.EventTypeWarning, err.Error(), msg) - klog.Error(msg) + klog.ErrorS(err, "can't open port, skipping it", "port", lp.String()) continue } klog.V(2).Infof("Opened local port %s", lp.String()) @@ -1466,7 +1466,7 @@ func (proxier *Proxier) syncProxyRules() { } else if svcInfo.Protocol() != v1.ProtocolSCTP { socket, err := proxier.portMapper.OpenLocalPort(&lp) if err != nil { - msg := fmt.Sprintf("can't open %s, skipping this nodePort: %v", lp.String(), err) + msg := fmt.Sprintf("can't open port %s, skipping it", lp.String()) proxier.recorder.Eventf( &v1.ObjectReference{ @@ -1475,7 +1475,7 @@ func (proxier *Proxier) syncProxyRules() { UID: types.UID(proxier.hostname), Namespace: "", }, v1.EventTypeWarning, err.Error(), msg) - klog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err) + klog.ErrorS(err, "can't open port, skipping it", "port", lp.String()) continue } klog.V(2).Infof("Opened local port %s", lp.String())