From 424685321191af4f3b9c3fe88581f454448718a4 Mon Sep 17 00:00:00 2001 From: Random-Liu Date: Fri, 8 Jul 2016 10:45:20 -0700 Subject: [PATCH] Prevent kube-proxy from panicking when sysfs is mounted as read-only. Send a node event when this happens and hint to the administrator about the remediation. --- cmd/kube-proxy/app/conntrack.go | 40 +++++++++++++++++++++++++++++++++ cmd/kube-proxy/app/server.go | 15 +++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/cmd/kube-proxy/app/conntrack.go b/cmd/kube-proxy/app/conntrack.go index b7323033e15..001d9911193 100644 --- a/cmd/kube-proxy/app/conntrack.go +++ b/cmd/kube-proxy/app/conntrack.go @@ -17,11 +17,13 @@ limitations under the License. package app import ( + "errors" "io/ioutil" "strconv" "github.com/golang/glog" + "k8s.io/kubernetes/pkg/util/mount" "k8s.io/kubernetes/pkg/util/sysctl" ) @@ -32,11 +34,25 @@ type Conntracker interface { type realConntracker struct{} +var readOnlySysFSError = errors.New("ReadOnlySysFS") + func (realConntracker) SetMax(max int) error { glog.Infof("Setting nf_conntrack_max to %d", max) if err := sysctl.SetSysctl("net/netfilter/nf_conntrack_max", max); err != nil { return err } + // sysfs is expected to be mounted as 'rw'. However, it may be unexpectedly mounted as + // 'ro' by docker because of a known docker issue (https://github.com/docker/docker/issues/24000). + // Setting conntrack will fail when sysfs is readonly. When that happens, we don't set conntrack + // hashsize and return a special error readOnlySysFSError here. The caller should deal with + // readOnlySysFSError differently. 
+ writable, err := isSysFSWritable() + if err != nil { + return err + } + if !writable { + return readOnlySysFSError + } // TODO: generify this and sysctl to a new sysfs.WriteInt() glog.Infof("Setting conntrack hashsize to %d", max/4) return ioutil.WriteFile("/sys/module/nf_conntrack/parameters/hashsize", []byte(strconv.Itoa(max/4)), 0640) @@ -46,3 +62,27 @@ func (realConntracker) SetTCPEstablishedTimeout(seconds int) error { glog.Infof("Setting nf_conntrack_tcp_timeout_established to %d", seconds) return sysctl.SetSysctl("net/netfilter/nf_conntrack_tcp_timeout_established", seconds) } + +// isSysFSWritable checks /proc/mounts to see whether sysfs is 'rw' or not. +func isSysFSWritable() (bool, error) { + const permWritable = "rw" + const sysfsDevice = "sysfs" + m := mount.New() + mountPoints, err := m.List() + if err != nil { + glog.Errorf("failed to list mount points: %v", err) + return false, err + } + for _, mountPoint := range mountPoints { + if mountPoint.Device != sysfsDevice { + continue + } + // Check whether sysfs is 'rw' + if len(mountPoint.Opts) > 0 && mountPoint.Opts[0] == permWritable { + return true, nil + } + glog.Errorf("sysfs is not writable: %+v", mountPoint) + break + } + return false, nil +} diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index dc5d4df0007..51e8d2eec3f 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -299,8 +299,19 @@ func (s *ProxyServer) Run() error { // Tune conntrack, if requested if s.Conntracker != nil { if s.Config.ConntrackMax > 0 { - if err := s.Conntracker.SetMax(int(s.Config.ConntrackMax)); err != nil { - return err + err := s.Conntracker.SetMax(int(s.Config.ConntrackMax)) + if err != nil { + if err != readOnlySysFSError { + return err + } + // readOnlySysFSError is caused by a known docker issue (https://github.com/docker/docker/issues/24000), + // the only remediation we know is to restart the docker daemon. 
+ // Here we'll send a node event with a specific reason and message; the + // administrator should decide whether and how to handle this issue, + // whether to drain the node and restart docker. + // TODO(random-liu): Remove this when the docker bug is fixed. + const message = "DOCKER RESTART NEEDED (docker issue #24000): /sys is read-only: can't raise conntrack limits, problems may arise later." + s.Recorder.Eventf(s.Config.NodeRef, api.EventTypeWarning, err.Error(), message) } } if s.Config.ConntrackTCPEstablishedTimeout.Duration > 0 {