diff --git a/pkg/util/mount/mount_linux.go b/pkg/util/mount/mount_linux.go index 1685ecb48f0..ae8a96cba3b 100644 --- a/pkg/util/mount/mount_linux.go +++ b/pkg/util/mount/mount_linux.go @@ -115,13 +115,44 @@ func doMount(mounterPath string, mountCmd string, source string, target string, mountCmd = mounterPath } + if systemdRunPath, err := exec.LookPath("systemd-run"); err == nil { + // Try to run mount via systemd-run --scope. This will escape the + // service where kubelet runs and any fuse daemons will be started in a + // specific scope. The kubelet service can then be restarted without killing + // these fuse daemons. + // + // Complete command line (when mounterPath is not used): + // systemd-run --description=... --scope -- mount -t + // + // Expected flow: + // * systemd-run creates a transient scope (=~ cgroup) and executes its + // argument (/bin/mount) there. + // * mount does its job, forks a fuse daemon if necessary and finishes. + // (systemd-run --scope finishes at this point, returning mount's exit + // code and stdout/stderr - that's one of --scope benefits). + // * systemd keeps the fuse daemon running in the scope (i.e. in its own + // cgroup) until the fuse daemon dies (another --scope benefit). + // Kubelet service can be restarted and the fuse daemon survives. + // * When the fuse daemon dies (e.g. during unmount) systemd removes the + // scope automatically. + // + // systemd-mount is not used because it's too new for older distros + // (CentOS 7, Debian Jessie). + mountCmd, mountArgs = addSystemdScope(systemdRunPath, target, mountCmd, mountArgs) + } else { + // No systemd-run on the host (or we failed to check it), assume kubelet + // does not run as a systemd service. + // No code here, mountCmd and mountArgs are already populated. + } + glog.V(4).Infof("Mounting cmd (%s) with arguments (%s)", mountCmd, mountArgs) command := exec.Command(mountCmd, mountArgs...) 
output, err := command.CombinedOutput() if err != nil { - glog.Errorf("Mount failed: %v\nMounting command: %s\nMounting arguments: %s %s %s %v\nOutput: %s\n", err, mountCmd, source, target, fstype, options, string(output)) - return fmt.Errorf("mount failed: %v\nMounting command: %s\nMounting arguments: %s %s %s %v\nOutput: %s\n", - err, mountCmd, source, target, fstype, options, string(output)) + args := strings.Join(mountArgs, " ") + glog.Errorf("Mount failed: %v\nMounting command: %s\nMounting arguments: %s\nOutput: %s\n", err, mountCmd, args, string(output)) + return fmt.Errorf("mount failed: %v\nMounting command: %s\nMounting arguments: %s\nOutput: %s\n", + err, mountCmd, args, string(output)) } return err } @@ -145,6 +176,13 @@ func makeMountArgs(source, target, fstype string, options []string) []string { return mountArgs } +// addSystemdScope wraps the given command line in "systemd-run --description=... --scope --"; it returns systemdRunPath as the new command and the systemd-run flags followed by command and args as the new arguments. +func addSystemdScope(systemdRunPath, mountName, command string, args []string) (string, []string) { + descriptionArg := fmt.Sprintf("--description=Kubernetes transient mount for %s", mountName) + systemdRunArgs := []string{descriptionArg, "--scope", "--", command} + return systemdRunPath, append(systemdRunArgs, args...) +} + // Unmount unmounts the target. func (mounter *Mounter) Unmount(target string) error { glog.V(4).Infof("Unmounting %s", target) diff --git a/pkg/util/mount/nsenter_mount.go b/pkg/util/mount/nsenter_mount.go index f3a4afc1b0b..e0b82d1cd7d 100644 --- a/pkg/util/mount/nsenter_mount.go +++ b/pkg/util/mount/nsenter_mount.go @@ -50,7 +50,7 @@ import ( // contents. TODO: remove this requirement. // 6. The host image must have mount, findmnt, and umount binaries in /bin, // /usr/sbin, or /usr/bin -// +// 7. 
The host image should have systemd-run in /bin, /usr/sbin, or /usr/bin // For more information about mount propagation modes, see: // https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt type NsenterMounter struct { @@ -61,9 +61,10 @@ type NsenterMounter struct { func NewNsenterMounter() *NsenterMounter { m := &NsenterMounter{ paths: map[string]string{ - "mount": "", - "findmnt": "", - "umount": "", + "mount": "", + "findmnt": "", + "umount": "", + "systemd-run": "", }, } // search for the mount command in other locations besides /usr/bin @@ -79,6 +80,7 @@ func NewNsenterMounter() *NsenterMounter { break } // TODO: error, so that the kubelet can stop if the mounts don't exist + // (don't forget that systemd-run is optional) } return m } @@ -127,15 +129,47 @@ func (n *NsenterMounter) doNsenterMount(source, target, fstype string, options [ // makeNsenterArgs makes a list of argument to nsenter in order to do the // requested mount. func (n *NsenterMounter) makeNsenterArgs(source, target, fstype string, options []string) []string { + mountCmd := n.absHostPath("mount") + mountArgs := makeMountArgs(source, target, fstype, options) + + if systemdRunPath := n.paths["systemd-run"]; systemdRunPath != "" { + // NewNsenterMounter always creates the "systemd-run" key (default ""), so the path value is tested, not key presence. Complete command line: + // nsenter --mount=/rootfs/proc/1/ns/mnt -- /bin/systemd-run --description=... --scope -- /bin/mount -t + // Expected flow is: + // * nsenter breaks out of container's mount namespace and executes + // host's systemd-run. + // * systemd-run creates a transient scope (=~ cgroup) and executes its + // argument (/bin/mount) there. + // * mount does its job, forks a fuse daemon if necessary and finishes. + // (systemd-run --scope finishes at this point, returning mount's exit + // code and stdout/stderr - that's one of --scope benefits). + // * systemd keeps the fuse daemon running in the scope (i.e. in its own + // cgroup) until the fuse daemon dies (another --scope benefit). 
+ // Kubelet container can be restarted and the fuse daemon survives. + // * When the daemon dies (e.g. during unmount) systemd removes the + // scope automatically. + mountCmd, mountArgs = addSystemdScope(systemdRunPath, target, mountCmd, mountArgs) + } else { + // Fall back to simple mount when no systemd-run path is recorded in n.paths (empty or missing entry). + // Complete command line: + // nsenter --mount=/rootfs/proc/1/ns/mnt -- /bin/mount -t + // Expected flow is: + // * nsenter breaks out of container's mount namespace and executes host's /bin/mount. + // * mount does its job, forks a fuse daemon if necessary and finishes. + // * Any fuse daemon runs in cgroup of kubelet docker container, + // restart of kubelet container will kill it! + + // No code here, mountCmd and mountArgs use /bin/mount + } + nsenterArgs := []string{ "--mount=/rootfs/proc/1/ns/mnt", "--", - n.absHostPath("mount"), + mountCmd, } + nsenterArgs = append(nsenterArgs, mountArgs...) - args := makeMountArgs(source, target, fstype, options) - - return append(nsenterArgs, args...) + return nsenterArgs } // Unmount runs umount(8) in the host's mount namespace. @@ -146,7 +180,9 @@ func (n *NsenterMounter) Unmount(target string) error { n.absHostPath("umount"), target, } - + // No need to execute systemd-run here, it's enough that unmount is executed + // in the host's mount namespace. It will finish the appropriate fuse daemon(s) + // running in any scope. glog.V(5).Infof("Unmount command: %v %v", nsenterPath, args) exec := exec.New() outputBytes, err := exec.Command(nsenterPath, args...).CombinedOutput()