diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index 8b05bfc2fc..bac62e8cfd 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -25,6 +25,8 @@ const cpBinaryName = "cp" const fileMode0755 = os.FileMode(0755) +const maxWaitDelay = 50 * time.Millisecond + // The DefaultRateLimiterRefillTime is used for calculating the rate at // which a TokenBucket is replinished, in cases where a RateLimiter is // applied to either network or disk I/O. @@ -306,6 +308,44 @@ func ConvertAddressFamily(family int32) pbTypes.IPFamily { } } +func waitProcessUsingWaitLoop(pid int, timeoutSecs uint, logger *logrus.Entry) bool { + secs := time.Duration(timeoutSecs) * time.Second + timeout := time.After(secs) + delay := 1 * time.Millisecond + + for { + // Wait4 is used to reap and check that a child terminated. + // Without the Wait4 call, Kill(0) for a child will always exit without + // error because the process isn't reaped. + // Wait4 returns an ECHILD error for non-child processes. Kill(0) is meant + // to address this case: once the process is reaped by the init process, + // the call will return an ESRCH error. + + // "A watched pot never boils" and an unwaited-for process never appears to die! + waitedPid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil) + + if waitedPid == pid && err == nil { + return false + } + + if err := syscall.Kill(pid, syscall.Signal(0)); err != nil { + return false + } + + select { + case <-time.After(delay): + delay = delay * 5 + + if delay > maxWaitDelay { + delay = maxWaitDelay + } + case <-timeout: + logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs) + return true + } + } +} + // WaitLocalProcess waits for the specified process for up to timeoutSecs seconds. 
// // Notes: @@ -334,49 +374,24 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l } } - pidRunning := true - - secs := time.Duration(timeoutSecs) - timeout := time.After(secs * time.Second) - - // Wait for the VM process to terminate -outer: - for { - select { - case <-time.After(50 * time.Millisecond): - // Check if the process is running periodically to avoid a busy loop - - var _status syscall.WaitStatus - var _rusage syscall.Rusage - var waitedPid int - - // "A watched pot never boils" and an unwaited-for process never appears to die! - waitedPid, err = syscall.Wait4(pid, &_status, syscall.WNOHANG, &_rusage) - - if waitedPid == pid && err == nil { - pidRunning = false - break outer - } - - if err = syscall.Kill(pid, syscall.Signal(0)); err != nil { - pidRunning = false - break outer - } - - break - - case <-timeout: - logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs) - - break outer - } - } + pidRunning := waitForProcessCompletion(pid, timeoutSecs, logger) if pidRunning { // Force process to die if err = syscall.Kill(pid, syscall.SIGKILL); err != nil { + if err == syscall.ESRCH { + logger.WithField("pid", pid).Warnf("process already finished") + return nil + } return fmt.Errorf("Failed to stop process %v: %s", pid, err) } + + for { + _, err := syscall.Wait4(pid, nil, 0, nil) + if err != syscall.EINTR { + break + } + } } return nil diff --git a/src/runtime/virtcontainers/utils/utils_darwin.go b/src/runtime/virtcontainers/utils/utils_darwin.go index 54b2124ce1..db1d9ea750 100644 --- a/src/runtime/virtcontainers/utils/utils_darwin.go +++ b/src/runtime/virtcontainers/utils/utils_darwin.go @@ -5,6 +5,12 @@ package utils +import "github.com/sirupsen/logrus" + func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) { return } + +func waitForProcessCompletion(pid int, timeoutSecs uint, logger *logrus.Entry) bool { + return waitProcessUsingWaitLoop(pid, 
timeoutSecs, logger) +} diff --git a/src/runtime/virtcontainers/utils/utils_linux.go b/src/runtime/virtcontainers/utils/utils_linux.go index c223fb93bb..40c5c360ea 100644 --- a/src/runtime/virtcontainers/utils/utils_linux.go +++ b/src/runtime/virtcontainers/utils/utils_linux.go @@ -14,8 +14,10 @@ import ( "os" "strings" "syscall" + "time" "unsafe" + "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -151,3 +153,60 @@ func IsAPVFIOMediatedDevice(sysfsdev string) bool { } return false } + +func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) { + pidfd, err := unix.PidfdOpen(pid, 0) + + if err != nil { + if err == unix.ESRCH { + return false, nil + } + + return true, err + } + + defer unix.Close(pidfd) + var n int + + maxDelay := time.Duration(timeoutSecs) * time.Second + end := time.Now().Add(maxDelay) + + for { + remaining := time.Until(end).Milliseconds() + if remaining < 0 { + remaining = 0 + } + + n, err = unix.Poll([]unix.PollFd{{Fd: int32(pidfd), Events: unix.POLLIN}}, int(remaining)) + if err != unix.EINTR { + break + } + } + + if err != nil || n != 1 { + logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs) + return true, err + } + + for { + err := unix.Waitid(unix.P_PIDFD, pidfd, nil, unix.WEXITED, nil) + if err == unix.EINVAL { + err = unix.Waitid(unix.P_PID, pid, nil, unix.WEXITED, nil) + } + + if err != unix.EINTR { + break + } + } + return false, nil +} + +func waitForProcessCompletion(pid int, timeoutSecs uint, logger *logrus.Entry) bool { + pidRunning, err := waitProcessUsingPidfd(pid, timeoutSecs, logger) + + if err == unix.ENOSYS { + pidRunning = waitProcessUsingWaitLoop(pid, timeoutSecs, logger) + } + + return pidRunning +}