From 54923164b5ec04cf70f94e133e75b3051b809bcc Mon Sep 17 00:00:00 2001 From: Alexandru Matei Date: Mon, 8 Apr 2024 15:44:46 +0300 Subject: [PATCH] clh: isClhRunning waits for full timeout when clh exits isClhRunning uses signal 0 to test whether the process is still alive or not. This doesn't work because the process is a direct child of the shim. Once it is dead, the process becomes a zombie. Since no one waits for it, the process lingers until its parent dies and init reaps it. Hence sending signal 0 in isClhRunning will always return success, whether the process is dead or not. This patch first calls wait4 with WNOHANG to reap the process: if the call returns the pid, the process was our child and has exited, so isClhRunning returns false; otherwise we fall back to sending signal 0. Fixes: #9431 Signed-off-by: Alexandru Matei --- src/runtime/virtcontainers/clh.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index a91550781b..1bc17fe525 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1467,7 +1467,12 @@ func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) { timeStart := time.Now() cl := clh.client() for { - err := syscall.Kill(pid, syscall.Signal(0)) + waitedPid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil) + if waitedPid == pid && err == nil { + return false, nil + } + + err = syscall.Kill(pid, syscall.Signal(0)) if err != nil { return false, nil }