From 2e86db78cf5ced72268a691196b4d49e349ebf58 Mon Sep 17 00:00:00 2001 From: Feng Wang <feng.wang@databricks.com> Date: Fri, 11 Mar 2022 10:44:26 -0800 Subject: [PATCH] runtime: Properly handle ESRCH error when signaling container Currently kata shim v2 doesn't handle the ESRCH error returned when signaling a container, causing the container to fail to stop and the shim to leak. Fixes: #3874 Signed-off-by: Feng Wang <feng.wang@databricks.com> (cherry picked from commit aa5ae6b17c688a3fa22c28d1ec3a71a6cabb9c57) --- src/runtime/virtcontainers/container.go | 14 +++++++++++++- src/runtime/virtcontainers/utils/utils.go | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index d482f2a129..8b031dba80 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -14,6 +14,7 @@ import ( "os" "path/filepath" "strconv" + "strings" "syscall" "time" @@ -1144,7 +1145,18 @@ func (c *Container) signalProcess(ctx context.Context, processID string, signal return fmt.Errorf("Container not ready, running or paused, impossible to signal the container") } - return c.sandbox.agent.signalProcess(ctx, c, processID, signal, all) + // kill(2) method can return ESRCH in certain cases, which is not handled by containerd cri server in container_stop.go. + // CRIO server also doesn't handle ESRCH. So kata runtime will swallow it here. 
+ var err error + if err = c.sandbox.agent.signalProcess(ctx, c, processID, signal, all); err != nil && + strings.Contains(err.Error(), "ESRCH: No such process") { + c.Logger().WithFields(logrus.Fields{ + "container": c.id, + "process-id": processID, + }).Warn("signal encounters ESRCH, process already finished") + return nil + } + return err } func (c *Container) winsizeProcess(ctx context.Context, processID string, height, width uint32) error { diff --git a/src/runtime/virtcontainers/utils/utils.go b/src/runtime/virtcontainers/utils/utils.go index caf5965858..35a7eaf7a9 100644 --- a/src/runtime/virtcontainers/utils/utils.go +++ b/src/runtime/virtcontainers/utils/utils.go @@ -321,6 +321,7 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l if initialSignal != syscall.Signal(0) { if err = syscall.Kill(pid, initialSignal); err != nil { if err == syscall.ESRCH { + logger.WithField("pid", pid).Warnf("kill encounters ESRCH, process already finished") return nil }