runtime: Properly handle ESRCH error when signaling container

Currently kata shim v2 doesn't translate the ESRCH error returned when
signaling a container, causing the container to fail to stop and the shim to leak.

Fixes: #3874

Signed-off-by: Feng Wang <feng.wang@databricks.com>
This commit is contained in:
Feng Wang 2022-03-11 10:44:26 -08:00
parent 5a7fd943c1
commit aa5ae6b17c
2 changed files with 14 additions and 1 deletions

View File

@ -12,6 +12,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings"
"syscall" "syscall"
"time" "time"
@ -1060,7 +1061,18 @@ func (c *Container) signalProcess(ctx context.Context, processID string, signal
return fmt.Errorf("Container not ready, running or paused, impossible to signal the container") return fmt.Errorf("Container not ready, running or paused, impossible to signal the container")
} }
return c.sandbox.agent.signalProcess(ctx, c, processID, signal, all) // kill(2) method can return ESRCH in certain cases, which is not handled by containerd cri server in container_stop.go.
// CRIO server also doesn't handle ESRCH. So kata runtime will swallow it here.
var err error
if err = c.sandbox.agent.signalProcess(ctx, c, processID, signal, all); err != nil &&
strings.Contains(err.Error(), "ESRCH: No such process") {
c.Logger().WithFields(logrus.Fields{
"container": c.id,
"process-id": processID,
}).Warn("signal encounters ESRCH, process already finished")
return nil
}
return err
} }
func (c *Container) winsizeProcess(ctx context.Context, processID string, height, width uint32) error { func (c *Container) winsizeProcess(ctx context.Context, processID string, height, width uint32) error {

View File

@ -321,6 +321,7 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l
if initialSignal != syscall.Signal(0) { if initialSignal != syscall.Signal(0) {
if err = syscall.Kill(pid, initialSignal); err != nil { if err = syscall.Kill(pid, initialSignal); err != nil {
if err == syscall.ESRCH { if err == syscall.ESRCH {
logger.WithField("pid", pid).Warnf("kill encounters ESRCH, process already finished")
return nil return nil
} }