runtime: Properly handle ESRCH error when signaling container

Currently kata shim v2 doesn't handle the ESRCH error returned when
signaling a container, causing the container to fail to stop and the
shim to leak.

Fixes: #3874

Signed-off-by: Feng Wang <feng.wang@databricks.com>
This commit is contained in:
Feng Wang 2022-03-11 10:44:26 -08:00
parent 5a7fd943c1
commit aa5ae6b17c
2 changed files with 14 additions and 1 deletions

View File

@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
@ -1060,7 +1061,18 @@ func (c *Container) signalProcess(ctx context.Context, processID string, signal
return fmt.Errorf("Container not ready, running or paused, impossible to signal the container")
}
return c.sandbox.agent.signalProcess(ctx, c, processID, signal, all)
// kill(2) method can return ESRCH in certain cases, which is not handled by containerd cri server in container_stop.go.
// CRIO server also doesn't handle ESRCH. So kata runtime will swallow it here.
var err error
if err = c.sandbox.agent.signalProcess(ctx, c, processID, signal, all); err != nil &&
strings.Contains(err.Error(), "ESRCH: No such process") {
c.Logger().WithFields(logrus.Fields{
"container": c.id,
"process-id": processID,
}).Warn("signal encounters ESRCH, process already finished")
return nil
}
return err
}
func (c *Container) winsizeProcess(ctx context.Context, processID string, height, width uint32) error {

View File

@ -321,6 +321,7 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l
if initialSignal != syscall.Signal(0) {
if err = syscall.Kill(pid, initialSignal); err != nil {
if err == syscall.ESRCH {
logger.WithField("pid", pid).Warnf("kill encounters ESRCH, process already finished")
return nil
}