shimv2: monitor sandbox liveness

When the sandbox quits unexpectedly, clean things up as much as we can.

Fixes: #1896
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
This commit is contained in:
Peng Tao 2019-07-19 04:22:43 -07:00
parent 262484de68
commit e02f6dc067
3 changed files with 47 additions and 0 deletions

View File

@ -114,6 +114,7 @@ type service struct {
containers map[string]*container
config *oci.RuntimeConfig
events chan interface{}
monitor chan error
cancel func()

View File

@ -30,6 +30,12 @@ func startContainer(ctx context.Context, s *service, c *container) error {
if err != nil {
return err
}
// Start monitor after starting sandbox
s.monitor, err = s.sandbox.Monitor()
if err != nil {
return err
}
go watchSandbox(s)
} else {
_, err := s.sandbox.StartContainer(c.id)
if err != nil {

View File

@ -6,9 +6,11 @@
package containerdshim
import (
"path"
"time"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/mount"
"github.com/sirupsen/logrus"
)
@ -82,3 +84,41 @@ func wait(s *service, c *container, execID string) (int32, error) {
return ret, nil
}
// watchSandbox waits for a result on the sandbox monitor channel and, when
// that result is a non-nil error, cleans up as much of the sandbox as it can.
//
// It returns immediately if s.monitor is nil, and returns without doing any
// cleanup if the value received from the channel is nil (which is also what a
// receive on a closed channel yields). Otherwise, while holding s.mu, it:
//   - clears s.monitor,
//   - stops the sandbox (passing true to Stop) and deletes it,
//   - unmounts the "rootfs" directory under every known container's bundle
//     when s.mount is set,
//   - replaces s.containers with an empty map.
//
// Every cleanup step is best-effort: failures are logged as warnings and the
// remaining steps still run. The function blocks on the channel receive, so
// the caller is expected to run it in its own goroutine.
func watchSandbox(s *service) {
	if s.monitor == nil {
		return
	}
	err := <-s.monitor
	if err == nil {
		return
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	// Clear the monitor only once s.mu is held, so this write is serialized
	// with the other service state mutated below instead of happening
	// unsynchronized before the lock is taken.
	s.monitor = nil

	// sandbox malfunctioning, cleanup as much as we can
	logrus.WithError(err).Warn("sandbox stopped unexpectedly")
	if err := s.sandbox.Stop(true); err != nil {
		logrus.WithError(err).Warn("stop sandbox failed")
	}
	if err := s.sandbox.Delete(); err != nil {
		logrus.WithError(err).Warn("delete sandbox failed")
	}

	if s.mount {
		for _, c := range s.containers {
			rootfs := path.Join(c.bundle, "rootfs")
			logrus.WithField("rootfs", rootfs).WithField("id", c.id).Debug("container umount rootfs")
			if err := mount.UnmountAll(rootfs, 0); err != nil {
				logrus.WithError(err).Warn("failed to cleanup rootfs mount")
			}
		}
	}

	// Drop every tracked container by swapping in a fresh, empty map.
	s.containers = make(map[string]*container)

	// Existing container/exec will be cleaned up by its waiters.
	// No need to send async events here.
}