Merge pull request #1723 from lifupan/fixcleanup

fix the issue of hypervisor process is killed by kubelet
This commit is contained in:
Julio Montes 2019-05-22 08:44:37 -05:00 committed by GitHub
commit 7b9a9ffb79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 38 deletions

View File

@ -6,7 +6,6 @@
package containerdshim
import (
"sync"
"time"
"github.com/containerd/containerd/api/types/task"
@ -31,7 +30,6 @@ type container struct {
stderr string
bundle string
cType vc.ContainerType
mu sync.Mutex
exit uint32
status task.Status
terminal bool

View File

@ -17,21 +17,21 @@ import (
)
func deleteContainer(ctx context.Context, s *service, c *container) error {
status, err := s.sandbox.StatusContainer(c.id)
if err != nil {
return err
}
if status.State.State != types.StateStopped {
_, err = s.sandbox.StopContainer(c.id)
if !c.cType.IsSandbox() {
status, err := s.sandbox.StatusContainer(c.id)
if err != nil {
return err
}
}
if status.State.State != types.StateStopped {
_, err = s.sandbox.StopContainer(c.id)
if err != nil {
return err
}
}
_, err = s.sandbox.DeleteContainer(c.id)
if err != nil {
return err
if _, err = s.sandbox.DeleteContainer(c.id); err != nil {
return err
}
}
// Run post-stop OCI hooks.

View File

@ -431,29 +431,12 @@ func (s *service) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (_ *task
}
if r.ExecID == "" {
err = deleteContainer(ctx, s, c)
if err != nil {
if err = deleteContainer(ctx, s, c); err != nil {
return nil, err
}
// Take care of the use case where it is a sandbox.
// Right after the container representing the sandbox has
// been deleted, let's make sure we stop and delete the
// sandbox.
if c.cType.IsSandbox() {
if err = s.sandbox.Stop(); err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).Error("failed to stop sandbox")
return nil, err
}
if err = s.sandbox.Delete(); err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).Error("failed to delete sandbox")
return nil, err
}
}
s.send(&eventstypes.TaskDelete{
ContainerID: s.id,
ContainerID: c.id,
Pid: s.pid,
ExitStatus: c.exit,
ExitedAt: c.exitTime,
@ -677,6 +660,20 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
return nil, err
}
// According to CRI specs, kubelet will call StopPodSandbox()
// at least once before calling RemovePodSandbox, and this call
// is idempotent, and must not return an error if all relevant
// resources have already been reclaimed. And in that call it will
// send a SIGKILL signal first to try to stop the container, thus
// once the container has terminated, here should ignore this signal
// and return directly.
if signum == syscall.SIGKILL || signum == syscall.SIGTERM {
if c.status == task.StatusStopped {
logrus.WithField("sandbox", s.sandbox.ID()).WithField("Container", c.id).Debug("Container has already been stopped")
return empty, nil
}
}
processID := c.id
if r.ExecID != "" {
execs, err := c.getExec(r.ExecID)

View File

@ -42,23 +42,41 @@ func wait(s *service, c *container, execID string) (int32, error) {
}
timeStamp := time.Now()
c.mu.Lock()
s.mu.Lock()
if execID == "" {
// Take care of the use case where it is a sandbox.
// Right after the container representing the sandbox has
// been deleted, let's make sure we stop and delete the
// sandbox.
if c.cType.IsSandbox() {
if err = s.sandbox.Stop(); err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).Error("failed to stop sandbox")
}
if err = s.sandbox.Delete(); err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).Error("failed to delete sandbox")
}
} else {
if _, err = s.sandbox.StopContainer(c.id); err != nil {
logrus.WithError(err).WithField("container", c.id).Warn("stop container failed")
}
}
c.status = task.StatusStopped
c.exit = uint32(ret)
c.exitTime = timeStamp
c.exitCh <- uint32(ret)
} else {
execs.status = task.StatusStopped
execs.exitCode = ret
execs.exitTime = timeStamp
}
c.mu.Unlock()
if execID == "" {
c.exitCh <- uint32(ret)
} else {
execs.exitCh <- uint32(ret)
}
s.mu.Unlock()
go cReap(s, int(ret), c.id, execID, timeStamp)