1
0
mirror of https://github.com/kata-containers/kata-containers.git synced 2025-09-04 18:30:03 +00:00

runtime: Add more debug logs for container io stream copy

This can help with debugging container lifecycle issues

Fixes: 

Signed-off-by: Feng Wang <feng.wang@databricks.com>
This commit is contained in:
Feng Wang
2022-03-24 07:51:07 -07:00
parent 853dd98b7b
commit 19f372b5f5
4 changed files with 33 additions and 5 deletions
src/runtime/pkg/containerd-shim-v2

@@ -8,12 +8,14 @@ package containerdshim
import (
"context"
"fmt"
"github.com/sirupsen/logrus"
"github.com/containerd/containerd/api/types/task"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
)
func startContainer(ctx context.Context, s *service, c *container) (retErr error) {
shimLog.WithField("container", c.id).Debug("start container")
defer func() {
if retErr != nil {
// notify the wait goroutine to continue
@@ -78,7 +80,8 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error
return err
}
c.ttyio = tty
go ioCopy(c.exitIOch, c.stdinCloser, tty, stdin, stdout, stderr)
go ioCopy(shimLog.WithField("container", c.id), c.exitIOch, c.stdinCloser, tty, stdin, stdout, stderr)
} else {
// close the io exit channel, since there is no io for this container,
// otherwise the following wait goroutine will hang on this channel.
@@ -94,6 +97,10 @@ func startContainer(ctx context.Context, s *service, c *container) (retErr error
}
func startExec(ctx context.Context, s *service, containerID, execID string) (e *exec, retErr error) {
shimLog.WithFields(logrus.Fields{
"container": containerID,
"exec": execID,
}).Debug("start container execution")
// start an exec
c, err := s.getContainer(containerID)
if err != nil {
@@ -140,7 +147,10 @@ func startExec(ctx context.Context, s *service, containerID, execID string) (e *
}
execs.ttyio = tty
go ioCopy(execs.exitIOch, execs.stdinCloser, tty, stdin, stdout, stderr)
go ioCopy(shimLog.WithFields(logrus.Fields{
"container": c.id,
"exec": execID,
}), execs.exitIOch, execs.stdinCloser, tty, stdin, stdout, stderr)
go wait(ctx, s, c, execID)

@@ -12,6 +12,7 @@ import (
"syscall"
"github.com/containerd/fifo"
"github.com/sirupsen/logrus"
)
// The buffer size used to specify the buffer for IO streams copy
@@ -86,18 +87,20 @@ func newTtyIO(ctx context.Context, stdin, stdout, stderr string, console bool) (
return ttyIO, nil
}
func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPipe, stderrPipe io.Reader) {
func ioCopy(shimLog *logrus.Entry, exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPipe, stderrPipe io.Reader) {
var wg sync.WaitGroup
if tty.Stdin != nil {
wg.Add(1)
go func() {
shimLog.Debug("stdin io stream copy started")
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(stdinPipe, tty.Stdin, *p)
// notify that we can close process's io safely.
close(stdinCloser)
wg.Done()
shimLog.Debug("stdin io stream copy exited")
}()
}
@@ -105,6 +108,7 @@ func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteClo
wg.Add(1)
go func() {
shimLog.Debug("stdout io stream copy started")
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(tty.Stdout, stdoutPipe, *p)
@@ -113,20 +117,24 @@ func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteClo
// close stdin to make the other routine stop
tty.Stdin.Close()
}
shimLog.Debug("stdout io stream copy exited")
}()
}
if tty.Stderr != nil && stderrPipe != nil {
wg.Add(1)
go func() {
shimLog.Debug("stderr io stream copy started")
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(tty.Stderr, stderrPipe, *p)
wg.Done()
shimLog.Debug("stderr io stream copy exited")
}()
}
wg.Wait()
tty.close()
close(exitch)
shimLog.Debug("all io stream copy goroutines exited")
}

@@ -7,6 +7,7 @@ package containerdshim
import (
"context"
"github.com/sirupsen/logrus"
"io"
"os"
"path/filepath"
@@ -179,7 +180,7 @@ func TestIoCopy(t *testing.T) {
defer tty.close()
// start the ioCopy threads : copy from src to dst
go ioCopy(exitioch, stdinCloser, tty, dstInW, srcOutR, srcErrR)
go ioCopy(logrus.WithContext(context.Background()), exitioch, stdinCloser, tty, dstInW, srcOutR, srcErrR)
var firstW, secondW, thirdW io.WriteCloser
var firstR, secondR, thirdR io.Reader

@@ -31,12 +31,17 @@ func wait(ctx context.Context, s *service, c *container, execID string) (int32,
if execID == "" {
//wait until the io closed, then wait the container
<-c.exitIOch
shimLog.WithField("container", c.id).Debug("The container io streams closed")
} else {
execs, err = c.getExec(execID)
if err != nil {
return exitCode255, err
}
<-execs.exitIOch
shimLog.WithFields(logrus.Fields{
"container": c.id,
"exec": execID,
}).Debug("The container process io streams closed")
//This wait could be triggered before exec start which
//will get the exec's id, thus this assignment must after
//the exec exit, to make sure it get the exec's id.
@@ -82,13 +87,17 @@ func wait(ctx context.Context, s *service, c *container, execID string) (int32,
c.exitTime = timeStamp
c.exitCh <- uint32(ret)
shimLog.WithField("container", c.id).Debug("The container status is StatusStopped")
} else {
execs.status = task.StatusStopped
execs.exitCode = ret
execs.exitTime = timeStamp
execs.exitCh <- uint32(ret)
shimLog.WithFields(logrus.Fields{
"container": c.id,
"exec": execID,
}).Debug("The container exec status is StatusStopped")
}
s.mu.Unlock()