From ff95ae0d3cb75d6449c35eb668b381dc356a66d7 Mon Sep 17 00:00:00 2001 From: Sascha Grunert Date: Fri, 22 Mar 2024 09:04:40 +0100 Subject: [PATCH] Continue streaming kubelet logs when runtime is unavailable Container runtimes are able to run existing containers even when their main CRI server is not available for any reason. The call to the container status RPC happens quite frequently during log parsing, which means that a single CRI interruption will also abort streaming the logs. We now check that specific use case and continue following the log streaming if the CRI is unavailable. We still abort the streaming accordingly if the CRI comes back and the container status reports that the workload has exited. Signed-off-by: Sascha Grunert --- staging/src/k8s.io/cri-client/pkg/logs/logs.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/staging/src/k8s.io/cri-client/pkg/logs/logs.go b/staging/src/k8s.io/cri-client/pkg/logs/logs.go index 8642f5a180a..eadeee3f7d0 100644 --- a/staging/src/k8s.io/cri-client/pkg/logs/logs.go +++ b/staging/src/k8s.io/cri-client/pkg/logs/logs.go @@ -30,6 +30,8 @@ import ( "time" "github.com/fsnotify/fsnotify" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" v1 "k8s.io/api/core/v1" internalapi "k8s.io/cri-api/pkg/apis" @@ -422,6 +424,13 @@ func ReadLogs(ctx context.Context, logger *klog.Logger, path, containerID string func isContainerRunning(ctx context.Context, logger *klog.Logger, id string, r internalapi.RuntimeService) (bool, error) { resp, err := r.ContainerStatus(ctx, id, false) if err != nil { + // Assume that the container is still running when the runtime is + // unavailable. Most runtimes support that containers can be in running + // state even if their CRI server is not available right now. + if status.Code(err) == codes.Unavailable { + return true, nil + } + return false, err } status := resp.GetStatus()