From 3bff11244b1016164357b13d414033e2733d5895 Mon Sep 17 00:00:00 2001 From: Vincent Boulineau Date: Tue, 28 Apr 2020 12:07:30 +0200 Subject: [PATCH] kubelet: fix `/stats/summary` endpoint on Windows when init-containers are present on the node Following changes in #87730, Kubelet is directly hcsshim to gather stats. However, unlike `docker stats` API that was used before, hcsshim does not keep information about exited containers. When the Kubelet lists containers (`docker_container.go:ListContainers()`), it sets `All: true`, retrieving non-running containers. When docker stats is called with such container id, it'll return a valid JSON with all values set to 0. The non-running containers are filtered later on in the process. When the hcsshim is called with such container id, it'll return an error, effectively stopping the stats retrieval for all containers. --- pkg/kubelet/dockershim/docker_stats.go | 5 +++-- pkg/kubelet/dockershim/docker_stats_windows.go | 8 +++++++- test/e2e/windows/kubelet_stats.go | 12 +++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/dockershim/docker_stats.go b/pkg/kubelet/dockershim/docker_stats.go index ede30c4efbb..0a186cac31c 100644 --- a/pkg/kubelet/dockershim/docker_stats.go +++ b/pkg/kubelet/dockershim/docker_stats.go @@ -53,8 +53,9 @@ func (ds *dockerService) ListContainerStats(ctx context.Context, r *runtimeapi.L if err != nil { return nil, err } - - stats = append(stats, containerStats) + if containerStats != nil { + stats = append(stats, containerStats) + } } return &runtimeapi.ListContainerStatsResponse{Stats: stats}, nil diff --git a/pkg/kubelet/dockershim/docker_stats_windows.go b/pkg/kubelet/dockershim/docker_stats_windows.go index 086624c4737..cd457d9f1b2 100644 --- a/pkg/kubelet/dockershim/docker_stats_windows.go +++ b/pkg/kubelet/dockershim/docker_stats_windows.go @@ -35,7 +35,13 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont hcsshim_container, err := hcsshim.OpenContainer(containerID) if err != nil { - return nil, err + // As we moved from using Docker stats to hcsshim directly, we may query HCS with already exited container IDs. + // That will typically happen with init-containers in Exited state. Docker still knows about them but the HCS does not. + // As we don't want to block stats retrieval for other containers, we only log errors. + if !hcsshim.IsNotExist(err) && !hcsshim.IsAlreadyStopped(err) { + klog.Errorf("Error opening container (stats will be missing) '%s': %v", containerID, err) + } + return nil, nil } defer func() { closeErr := hcsshim_container.Close() diff --git a/test/e2e/windows/kubelet_stats.go b/test/e2e/windows/kubelet_stats.go index 25710f14715..89a41a9922e 100644 --- a/test/e2e/windows/kubelet_stats.go +++ b/test/e2e/windows/kubelet_stats.go @@ -146,7 +146,17 @@ func newKubeletStatsTestPods(numPods int, image imageutils.Config, nodeName stri }, }, }, - + InitContainers: []v1.Container{ + { + Image: image.GetE2EImage(), + Name: podName, + Command: []string{ + "powershell.exe", + "-Command", + "sleep -Seconds 1", + }, + }, + }, NodeName: nodeName, }, }