kubelet: fix /stats/summary endpoint on Windows when init-containers are present on the node

Following the changes in #87730, the Kubelet uses hcsshim directly to gather stats.
However, unlike the `docker stats` API that was used before, hcsshim does not
keep information about exited containers.

When the Kubelet lists containers (`docker_container.go:ListContainers()`),
it sets `All: true`, retrieving non-running containers.

When `docker stats` is called with such a container ID, it returns valid JSON
with all values set to 0. The non-running containers are filtered out later in the process.

When hcsshim is called with such a container ID, it returns an error, effectively
stopping stats retrieval for all containers.
This commit is contained in:
Vincent Boulineau
2020-04-28 12:07:30 +02:00
parent 885e1e929a
commit 3bff11244b
3 changed files with 21 additions and 4 deletions

View File

@@ -53,8 +53,9 @@ func (ds *dockerService) ListContainerStats(ctx context.Context, r *runtimeapi.L
if err != nil {
return nil, err
}
stats = append(stats, containerStats)
if containerStats != nil {
stats = append(stats, containerStats)
}
}
return &runtimeapi.ListContainerStatsResponse{Stats: stats}, nil

View File

@@ -35,7 +35,13 @@ func (ds *dockerService) getContainerStats(containerID string) (*runtimeapi.Cont
hcsshim_container, err := hcsshim.OpenContainer(containerID)
if err != nil {
return nil, err
// As we moved from using Docker stats to hcsshim directly, we may query HCS with already exited container IDs.
// That will typically happen with init-containers in Exited state. Docker still knows about them but the HCS does not.
// As we don't want to block stats retrieval for other containers, we only log errors.
if !hcsshim.IsNotExist(err) && !hcsshim.IsAlreadyStopped(err) {
klog.Errorf("Error opening container (stats will be missing) '%s': %v", containerID, err)
}
return nil, nil
}
defer func() {
closeErr := hcsshim_container.Close()

View File

@@ -146,7 +146,17 @@ func newKubeletStatsTestPods(numPods int, image imageutils.Config, nodeName stri
},
},
},
InitContainers: []v1.Container{
{
Image: image.GetE2EImage(),
Name: podName,
Command: []string{
"powershell.exe",
"-Command",
"sleep -Seconds 1",
},
},
},
NodeName: nodeName,
},
}