From 5802f3a9103c661da49ab194d454ae60c7078bb7 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Wed, 15 Jan 2020 20:08:41 +0100 Subject: [PATCH 1/2] Add proper activePods list in TestGetTopologyHints for CPUManager --- pkg/kubelet/cm/cpumanager/topology_hints_test.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go index 62d91e9cf98..aaf28f49a44 100644 --- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go +++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go @@ -23,6 +23,7 @@ import ( cadvisorapi "github.com/google/cadvisor/info/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" @@ -238,6 +239,18 @@ func TestGetTopologyHints(t *testing.T) { for _, tc := range tcases { topology, _ := topology.Discover(&machineInfo, numaNodeInfo) + var activePods []*v1.Pod + for p := range tc.assignments { + pod := v1.Pod{} + pod.UID = types.UID(p) + for c := range tc.assignments[p] { + container := v1.Container{} + container.Name = c + pod.Spec.Containers = append(pod.Spec.Containers, container) + } + activePods = append(activePods, &pod) + } + m := manager{ policy: &staticPolicy{ topology: topology, @@ -247,7 +260,7 @@ func TestGetTopologyHints(t *testing.T) { defaultCPUSet: tc.defaultCPUSet, }, topology: topology, - activePods: func() []*v1.Pod { return nil }, + activePods: func() []*v1.Pod { return activePods }, podStatusProvider: mockPodStatusProvider{}, sourcesReady: &sourcesReadyStub{}, } From 34b942a41d71bee936d0c4d97388f01d0412801b Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Wed, 15 Jan 2020 20:09:24 +0100 Subject: [PATCH 2/2] Remove check for empty activePods list in CPUManager removeStaleState This check is redundant since we protect this call with a call to `m.sourcesReady.AllReady()` earlier on. Moreover, having this check in place means that we will leave some stale state around in cases where there are actually no active pods in the system and this loop hasn't cleaned them up yet. This can happen, for example, if a pod exits while the kubelet is down for some reason. We see this exact case being triggered in our e2e tests, where a test has been failing since October when this change was first introduced. --- pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index ec9ae4333be..e19a596bfdd 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -312,12 +312,6 @@ func (m *manager) removeStaleState() { // Get the list of active pods. activePods := m.activePods() - if len(activePods) == 0 { - // If there are no active pods, skip the removal of stale state. - // Since this function is called periodically, we will just try again - // next time this function is called. - return - } // Build a list of (podUID, containerName) pairs for all containers in all active Pods. activeContainers := make(map[string]map[string]struct{})