diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 9498eb47279..58aec52cbfd 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -2217,7 +2217,7 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) { if !kl.podWorkers.IsPodTerminationRequested(pod.UID) { // We failed pods that we rejected, so activePods include all admitted // pods that are alive. - activePods := kl.filterOutTerminatedPods(existingPods) + activePods := kl.filterOutInactivePods(existingPods) // Check if we can admit the pod; if not, reject it. if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok { diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 9a8b8cc8a30..1edbbb5113b 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -92,16 +92,17 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) { return pods, nil } -// GetActivePods returns pods that may have a running container (a -// terminated pod is one that is known to have no running containers and -// will not get any more). +// GetActivePods returns pods that have been admitted to the kubelet that +// are not fully terminated. This is mapped to the "desired state" of the +// kubelet - what pods should be running. // -// TODO: This method must include pods that have been force deleted from -// the config source (and thus removed from the pod manager) but are still -// terminating. +// WARNING: Currently this list does not include pods that have been force +// deleted but may still be terminating, which means resources assigned to +// those pods during admission may still be in use. See +// https://github.com/kubernetes/kubernetes/issues/104824 func (kl *Kubelet) GetActivePods() []*v1.Pod { allPods := kl.podManager.GetPods() - activePods := kl.filterOutTerminatedPods(allPods) + activePods := kl.filterOutInactivePods(allPods) return activePods } @@ -968,19 +969,32 @@ func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool { return kl.PodResourcesAreReclaimed(pod, status) } -// filterOutTerminatedPods returns pods that are not in a terminal phase +// filterOutInactivePods returns pods that are not in a terminal phase // or are known to be fully terminated. This method should only be used // when the set of pods being filtered is upstream of the pod worker, i.e. // the pods the pod manager is aware of. -func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod { +func (kl *Kubelet) filterOutInactivePods(pods []*v1.Pod) []*v1.Pod { filteredPods := make([]*v1.Pod, 0, len(pods)) for _, p := range pods { + // if a pod is fully terminated by UID, it should be excluded from the + // list of pods if kl.podWorkers.IsPodKnownTerminated(p.UID) { continue } - if p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed { + + // terminal pods are considered inactive UNLESS they are actively terminating + isTerminal := p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed + if !isTerminal { + // a pod that has been marked terminal within the Kubelet is considered + // inactive (may have been rejected by Kubelet admision) + if status, ok := kl.statusManager.GetPodStatus(p.UID); ok { + isTerminal = status.Phase == v1.PodSucceeded || status.Phase == v1.PodFailed + } + } + if isTerminal && !kl.podWorkers.IsPodTerminationRequested(p.UID) { continue } + filteredPods = append(filteredPods, p) } return filteredPods diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index b95d82f743d..2c072216ee3 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -1418,15 +1418,18 @@ func TestNetworkErrorsWithoutHostNetwork(t *testing.T) { assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error") } -func TestFilterOutTerminatedPods(t *testing.T) { +func TestFilterOutInactivePods(t *testing.T) { testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) defer testKubelet.Cleanup() kubelet := testKubelet.kubelet - pods := newTestPods(5) + pods := newTestPods(8) now := metav1.NewTime(time.Now()) + + // terminal pods are excluded pods[0].Status.Phase = v1.PodFailed pods[1].Status.Phase = v1.PodSucceeded - // The pod is terminating, should not filter out. + + // deleted pod is included unless it's known to be terminated pods[2].Status.Phase = v1.PodRunning pods[2].DeletionTimestamp = &now pods[2].Status.ContainerStatuses = []v1.ContainerStatus{ @@ -1436,18 +1439,31 @@ func TestFilterOutTerminatedPods(t *testing.T) { }, }}, } + + // pending and running pods are included pods[3].Status.Phase = v1.PodPending pods[4].Status.Phase = v1.PodRunning - kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{ - pods[2].UID: true, - pods[3].UID: true, - pods[4].UID: true, + // pod that is running but has been rejected by admission is excluded + pods[5].Status.Phase = v1.PodRunning + kubelet.statusManager.SetPodStatus(pods[5], v1.PodStatus{Phase: v1.PodFailed}) + + // pod that is running according to the api but is known terminated is excluded + pods[6].Status.Phase = v1.PodRunning + kubelet.podWorkers.(*fakePodWorkers).terminated = map[types.UID]bool{ + pods[6].UID: true, } - expected := []*v1.Pod{pods[2], pods[3], pods[4]} + // pod that is failed but still terminating is included (it may still be consuming + // resources) + pods[7].Status.Phase = v1.PodFailed + kubelet.podWorkers.(*fakePodWorkers).terminationRequested = map[types.UID]bool{ + pods[7].UID: true, + } + + expected := []*v1.Pod{pods[2], pods[3], pods[4], pods[7]} kubelet.podManager.SetPods(pods) - actual := kubelet.filterOutTerminatedPods(pods) + actual := kubelet.filterOutInactivePods(pods) assert.Equal(t, expected, actual) }