kubelet: Admission must exclude completed pods and avoid races

Fixes two issues with how the pod worker refactor calculated the
pods that admission could see (GetActivePods() and
filterOutTerminatedPods())

First, completed pods must be filtered from the "desired" state
for admission, which arguably should be happening earlier in
config. Exclude the two terminal pods states from GetActivePods()

Second, the previous check introduced with the pod worker lifecycle
ownership changes was subtly wrong for the admission use case.
Admission has to include pods that haven't yet hit the pod worker,
which CouldHaveRunningContainers was filtering out (because the
pod worker hasn't seen them). Introduce a weaker check -
IsPodKnownTerminated() - that returns true only if the pod is in
a known terminated state (no running containers AND known to pod
worker). This weaker check may only be called from components that
need admitted pods, not other kubelet subsystems.

This commit does not fix the long standing bug that force deleted
pods are omitted from admission checks, which must be fixed by
having GetActivePods() also include pods "still terminating".
This commit is contained in:
Clayton Coleman 2021-08-25 11:10:10 -04:00
parent 4e10ffe453
commit a2ca66d280
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
3 changed files with 37 additions and 4 deletions

View File

@ -95,6 +95,10 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
// GetActivePods returns pods that may have a running container (a
// terminated pod is one that is known to have no running containers and
// will not get any more).
//
// TODO: This method must include pods that have been force deleted from
// the config source (and thus removed from the pod manager) but are still
// terminating.
func (kl *Kubelet) GetActivePods() []*v1.Pod {
allPods := kl.podManager.GetPods()
activePods := kl.filterOutTerminatedPods(allPods)
@ -964,12 +968,17 @@ func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool {
return kl.PodResourcesAreReclaimed(pod, status)
}
// filterOutTerminatedPods returns the pods that could still have running
// containers
// filterOutTerminatedPods returns pods that are not in a terminal phase
// or are known to be fully terminated. This method should only be used
// when the set of pods being filtered is upstream of the pod worker, i.e.
// the pods the pod manager is aware of.
func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod {
var filteredPods []*v1.Pod
filteredPods := make([]*v1.Pod, 0, len(pods))
for _, p := range pods {
if !kl.podWorkers.CouldHaveRunningContainers(p.UID) {
if kl.podWorkers.IsPodKnownTerminated(p.UID) {
continue
}
if p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed {
continue
}
filteredPods = append(filteredPods, p)

View File

@ -130,6 +130,14 @@ type PodWorkers interface {
// true.
SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodWorkType
// IsPodKnownTerminated returns true if the provided pod UID is known by the pod
// worker to be terminated. If the pod has been force deleted and the pod worker
// has completed termination this method will return false, so this method should
// only be used to filter out pods from the desired set such as in admission.
//
// Intended for use by the kubelet config loops, but not subsystems, which should
// use ShouldPod*().
IsPodKnownTerminated(uid types.UID) bool
// CouldHaveRunningContainers returns true before the pod workers have synced,
// once the pod workers see the pod (syncPod could be called), and returns false
// after the pod has been terminated (running containers guaranteed stopped).
@ -394,6 +402,16 @@ func newPodWorkers(
}
}
func (p *podWorkers) IsPodKnownTerminated(uid types.UID) bool {
p.podLock.Lock()
defer p.podLock.Unlock()
if status, ok := p.podSyncStatuses[uid]; ok {
return status.IsTerminated()
}
// if the pod is not known, we return false (pod worker is not aware of it)
return false
}
func (p *podWorkers) CouldHaveRunningContainers(uid types.UID) bool {
p.podLock.Lock()
defer p.podLock.Unlock()

View File

@ -49,6 +49,7 @@ type fakePodWorkers struct {
statusLock sync.Mutex
running map[types.UID]bool
terminating map[types.UID]bool
terminated map[types.UID]bool
terminationRequested map[types.UID]bool
removeRuntime map[types.UID]bool
removeContent map[types.UID]bool
@ -85,6 +86,11 @@ func (f *fakePodWorkers) SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodW
return nil
}
func (f *fakePodWorkers) IsPodKnownTerminated(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminated[uid]
}
func (f *fakePodWorkers) CouldHaveRunningContainers(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()