Merge pull request #102344 from smarterclayton/keep_pod_worker

Prevent Kubelet from incorrectly interpreting "not yet started" pods as "ready to terminate" pods by unifying responsibility for pod lifecycle into the pod worker
Kubernetes Prow Robot 2021-07-08 16:48:53 -07:00 committed by GitHub
commit dab6f6a43d
33 changed files with 2065 additions and 1104 deletions
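
The change moves ownership of pod lifecycle out of scattered kubelet helpers (the channel-based pod killer, status-manager checks such as podIsTerminated) and into the pod worker, which becomes the single source of truth for whether a pod is starting, terminating, or terminated. As an orientation aid for the hunks below, here is a condensed, hand-written sketch of the pod worker surface this PR programs against; the names are taken from this diff, but the field sets and signatures are abbreviated and should not be read as the exact upstream definitions.

// Sketch only (not the verbatim upstream API): a condensed view of the pod
// worker surface introduced by this PR, assembled from names used in the
// hunks below. Field sets and signatures are abbreviated.
package podworkersketch

import (
    "context"
    "time"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/types"
    kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// PodWorkType classifies what the worker is currently doing for a pod.
type PodWorkType int

const (
    SyncPodWork        PodWorkType = iota // pod should be set up and kept running
    TerminatingPodWork                    // the pod's containers are being stopped
    // The upstream enum also includes a terminal state once cleanup completes;
    // it is not referenced in the hunks shown here.
)

// KillPodOptions carries termination-specific parameters through UpdatePod.
// Only the field visible in this diff is shown.
type KillPodOptions struct {
    PodTerminationGracePeriodSecondsOverride *int64
}

// UpdatePodOptions is how other kubelet components request pod work.
type UpdatePodOptions struct {
    UpdateType     kubetypes.SyncPodType
    Pod            *v1.Pod
    MirrorPod      *v1.Pod
    RunningPod     *kubecontainer.Pod // runtime-observed pods with no config (orphans)
    StartTime      time.Time
    KillPodOptions *KillPodOptions
}

// PodWorkers is the single owner of per-pod lifecycle state; callers query it
// instead of inferring state from cached v1.PodStatus as the removed helpers did.
type PodWorkers interface {
    UpdatePod(options UpdatePodOptions)
    SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodWorkType

    IsPodTerminationRequested(uid types.UID) bool
    CouldHaveRunningContainers(uid types.UID) bool
    ShouldPodContentBeRemoved(uid types.UID) bool
    IsPodForMirrorPodTerminatingByFullName(podFullName string) bool
}

// The worker drives each pod through three phases using the callbacks the
// kubelet registers in newPodWorkers: setup, stopping containers, and final
// resource reclamation.
type (
    syncPodFnType            func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error
    syncTerminatingPodFnType func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error
    syncTerminatedPodFnType  func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error
)

In the hunks that follow, callers such as the eviction manager, HandlePodCleanups, and dispatchWork switch from kubelet-level status checks to these queries and to UpdatePod.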

View File

@ -92,7 +92,8 @@ type Runtime interface {
// file). In this case, the garbage collector should refrain from aggressive
// behavior such as removing all containers of unrecognized pods (yet).
// If evictNonDeletedPods is set to true, containers and sandboxes belonging to pods
// that are terminated, but not deleted will be evicted. Otherwise, only deleted pods will be GC'd.
// that are terminated, but not deleted will be evicted. Otherwise, only deleted pods
// will be GC'd.
// TODO: Revisit this method and make it cleaner.
GarbageCollect(gcPolicy GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
// SyncPod syncs the running pod into the desired pod.

View File

@ -561,15 +561,15 @@ func (m *managerImpl) evictPod(pod *v1.Pod, gracePeriodOverride int64, evictMsg
klog.ErrorS(nil, "Eviction manager: cannot evict a critical pod", "pod", klog.KObj(pod))
return false
}
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: evictMsg,
Reason: Reason,
}
// record that we are evicting the pod
m.recorder.AnnotatedEventf(pod, annotations, v1.EventTypeWarning, Reason, evictMsg)
// this is a blocking call and should only return when the pod and its containers are killed.
err := m.killPodFunc(pod, status, &gracePeriodOverride)
klog.V(3).InfoS("Evicting pod", "pod", klog.KObj(pod), "podUID", pod.UID, "message", evictMsg)
err := m.killPodFunc(pod, true, &gracePeriodOverride, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = Reason
status.Message = evictMsg
})
if err != nil {
klog.ErrorS(err, "Eviction manager: pod failed to evict", "pod", klog.KObj(pod))
} else {

View File

@ -21,7 +21,7 @@ import (
"testing"
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/clock"
@ -46,14 +46,16 @@ const (
// mockPodKiller is used to test which pod is killed
type mockPodKiller struct {
pod *v1.Pod
status v1.PodStatus
evict bool
statusFn func(*v1.PodStatus)
gracePeriodOverride *int64
}
// killPodNow records the pod that was killed
func (m *mockPodKiller) killPodNow(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error {
m.pod = pod
m.status = status
m.statusFn = statusFn
m.evict = evict
m.gracePeriodOverride = gracePeriodOverride
return nil
}

View File

@ -19,7 +19,7 @@ package eviction
import (
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
@ -92,7 +92,7 @@ type ContainerGC interface {
// pod - the pod to kill
// isEvicted - whether the pod is being killed because it is being evicted
// gracePeriodOverride - the grace period override to use instead of what is on the pod spec
// fn - a callback invoked with the pod's status so the caller can set the terminal phase, reason, and message (i.e. why it is killed)
type KillPodFunc func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error
type KillPodFunc func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error
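For orientation, a minimal hedged sketch of driving the new signature: instead of receiving a prebuilt v1.PodStatus, KillPodFunc now takes an eviction flag and a callback that mutates the status to be recorded, as the eviction manager hunk above does. The helper name, grace period value, and reason string below are illustrative, not upstream code.

// Illustrative caller of the new KillPodFunc shape; evictPodSketch and the
// zero-second grace period are examples, not upstream code.
package evictionsketch

import (
    v1 "k8s.io/api/core/v1"
    "k8s.io/kubernetes/pkg/kubelet/eviction"
)

func evictPodSketch(killPod eviction.KillPodFunc, pod *v1.Pod, msg string) error {
    gracePeriod := int64(0) // example override; eviction typically shortens the grace period
    return killPod(pod, true /* isEvicted */, &gracePeriod, func(status *v1.PodStatus) {
        // The callback mutates the status the kubelet will record, replacing
        // the old pattern of handing over a precomputed v1.PodStatus.
        status.Phase = v1.PodFailed
        status.Reason = "Evicted" // illustrative reason string
        status.Message = msg
    })
}

Passing a mutation callback lets the kubelet synthesize the pod status at the point of termination (see syncTerminatingPod below, which generates the status and then applies podStatusFn) rather than trusting a status computed earlier by the caller.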
// MirrorPodFunc returns the mirror pod for the given static pod and
// whether it was known to the pod manager.

View File

@ -17,6 +17,7 @@ limitations under the License.
package kubelet
import (
"context"
"crypto/tls"
"fmt"
"math"
@ -137,6 +138,11 @@ const (
// Period for performing global cleanup tasks.
housekeepingPeriod = time.Second * 2
// Duration after which housekeeping is considered to have violated the
// invariant that housekeeping should be fast to avoid blocking pod config
// (no new pods are started or deleted while housekeeping is running).
housekeepingWarningDuration = time.Second * 15
// Period for performing eviction monitoring.
// ensure this is kept in sync with internal cadvisor housekeeping.
evictionMonitoringPeriod = time.Second * 10
@ -639,6 +645,20 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.containerLogManager = logs.NewStubContainerLogManager()
}
klet.reasonCache = NewReasonCache()
klet.workQueue = queue.NewBasicWorkQueue(klet.clock)
klet.podWorkers = newPodWorkers(
klet.syncPod,
klet.syncTerminatingPod,
klet.syncTerminatedPod,
kubeDeps.Recorder,
klet.workQueue,
klet.resyncInterval,
backOffPeriod,
klet.podCache,
)
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
klet.livenessManager,
@ -646,7 +666,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.startupManager,
seccompProfileRoot,
machineInfo,
klet,
klet.podWorkers,
kubeDeps.OSInterface,
klet,
httpClient,
@ -780,7 +800,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeCfg.EnableControllerAttachDetach,
nodeName,
klet.podManager,
klet.statusManager,
klet.podWorkers,
klet.kubeClient,
klet.volumePluginMgr,
klet.containerRuntime,
@ -792,12 +812,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
keepTerminatedPodVolumes,
volumepathhandler.NewBlockVolumePathHandler())
klet.reasonCache = NewReasonCache()
klet.workQueue = queue.NewBasicWorkQueue(klet.clock)
klet.podWorkers = newPodWorkers(klet.syncPod, kubeDeps.Recorder, klet.workQueue, klet.resyncInterval, backOffPeriod, klet.podCache)
klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
klet.podKiller = NewPodKiller(klet)
// setup eviction manager
evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock)
@ -1103,9 +1118,6 @@ type Kubelet struct {
// Container restart Backoff
backOff *flowcontrol.Backoff
// Pod killer handles pods to be killed
podKiller PodKiller
// Information about the ports which are opened by daemons on Node running this Kubelet server.
daemonEndpoints *v1.NodeDaemonEndpoints
@ -1470,10 +1482,6 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
kl.initNetworkUtil()
}
// Start a goroutine responsible for killing pods (that are not properly
// handled by pod workers).
go wait.Until(kl.podKiller.PerformPodKillingWork, 1*time.Second, wait.NeverStop)
// Start component sync loops.
kl.statusManager.Start()
@ -1487,7 +1495,12 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
kl.syncLoop(updates, kl)
}
// syncPod is the transaction script for the sync of a single pod.
// syncPod is the transaction script for the sync of a single pod (setting up
// a pod). The reverse (teardown) is handled in syncTerminatingPod and
// syncTerminatedPod. If syncPod exits without error, then the pod runtime
// state is in sync with the desired configuration state (pod is running).
// If syncPod exits with a transient error, the next invocation of syncPod
// is expected to make progress towards reaching the runtime state.
//
// Arguments:
//
@ -1499,7 +1512,7 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
// * If the pod is being seen as running for the first time, record pod
// start latency
// * Update the status of the pod in the status manager
// * Kill the pod if it should not be running
// * Kill the pod if it should not be running due to soft admission
// * Create a mirror pod if the pod is a static pod, and does not
// already have a mirror pod
// * Create the data directories for the pod if they do not exist
@ -1514,39 +1527,9 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
// This operation writes all events that are dispatched in order to provide
// the most accurate information possible about an error situation to aid debugging.
// Callers should not throw an event if this operation returns an error.
func (kl *Kubelet) syncPod(o syncPodOptions) error {
// pull out the required options
pod := o.pod
mirrorPod := o.mirrorPod
podStatus := o.podStatus
updateType := o.updateType
// if we want to kill a pod, do it now!
if updateType == kubetypes.SyncPodKill {
killPodOptions := o.killPodOptions
if killPodOptions == nil || killPodOptions.PodStatusFunc == nil {
return fmt.Errorf("kill pod options are required if update type is kill")
}
apiPodStatus := killPodOptions.PodStatusFunc(pod, podStatus)
kl.statusManager.SetPodStatus(pod, apiPodStatus)
// we kill the pod with the specified grace period since this is a termination
if err := kl.killPod(pod, nil, podStatus, killPodOptions.PodTerminationGracePeriodSecondsOverride); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
// there was an error killing the pod, so we return that error directly
utilruntime.HandleError(err)
return err
}
return nil
}
// If a pod is still gracefully terminating, then we do not want to
// take further action. This mitigates static pods and deleted pods
// from getting rerun prematurely or their cgroups being deleted before
// the runtime cleans up.
podFullName := kubecontainer.GetPodFullName(pod)
if kl.podKiller.IsPodPendingTerminationByPodName(podFullName) {
return fmt.Errorf("pod %q is pending termination", podFullName)
}
func (kl *Kubelet) syncPod(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
klog.V(4).InfoS("syncPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("syncPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)
// Latency measurements for the main workflow are relative to the
// first time the pod was seen by the API server.
@ -1583,16 +1566,15 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
podStatus.IPs = []string{apiPodStatus.PodIP}
}
// Record the time it takes for the pod to become running.
existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
!firstSeenTime.IsZero() {
metrics.PodStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
}
// If the pod should not be running, we request the pod's containers be stopped. This is not the same
// as termination (we want to stop the pod, but potentially restart it if soft
// admission allows it later). Set the status and phase appropriately
runnable := kl.canRunPod(pod)
if !runnable.Admit {
// Pod is not runnable; update the Pod and Container statuses to why.
// Pod is not runnable; update the Pod and Container statuses to reflect why.
if apiPodStatus.Phase != v1.PodFailed && apiPodStatus.Phase != v1.PodSucceeded {
apiPodStatus.Phase = v1.PodPending
}
apiPodStatus.Reason = runnable.Reason
apiPodStatus.Message = runnable.Message
// Waiting containers are not creating.
@ -1609,22 +1591,28 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
}
}
// Update status in the status manager
// Record the time it takes for the pod to become running.
existingStatus, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok || existingStatus.Phase == v1.PodPending && apiPodStatus.Phase == v1.PodRunning &&
!firstSeenTime.IsZero() {
metrics.PodStartDuration.Observe(metrics.SinceInSeconds(firstSeenTime))
}
kl.statusManager.SetPodStatus(pod, apiPodStatus)
// Kill pod if it should not be running
if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == v1.PodFailed {
// Pods that are not runnable must be stopped - return a typed error to the pod worker
if !runnable.Admit {
klog.V(2).InfoS("Pod is not runnable and must have running containers stopped", "pod", klog.KObj(pod), "podUID", pod.UID, "message", runnable.Message)
var syncErr error
if err := kl.killPod(pod, nil, podStatus, nil); err != nil {
p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.killPod(pod, p, nil); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
syncErr = fmt.Errorf("error killing pod: %v", err)
utilruntime.HandleError(syncErr)
} else {
if !runnable.Admit {
// There was no error killing the pod, but the pod cannot be run.
// Return an error to signal that the sync loop should back off.
syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
}
// There was no error killing the pod, but the pod cannot be run.
// Return an error to signal that the sync loop should back off.
syncErr = fmt.Errorf("pod cannot be run: %s", runnable.Message)
}
return syncErr
}
@ -1640,7 +1628,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
pcm := kl.containerManager.NewPodContainerManager()
// If pod has already been terminated then we need not create
// or update the pod's cgroup
if !kl.podIsTerminated(pod) {
// TODO: once context cancellation is added this check can be removed
if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
// When the kubelet is restarted with the cgroups-per-qos
// flag enabled, all the pod's running containers
// should be killed intermittently and brought back up
@ -1657,7 +1646,8 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
// exists or the pod is running for the first time
podKilled := false
if !pcm.Exists(pod) && !firstSync {
if err := kl.killPod(pod, nil, podStatus, nil); err == nil {
p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.killPod(pod, p, nil); err == nil {
podKilled = true
} else {
klog.ErrorS(err, "KillPod failed", "pod", klog.KObj(pod), "podStatus", podStatus)
@ -1691,6 +1681,7 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
// The mirror pod is semantically different from the static pod. Remove
// it. The mirror pod will get recreated later.
klog.InfoS("Trying to delete pod", "pod", klog.KObj(pod), "podUID", mirrorPod.ObjectMeta.UID)
podFullName := kubecontainer.GetPodFullName(pod)
var err error
deleted, err = kl.podManager.DeleteMirrorPod(podFullName, &mirrorPod.ObjectMeta.UID)
if deleted {
@ -1720,8 +1711,9 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
return err
}
// Volume manager will not mount volumes for terminated pods
if !kl.podIsTerminated(pod) {
// Volume manager will not mount volumes for terminating pods
// TODO: once context cancellation is added this check can be removed
if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
// Wait for volumes to attach/mount
if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedMountVolume, "Unable to attach or mount volumes: %v", err)
@ -1752,6 +1744,125 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
return nil
}
// syncTerminatingPod is expected to terminate all running containers in a pod. Once this method
// returns without error, the pod's local state can be safely cleaned up. If runningPod is passed,
// we perform no status updates.
func (kl *Kubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
klog.V(4).InfoS("syncTerminatingPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("syncTerminatingPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)
// when we receive a runtime only pod (runningPod != nil) we don't need to update the status
// manager or refresh the status of the cache, because a successful killPod will ensure we do
// not get invoked again
if runningPod != nil {
// we kill the pod with the specified grace period since this is a termination
if gracePeriod != nil {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", *gracePeriod)
} else {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", nil)
}
if err := kl.killPod(pod, *runningPod, gracePeriod); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
// there was an error killing the pod, so we return that error directly
utilruntime.HandleError(err)
return err
}
klog.V(4).InfoS("Pod termination stopped all running orphan containers", "pod", klog.KObj(pod), "podUID", pod.UID)
return nil
}
apiPodStatus := kl.generateAPIPodStatus(pod, podStatus)
if podStatusFn != nil {
podStatusFn(&apiPodStatus)
}
kl.statusManager.SetPodStatus(pod, apiPodStatus)
if gracePeriod != nil {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", *gracePeriod)
} else {
klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", nil)
}
p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err := kl.killPod(pod, p, gracePeriod); err != nil {
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
// there was an error killing the pod, so we return that error directly
utilruntime.HandleError(err)
return err
}
// Guard against consistency issues in KillPod implementations by checking that there are no
// running containers. This method is invoked infrequently so this is effectively free and can
// catch race conditions introduced by callers updating pod status out of order.
// TODO: have KillPod return the terminal status of stopped containers and write that into the
// cache immediately
podStatus, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
if err != nil {
klog.ErrorS(err, "Unable to read pod status prior to final pod termination", "pod", klog.KObj(pod), "podUID", pod.UID)
return err
}
var runningContainers []string
var containers []string
for _, s := range podStatus.ContainerStatuses {
if s.State == kubecontainer.ContainerStateRunning {
runningContainers = append(runningContainers, s.ID.String())
}
containers = append(containers, fmt.Sprintf("(%s state=%s exitCode=%d finishedAt=%s)", s.Name, s.State, s.ExitCode, s.FinishedAt.UTC().Format(time.RFC3339Nano)))
}
if klog.V(4).Enabled() {
sort.Strings(containers)
klog.InfoS("Post-termination container state", "pod", klog.KObj(pod), "podUID", pod.UID, "containers", strings.Join(containers, " "))
}
if len(runningContainers) > 0 {
return fmt.Errorf("detected running containers after a successful KillPod, CRI violation: %v", runningContainers)
}
// we have successfully stopped all containers, the pod is terminating, our status is "done"
klog.V(4).InfoS("Pod termination stopped all running containers", "pod", klog.KObj(pod), "podUID", pod.UID)
return nil
}
// syncTerminatedPod cleans up a pod that has terminated (has no running containers).
// The invocations in this call are expected to tear down what PodResourcesAreReclaimed checks (which
// gates pod deletion). When this method exits the pod is expected to be ready for cleanup.
// TODO: make this method take a context and exit early
func (kl *Kubelet) syncTerminatedPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
klog.V(4).InfoS("syncTerminatedPod enter", "pod", klog.KObj(pod), "podUID", pod.UID)
defer klog.V(4).InfoS("syncTerminatedPod exit", "pod", klog.KObj(pod), "podUID", pod.UID)
// generate the final status of the pod
// TODO: should we simply fold this into TerminatePod? that would give a single pod update
apiPodStatus := kl.generateAPIPodStatus(pod, podStatus)
kl.statusManager.SetPodStatus(pod, apiPodStatus)
// volumes are unmounted after the pod worker reports ShouldPodRuntimeBeRemoved (which is satisfied
// before syncTerminatedPod is invoked)
if err := kl.volumeManager.WaitForUnmount(pod); err != nil {
return err
}
klog.V(4).InfoS("Pod termination unmounted volumes", "pod", klog.KObj(pod), "podUID", pod.UID)
// Note: we leave pod containers to be reclaimed in the background since dockershim requires the
// container for retrieving logs and we want to make sure logs are available until the pod is
// physically deleted.
// remove any cgroups in the hierarchy for pods that are no longer running.
if kl.cgroupsPerQOS {
pcm := kl.containerManager.NewPodContainerManager()
name, _ := pcm.GetPodContainerName(pod)
if err := pcm.Destroy(name); err != nil {
return err
}
klog.V(4).InfoS("Pod termination removed cgroups", "pod", klog.KObj(pod), "podUID", pod.UID)
}
// mark the final pod status
kl.statusManager.TerminatePod(pod)
klog.V(4).InfoS("Pod is terminated and will need no more status updates", "pod", klog.KObj(pod), "podUID", pod.UID)
return nil
}
// Get pods which should be resynchronized. Currently, the following pods should be resynchronized:
// * pods whose work is ready.
// * pods for which internal modules have requested a sync.
@ -1794,28 +1905,11 @@ func (kl *Kubelet) deletePod(pod *v1.Pod) error {
// for sources that haven't reported yet.
return fmt.Errorf("skipping delete because sources aren't ready yet")
}
kl.podWorkers.ForgetWorker(pod.UID)
// make sure our runtimeCache is at least as fresh as the last container started event we observed.
// this ensures we correctly send graceful deletion signals to all containers we've reported started.
if lastContainerStarted, ok := kl.lastContainerStartedTime.Get(pod.UID); ok {
if err := kl.runtimeCache.ForceUpdateIfOlder(lastContainerStarted); err != nil {
return fmt.Errorf("error updating containers: %v", err)
}
}
// Runtime cache may not have been updated with the pod, but it's okay
// because the periodic cleanup routine will attempt to delete again later.
runningPods, err := kl.runtimeCache.GetPods()
if err != nil {
return fmt.Errorf("error listing containers: %v", err)
}
runningPod := kubecontainer.Pods(runningPods).FindPod("", pod.UID)
if runningPod.IsEmpty() {
return fmt.Errorf("pod not found")
}
podPair := kubecontainer.PodPair{APIPod: pod, RunningPod: &runningPod}
kl.podKiller.KillPod(&podPair)
klog.V(3).InfoS("Pod has been deleted and must be killed", "pod", klog.KObj(pod), "podUID", pod.UID)
kl.podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodKill,
})
// We leave the volume/directory cleanup to the periodic cleanup routine.
return nil
}
@ -1853,7 +1947,7 @@ func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, strin
func (kl *Kubelet) canRunPod(pod *v1.Pod) lifecycle.PodAdmitResult {
attrs := &lifecycle.PodAdmitAttributes{Pod: pod}
// Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
attrs.OtherPods = kl.filterOutTerminatedPods(kl.podManager.GetPods())
attrs.OtherPods = kl.GetActivePods()
for _, handler := range kl.softAdmitHandlers {
if result := handler.Admit(attrs); !result.Admit {
@ -2043,10 +2137,16 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
// skip housekeeping, as we may accidentally delete pods from unready sources.
klog.V(4).InfoS("SyncLoop (housekeeping, skipped): sources aren't ready yet")
} else {
start := time.Now()
klog.V(4).InfoS("SyncLoop (housekeeping)")
if err := handler.HandlePodCleanups(); err != nil {
klog.ErrorS(err, "Failed cleaning pods")
}
duration := time.Since(start)
if duration > housekeepingWarningDuration {
klog.ErrorS(fmt.Errorf("housekeeping took too long"), "Housekeeping took longer than 15s", "seconds", duration.Seconds())
}
klog.V(4).InfoS("SyncLoop (housekeeping) end")
}
}
return true
@ -2067,31 +2167,12 @@ func handleProbeSync(kl *Kubelet, update proberesults.Update, handler SyncHandle
// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod has completed termination, dispatchWork will perform no action.
func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
// check whether we are ready to delete the pod from the API server (all status up to date)
containersTerminal, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
if pod.DeletionTimestamp != nil && containersTerminal {
klog.V(4).InfoS("Pod has completed execution and should be deleted from the API server", "pod", klog.KObj(pod), "syncType", syncType)
kl.statusManager.TerminatePod(pod)
return
}
// optimization: avoid invoking the pod worker if no further changes are possible to the pod definition
// (i.e. the pod has completed and its containers have been terminated)
if podWorkerTerminal && containersTerminal {
klog.V(4).InfoS("Pod has completed and its containers have been terminated, ignoring remaining sync work", "pod", klog.KObj(pod), "syncType", syncType)
return
}
// Run the sync in an async worker.
kl.podWorkers.UpdatePod(&UpdatePodOptions{
kl.podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
MirrorPod: mirrorPod,
UpdateType: syncType,
OnCompleteFunc: func(err error) {
if err != nil {
metrics.PodWorkerDuration.WithLabelValues(syncType.String()).Observe(metrics.SinceInSeconds(start))
}
},
StartTime: start,
})
// Note the number of containers for new pods.
if syncType == kubetypes.SyncPodCreate {
@ -2127,10 +2208,13 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
continue
}
if !kl.podIsTerminated(pod) {
// Only go through the admission process if the pod is not
// terminated.
// Only go through the admission process if the pod is not requested
// for termination by another part of the kubelet. If the pod is already
// using resources (previously admitted), the pod worker is going to be
// shutting it down. If the pod hasn't started yet, we know that when
// the pod worker is invoked it will also avoid setting up the pod, so
// we simply avoid doing any work.
if !kl.podWorkers.IsPodTerminationRequested(pod.UID) {
// We failed pods that we rejected, so activePods include all admitted
// pods that are alive.
activePods := kl.filterOutTerminatedPods(existingPods)
@ -2304,13 +2388,8 @@ func (kl *Kubelet) ListenAndServePodResources() {
// Delete the eligible dead container instances in a pod. Depending on the configuration, the latest dead containers may be kept around.
func (kl *Kubelet) cleanUpContainersInPod(podID types.UID, exitedContainerID string) {
if podStatus, err := kl.podCache.Get(podID); err == nil {
removeAll := false
if syncedPod, ok := kl.podManager.GetPodByUID(podID); ok {
// generate the api status using the cached runtime status to get up-to-date ContainerStatuses
apiPodStatus := kl.generateAPIPodStatus(syncedPod, podStatus)
// When an evicted or deleted pod has already synced, all containers can be removed.
removeAll = eviction.PodIsEvicted(syncedPod.Status) || (syncedPod.DeletionTimestamp != nil && notRunning(apiPodStatus.ContainerStatuses))
}
// When an evicted or deleted pod has already synced, all containers can be removed.
removeAll := kl.podWorkers.ShouldPodContentBeRemoved(podID)
kl.containerDeletor.deleteContainersInPod(exitedContainerID, podStatus, removeAll)
}
}

View File

@ -30,7 +30,6 @@ import (
"runtime"
"sort"
"strings"
"sync"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
@ -54,7 +53,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward"
remotecommandserver "k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand"
"k8s.io/kubernetes/pkg/kubelet/envvars"
"k8s.io/kubernetes/pkg/kubelet/eviction"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -71,11 +69,6 @@ import (
const (
managedHostsHeader = "# Kubernetes-managed hosts file.\n"
managedHostsHeaderWithHostNetwork = "# Kubernetes-managed hosts file (host network).\n"
// Capacity of the channel for storing pods to kill. A small number should
// suffice because a goroutine is dedicated to check the channel and does
// not block on anything else.
podKillingChannelCapacity = 50
)
// Container state reason list
@ -99,7 +92,9 @@ func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
return pods, nil
}
// GetActivePods returns non-terminal pods
// GetActivePods returns pods that may have a running container (a
// terminated pod is one that is known to have no running containers and
// will not get any more).
func (kl *Kubelet) GetActivePods() []*v1.Pod {
allPods := kl.podManager.GetPods()
activePods := kl.filterOutTerminatedPods(allPods)
@ -853,18 +848,11 @@ func containerResourceRuntimeValue(fs *v1.ResourceFieldSelector, pod *v1.Pod, co
return resource.ExtractResourceValueByContainerName(fs, pod, containerName)
}
// One of the following arguments must be non-nil: runningPod, status.
func (kl *Kubelet) killPod(pod *v1.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error {
var p kubecontainer.Pod
if runningPod != nil {
p = *runningPod
} else if status != nil {
p = kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), status)
} else {
return fmt.Errorf("one of the two arguments must be non-nil: runningPod, status")
}
// Call the container runtime KillPod method which stops all running containers of the pod
// killPod instructs the container runtime to kill the pod. This method requires that
// the pod status contains the result of the last syncPod, otherwise it may fail to
// terminate newly created containers and sandboxes.
func (kl *Kubelet) killPod(pod *v1.Pod, p kubecontainer.Pod, gracePeriodOverride *int64) error {
// Call the container runtime KillPod method which stops all known running containers of the pod
if err := kl.containerRuntime.KillPod(pod, p, gracePeriodOverride); err != nil {
return err
}
@ -912,83 +900,38 @@ func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
return pullSecrets
}
// podStatusIsTerminal reports when the specified pod has no running containers or is no longer accepting
// spec changes.
func (kl *Kubelet) podAndContainersAreTerminal(pod *v1.Pod) (containersTerminal, podWorkerTerminal bool) {
// Check the cached pod status which was set after the last sync.
status, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok {
// If there is no cached status, use the status from the
// apiserver. This is useful if kubelet has recently been
// restarted.
status = pod.Status
func countRunningContainerStatus(status v1.PodStatus) int {
var runningContainers int
for _, c := range status.InitContainerStatuses {
if c.State.Running != nil {
runningContainers++
}
}
// A pod transitions into failed or succeeded from either container lifecycle (RestartNever container
// fails) or due to external events like deletion or eviction. A terminal pod *should* have no running
// containers, but to know that the pod has completed its lifecycle you must wait for containers to also
// be terminal.
containersTerminal = notRunning(status.ContainerStatuses)
// The kubelet must accept config changes from the pod spec until it has reached a point where changes would
// have no effect on any running container.
podWorkerTerminal = status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && containersTerminal)
return
}
// podIsTerminated returns true if the provided pod is in a terminal phase ("Failed", "Succeeded") or
// has been deleted and has no running containers. This corresponds to when a pod must accept changes to
// its pod spec (e.g. terminating containers allow grace period to be shortened).
func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
_, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
return podWorkerTerminal
}
// IsPodTerminated returns true if the pod with the provided UID is in a terminal phase ("Failed",
// "Succeeded") or has been deleted and has no running containers. This corresponds to when a pod must
// accept changes to its pod spec (e.g. terminating containers allow grace period to be shortened)
func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
pod, podFound := kl.podManager.GetPodByUID(uid)
if !podFound {
return true
for _, c := range status.ContainerStatuses {
if c.State.Running != nil {
runningContainers++
}
}
return kl.podIsTerminated(pod)
}
// IsPodDeleted returns true if the pod is deleted. For the pod to be deleted, either:
// 1. The pod object is deleted
// 2. The pod's status is evicted
// 3. The pod's deletion timestamp is set, and containers are not running
func (kl *Kubelet) IsPodDeleted(uid types.UID) bool {
pod, podFound := kl.podManager.GetPodByUID(uid)
if !podFound {
return true
for _, c := range status.EphemeralContainerStatuses {
if c.State.Running != nil {
runningContainers++
}
}
status, statusFound := kl.statusManager.GetPodStatus(pod.UID)
if !statusFound {
status = pod.Status
}
return eviction.PodIsEvicted(status) || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
return runningContainers
}
// PodResourcesAreReclaimed returns true if all required node-level resources that a pod was consuming have
// been reclaimed by the kubelet. Reclaiming resources is a prerequisite to deleting a pod from the API server.
func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bool {
if !notRunning(status.ContainerStatuses) {
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) {
// We shouldn't delete pods that still have running containers
klog.V(3).InfoS("Pod is terminated, but some containers are still running", "pod", klog.KObj(pod))
return false
}
// pod's containers should be deleted
runtimeStatus, err := kl.podCache.Get(pod.UID)
if err != nil {
klog.V(3).InfoS("Pod is terminated, Error getting runtimeStatus from the podCache", "pod", klog.KObj(pod), "err", err)
return false
}
if len(runtimeStatus.ContainerStatuses) > 0 {
var statusStr string
for _, status := range runtimeStatus.ContainerStatuses {
statusStr += fmt.Sprintf("%+v ", *status)
}
klog.V(3).InfoS("Pod is terminated, but some containers have not been cleaned up", "pod", klog.KObj(pod), "status", statusStr)
if count := countRunningContainerStatus(status); count > 0 {
// We shouldn't delete pods until the reported pod status contains no more running containers (the previous
// check ensures no more status can be generated, this check verifies we have seen enough of the status)
klog.V(3).InfoS("Pod is terminated, but some container status has not yet been reported", "pod", klog.KObj(pod), "running", count)
return false
}
if kl.podVolumesExist(pod.UID) && !kl.keepTerminatedPodVolumes {
@ -1003,6 +946,12 @@ func (kl *Kubelet) PodResourcesAreReclaimed(pod *v1.Pod, status v1.PodStatus) bo
return false
}
}
// Note: we leave pod containers to be reclaimed in the background since dockershim requires the
// container for retrieving logs and we want to make sure logs are available until the pod is
// physically deleted.
klog.V(3).InfoS("Pod is terminated and all resources are reclaimed", "pod", klog.KObj(pod))
return true
}
@ -1015,23 +964,12 @@ func (kl *Kubelet) podResourcesAreReclaimed(pod *v1.Pod) bool {
return kl.PodResourcesAreReclaimed(pod, status)
}
// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
func notRunning(statuses []v1.ContainerStatus) bool {
for _, status := range statuses {
if status.State.Terminated == nil && status.State.Waiting == nil {
return false
}
}
return true
}
// filterOutTerminatedPods returns the given pods which the status manager
// does not consider failed or succeeded.
// filterOutTerminatedPods returns the pods that could still have running
// containers
func (kl *Kubelet) filterOutTerminatedPods(pods []*v1.Pod) []*v1.Pod {
var filteredPods []*v1.Pod
for _, p := range pods {
if kl.podIsTerminated(p) {
if !kl.podWorkers.CouldHaveRunningContainers(p.UID) {
continue
}
filteredPods = append(filteredPods, p)
@ -1056,9 +994,9 @@ func (kl *Kubelet) removeOrphanedPodStatuses(pods []*v1.Pod, mirrorPods []*v1.Po
// If pod killing is done, podManager.DeleteMirrorPod() is called to delete mirror pod
// from the API server
func (kl *Kubelet) deleteOrphanedMirrorPods() {
podFullNames := kl.podManager.GetOrphanedMirrorPodNames()
for _, podFullname := range podFullNames {
if !kl.podKiller.IsPodPendingTerminationByPodName(podFullname) {
mirrorPods := kl.podManager.GetOrphanedMirrorPodNames()
for _, podFullname := range mirrorPods {
if !kl.podWorkers.IsPodForMirrorPodTerminatingByFullName(podFullname) {
_, err := kl.podManager.DeleteMirrorPod(podFullname, nil)
if err != nil {
klog.ErrorS(err, "Encountered error when deleting mirror pod", "podName", podFullname)
@ -1071,7 +1009,8 @@ func (kl *Kubelet) deleteOrphanedMirrorPods() {
// HandlePodCleanups performs a series of cleanup tasks, including terminating
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
// directories.
// directories. No config changes are sent to pod workers while this method
// is executing which means no new pods can appear.
// NOTE: This function is executed by the main sync loop, so it
// should not contain any blocking calls.
func (kl *Kubelet) HandlePodCleanups() error {
@ -1102,43 +1041,92 @@ func (kl *Kubelet) HandlePodCleanups() error {
// to the apiserver, it could still restart the terminated pod (even
// though the pod was not considered terminated by the apiserver).
// These two conditions could be alleviated by checkpointing kubelet.
activePods := kl.filterOutTerminatedPods(allPods)
desiredPods := make(map[types.UID]sets.Empty)
for _, pod := range activePods {
desiredPods[pod.UID] = sets.Empty{}
// Stop the workers for terminated pods not in the config source
klog.V(3).InfoS("Clean up pod workers for terminated pods")
workingPods := kl.podWorkers.SyncKnownPods(allPods)
allPodsByUID := make(map[types.UID]*v1.Pod)
for _, pod := range allPods {
allPodsByUID[pod.UID] = pod
}
// Stop the workers for no-longer existing pods.
kl.podWorkers.ForgetNonExistingPodWorkers(desiredPods)
kl.probeManager.CleanupPods(desiredPods)
runningPods, err := kl.runtimeCache.GetPods()
// Identify the set of pods that have workers, which should be all pods
// from config that are not terminated, as well as any terminating pods
// that have already been removed from config. Pods that are terminating
// will be added to possiblyRunningPods, to prevent overly aggressive
// cleanup of pod cgroups.
runningPods := make(map[types.UID]sets.Empty)
possiblyRunningPods := make(map[types.UID]sets.Empty)
for uid, sync := range workingPods {
switch sync {
case SyncPodWork:
runningPods[uid] = struct{}{}
possiblyRunningPods[uid] = struct{}{}
case TerminatingPodWork:
possiblyRunningPods[uid] = struct{}{}
}
}
// Stop probing pods that are not running
klog.V(3).InfoS("Clean up probes for terminating and terminated pods")
kl.probeManager.CleanupPods(runningPods)
// Terminate any pods that are observed in the runtime but not
// present in the list of known running pods from config.
runningRuntimePods, err := kl.runtimeCache.GetPods()
if err != nil {
klog.ErrorS(err, "Error listing containers")
return err
}
for _, pod := range runningPods {
if _, found := desiredPods[pod.ID]; !found {
kl.podKiller.KillPod(&kubecontainer.PodPair{APIPod: nil, RunningPod: pod})
for _, runningPod := range runningRuntimePods {
switch workType, ok := workingPods[runningPod.ID]; {
case ok && workType == SyncPodWork, ok && workType == TerminatingPodWork:
// if the pod worker is already in charge of this pod, we don't need to do anything
continue
default:
// If the pod isn't in the set that should be running and isn't already terminating, terminate
// now. This termination is aggressive because all known pods should already be in a known state
// (i.e. a removed static pod should already be terminating), so these are pods that were
// orphaned due to kubelet restart or bugs. Since housekeeping blocks other config changes, we
// know that another pod wasn't started in the background so we are safe to terminate the
// unknown pods.
if _, ok := allPodsByUID[runningPod.ID]; !ok {
klog.V(3).InfoS("Clean up orphaned pod containers", "podUID", runningPod.ID)
one := int64(1)
kl.podWorkers.UpdatePod(UpdatePodOptions{
UpdateType: kubetypes.SyncPodKill,
RunningPod: runningPod,
KillPodOptions: &KillPodOptions{
PodTerminationGracePeriodSecondsOverride: &one,
},
})
}
}
}
// Remove orphaned pod statuses not in the total list of known config pods
klog.V(3).InfoS("Clean up orphaned pod statuses")
kl.removeOrphanedPodStatuses(allPods, mirrorPods)
// Note that we just killed the unwanted pods. This may not yet be reflected
// in the cache. We need to bypass the cache to get the latest set of
// running pods to clean up the volumes.
// TODO: Evaluate the performance impact of bypassing the runtime cache.
runningPods, err = kl.containerRuntime.GetPods(false)
runningRuntimePods, err = kl.containerRuntime.GetPods(false)
if err != nil {
klog.ErrorS(err, "Error listing containers")
return err
}
// Remove any orphaned volumes.
// Note that we pass all pods (including terminated pods) to the function,
// so that we don't remove volumes associated with terminated but not yet
// deleted pods.
err = kl.cleanupOrphanedPodDirs(allPods, runningPods)
// Remove orphaned volumes from pods that are known not to have any
// containers. Note that we pass all pods (including terminated pods) to
// the function, so that we don't remove volumes associated with terminated
// but not yet deleted pods.
// TODO: this method could more aggressively cleanup terminated pods
// in the future (volumes, mount dirs, logs, and containers could all be
// better separated)
klog.V(3).InfoS("Clean up orphaned pod directories")
err = kl.cleanupOrphanedPodDirs(allPods, runningRuntimePods)
if err != nil {
// We want all cleanup tasks to be run even if one of them failed. So
// we just log an error here and continue other cleanup tasks.
@ -1146,162 +1134,23 @@ func (kl *Kubelet) HandlePodCleanups() error {
klog.ErrorS(err, "Failed cleaning up orphaned pod directories")
}
// Remove any orphaned mirror pods.
// Remove any orphaned mirror pods (mirror pods are tracked by name via the
// pod worker)
klog.V(3).InfoS("Clean up orphaned mirror pods")
kl.deleteOrphanedMirrorPods()
// Remove any cgroups in the hierarchy for pods that are no longer running.
// Remove any cgroups in the hierarchy for pods that are definitely no longer
// running (not in the container runtime).
if kl.cgroupsPerQOS {
pcm := kl.containerManager.NewPodContainerManager()
kl.cleanupOrphanedPodCgroups(pcm, cgroupPods, activePods)
klog.V(3).InfoS("Clean up orphaned pod cgroups")
kl.cleanupOrphanedPodCgroups(pcm, cgroupPods, possiblyRunningPods)
}
kl.backOff.GC()
return nil
}
// PodKiller handles requests for killing pods
type PodKiller interface {
// KillPod receives a pod specifier representing the pod to kill
KillPod(podPair *kubecontainer.PodPair)
// PerformPodKillingWork performs the actual pod killing work via calling CRI
// It returns after its Close() func is called and all outstanding pod killing requests are served
PerformPodKillingWork()
// Close ensures that after it's called, then this pod killer wouldn't accept any more pod killing requests
Close()
// IsPodPendingTerminationByPodName checks whether any pod for the given full pod name is pending termination (thread safe)
IsPodPendingTerminationByPodName(podFullname string) bool
// IsPodPendingTerminationByUID checks whether the mirror pod for the given uid is pending termination (thread safe)
IsPodPendingTerminationByUID(uid types.UID) bool
}
// podKillerWithChannel is an implementation of PodKiller which receives pod killing requests via channel
type podKillerWithChannel struct {
// Channel for getting pods to kill.
podKillingCh chan *kubecontainer.PodPair
// lock for synchronization between HandlePodCleanups and pod killer
podKillingLock *sync.RWMutex
// mirrorPodTerminationMap keeps track of the progress of mirror pod termination
// The key is the UID of the pod and the value is the full name of the pod
mirrorPodTerminationMap map[string]string
// podTerminationMap keeps track of the progress of pod termination.
// The key is the UID of the pod and the value is the full name of the pod
podTerminationMap map[string]string
// killPod is the func which invokes runtime to kill the pod
killPod func(pod *v1.Pod, runningPod *kubecontainer.Pod, status *kubecontainer.PodStatus, gracePeriodOverride *int64) error
}
// NewPodKiller returns a functional PodKiller
func NewPodKiller(kl *Kubelet) PodKiller {
podKiller := &podKillerWithChannel{
podKillingCh: make(chan *kubecontainer.PodPair, podKillingChannelCapacity),
podKillingLock: &sync.RWMutex{},
mirrorPodTerminationMap: make(map[string]string),
podTerminationMap: make(map[string]string),
killPod: kl.killPod,
}
return podKiller
}
// IsPodPendingTerminationByUID checks whether the pod for the given uid is pending termination
func (pk *podKillerWithChannel) IsPodPendingTerminationByUID(uid types.UID) bool {
pk.podKillingLock.RLock()
defer pk.podKillingLock.RUnlock()
if _, ok := pk.podTerminationMap[string(uid)]; ok {
return ok
}
if _, ok := pk.mirrorPodTerminationMap[string(uid)]; ok {
return ok
}
return false
}
// IsPodPendingTerminationByPodName checks whether the given pod is in grace period of termination
func (pk *podKillerWithChannel) IsPodPendingTerminationByPodName(podFullname string) bool {
pk.podKillingLock.RLock()
defer pk.podKillingLock.RUnlock()
for _, name := range pk.mirrorPodTerminationMap {
if name == podFullname {
return true
}
}
for _, name := range pk.podTerminationMap {
if name == podFullname {
return true
}
}
return false
}
func (pk *podKillerWithChannel) markPodTerminated(uid string) {
klog.V(4).InfoS("Marking pod termination", "podUID", uid)
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
delete(pk.mirrorPodTerminationMap, uid)
delete(pk.podTerminationMap, uid)
}
// KillPod sends a pod killing request to the killer after marking the pod,
// unless the given pod has already been marked to be killed
func (pk *podKillerWithChannel) KillPod(podPair *kubecontainer.PodPair) {
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
var apiPodExists bool
var runningPodExists bool
if podPair.APIPod != nil {
uid := string(podPair.APIPod.UID)
_, apiPodExists = pk.mirrorPodTerminationMap[uid]
if !apiPodExists {
fullname := kubecontainer.GetPodFullName(podPair.APIPod)
klog.V(4).InfoS("Marking api pod pending termination", "podName", fullname, "podUID", uid)
pk.mirrorPodTerminationMap[uid] = fullname
}
}
if podPair.RunningPod != nil {
uid := string(podPair.RunningPod.ID)
_, runningPodExists = pk.podTerminationMap[uid]
if !runningPodExists {
fullname := podPair.RunningPod.Name
klog.V(4).InfoS("Marking running pod pending termination", "podName", fullname, "podUID", uid)
pk.podTerminationMap[uid] = fullname
}
}
if apiPodExists || runningPodExists {
if apiPodExists && runningPodExists {
klog.V(4).InfoS("Api pod and running pod are pending termination", "apiPodUID", podPair.APIPod.UID, "runningPodUID", podPair.RunningPod.ID)
} else if apiPodExists {
klog.V(4).InfoS("Api pod is pending termination", "podUID", podPair.APIPod.UID)
} else {
klog.V(4).InfoS("Running pod is pending termination", "podUID", podPair.RunningPod.ID)
}
return
}
// Limit to one request per pod
pk.podKillingCh <- podPair
}
// Close closes the channel through which requests are delivered
func (pk *podKillerWithChannel) Close() {
close(pk.podKillingCh)
}
// PerformPodKillingWork launches a goroutine to kill a pod received from the channel if
// another goroutine isn't already in action.
func (pk *podKillerWithChannel) PerformPodKillingWork() {
for podPair := range pk.podKillingCh {
runningPod := podPair.RunningPod
apiPod := podPair.APIPod
go func(apiPod *v1.Pod, runningPod *kubecontainer.Pod) {
klog.V(2).InfoS("Killing unwanted pod", "podName", runningPod.Name)
err := pk.killPod(apiPod, runningPod, nil, nil)
if err != nil {
klog.ErrorS(err, "Failed killing the pod", "podName", runningPod.Name)
}
pk.markPodTerminated(string(runningPod.ID))
}(apiPod, runningPod)
}
}
// validateContainerLogStatus returns the container ID for the desired container to retrieve logs for, based on the state
// of the container. The previous flag will only return the logs for the last terminated container, otherwise, the current
// running container is preferred over a previous termination. If info about the container is not available then a specific
@ -2014,25 +1863,14 @@ func (kl *Kubelet) GetPortForward(podName, podNamespace string, podUID types.UID
// cleanupOrphanedPodCgroups removes cgroups that should no longer exist.
// it reconciles the cached state of cgroupPods with the specified list of runningPods
func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupPods map[types.UID]cm.CgroupName, activePods []*v1.Pod) {
// Add all running pods to the set that we want to preserve
podSet := sets.NewString()
for _, pod := range activePods {
podSet.Insert(string(pod.UID))
}
func (kl *Kubelet) cleanupOrphanedPodCgroups(pcm cm.PodContainerManager, cgroupPods map[types.UID]cm.CgroupName, possiblyRunningPods map[types.UID]sets.Empty) {
// Iterate over all the found pods to verify if they should be running
for uid, val := range cgroupPods {
// if the pod is in the running set, it's not a candidate for cleanup
if podSet.Has(string(uid)) {
if _, ok := possiblyRunningPods[uid]; ok {
continue
}
// if the pod is within termination grace period, we shouldn't cleanup the underlying cgroup
if kl.podKiller.IsPodPendingTerminationByUID(uid) {
klog.V(3).InfoS("Pod is pending termination", "podUID", uid)
continue
}
// If volumes have not been unmounted/detached, do not delete the cgroup
// so any memory backed volumes don't have their charges propagated to the
// parent croup. If the volumes still exist, reduce the cpu shares for any

View File

@ -17,9 +17,11 @@ limitations under the License.
package kubelet
import (
"context"
"fmt"
"io/ioutil"
"os"
"reflect"
"sort"
"strconv"
"testing"
@ -119,7 +121,6 @@ type TestKubelet struct {
func (tk *TestKubelet) Cleanup() {
if tk.kubelet != nil {
tk.kubelet.podKiller.Close()
os.RemoveAll(tk.kubelet.rootDirectory)
tk.kubelet = nil
}
@ -293,12 +294,10 @@ func newTestKubeletWithImageList(
fakeClock := clock.NewFakeClock(time.Now())
kubelet.backOff = flowcontrol.NewBackOff(time.Second, time.Minute)
kubelet.backOff.Clock = fakeClock
kubelet.podKiller = NewPodKiller(kubelet)
go kubelet.podKiller.PerformPodKillingWork()
kubelet.resyncInterval = 10 * time.Second
kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock)
// Relist period does not affect the tests.
kubelet.pleg = pleg.NewGenericPLEG(fakeRuntime, 100, time.Hour, nil, clock.RealClock{})
kubelet.pleg = pleg.NewGenericPLEG(fakeRuntime, 100, time.Hour, kubelet.podCache, clock.RealClock{})
kubelet.clock = fakeClock
nodeRef := &v1.ObjectReference{
@ -340,7 +339,7 @@ func newTestKubeletWithImageList(
controllerAttachDetachEnabled,
kubelet.nodeName,
kubelet.podManager,
kubelet.statusManager,
kubelet.podWorkers,
fakeKubeClient,
kubelet.volumePluginMgr,
fakeRuntime,
@ -450,10 +449,12 @@ func TestHandlePodCleanupsPerQOS(t *testing.T) {
// mark the pod as killed (within this test case).
kubelet.HandlePodCleanups()
time.Sleep(2 * time.Second)
// assert that unwanted pods were killed
fakeRuntime.AssertKilledPods([]string{"12345678"})
if actual, expected := kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion, []types.UID{"12345678"}; !reflect.DeepEqual(actual, expected) {
t.Fatalf("expected %v to be deleted, got %v", expected, actual)
}
fakeRuntime.AssertKilledPods([]string(nil))
// simulate Runtime.KillPod
fakeRuntime.PodList = nil
@ -461,7 +462,6 @@ func TestHandlePodCleanupsPerQOS(t *testing.T) {
kubelet.HandlePodCleanups()
kubelet.HandlePodCleanups()
kubelet.HandlePodCleanups()
time.Sleep(2 * time.Second)
destroyCount := 0
err := wait.Poll(100*time.Millisecond, 10*time.Second, func() (bool, error) {
@ -488,9 +488,11 @@ func TestDispatchWorkOfCompletedPod(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
var got bool
kubelet.podWorkers = &fakePodWorkers{
syncPodFn: func(options syncPodOptions) error {
return fmt.Errorf("should ignore completed pod %q", options.pod.Name)
syncPodFn: func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
got = true
return nil
},
cache: kubelet.podCache,
t: t,
@ -554,6 +556,10 @@ func TestDispatchWorkOfCompletedPod(t *testing.T) {
}
for _, pod := range pods {
kubelet.dispatchWork(pod, kubetypes.SyncPodSync, nil, time.Now())
if !got {
t.Errorf("Should not skip completed pod %q", pod.Name)
}
got = false
}
}
@ -563,7 +569,7 @@ func TestDispatchWorkOfActivePod(t *testing.T) {
kubelet := testKubelet.kubelet
var got bool
kubelet.podWorkers = &fakePodWorkers{
syncPodFn: func(options syncPodOptions) error {
syncPodFn: func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
got = true
return nil
},
@ -631,10 +637,12 @@ func TestHandlePodCleanups(t *testing.T) {
kubelet := testKubelet.kubelet
kubelet.HandlePodCleanups()
time.Sleep(2 * time.Second)
// assert that unwanted pods were killed
fakeRuntime.AssertKilledPods([]string{"12345678"})
// assert that unwanted pods were queued to kill
if actual, expected := kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion, []types.UID{"12345678"}; !reflect.DeepEqual(actual, expected) {
t.Fatalf("expected %v to be deleted, got %v", expected, actual)
}
fakeRuntime.AssertKilledPods([]string(nil))
}
func TestHandlePodRemovesWhenSourcesAreReady(t *testing.T) {
@ -667,44 +675,17 @@ func TestHandlePodRemovesWhenSourcesAreReady(t *testing.T) {
time.Sleep(2 * time.Second)
// Sources are not ready yet. Don't remove any pods.
fakeRuntime.AssertKilledPods(nil)
if expect, actual := []types.UID(nil), kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion; !reflect.DeepEqual(expect, actual) {
t.Fatalf("expected %v kills, got %v", expect, actual)
}
ready = true
kubelet.HandlePodRemoves(pods)
time.Sleep(2 * time.Second)
// Sources are ready. Remove unwanted pods.
fakeRuntime.AssertKilledPods([]string{"1"})
}
func TestKillPodFollwedByIsPodPendingTermination(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
pod := &kubecontainer.Pod{
ID: "12345678",
Name: "foo",
Namespace: "new",
Containers: []*kubecontainer.Container{
{Name: "bar"},
},
}
fakeRuntime := testKubelet.fakeRuntime
fakeContainerManager := testKubelet.fakeContainerManager
fakeContainerManager.PodContainerManager.AddPodFromCgroups(pod) // add pod to mock cgroup
fakeRuntime.PodList = []*containertest.FakePod{
{Pod: pod},
}
kl := testKubelet.kubelet
kl.podKiller.KillPod(&kubecontainer.PodPair{
APIPod: nil,
RunningPod: pod,
})
if !(kl.podKiller.IsPodPendingTerminationByUID(pod.ID) || fakeRuntime.AssertKilledPods([]string{"12345678"})) {
t.Fatal("Race condition: When KillPod is complete, the pod should be pending termination or be killed")
if expect, actual := []types.UID{"1"}, kubelet.podWorkers.(*fakePodWorkers).triggeredDeletion; !reflect.DeepEqual(expect, actual) {
t.Fatalf("expected %v kills, got %v", expect, actual)
}
}
@ -726,6 +707,7 @@ func (nl testNodeLister) List(_ labels.Selector) (ret []*v1.Node, err error) {
}
func checkPodStatus(t *testing.T, kl *Kubelet, pod *v1.Pod, phase v1.PodPhase) {
t.Helper()
status, found := kl.statusManager.GetPodStatus(pod.UID)
require.True(t, found, "Status of pod %q is not found in the status map", pod.UID)
require.Equal(t, phase, status.Phase)
@ -769,6 +751,10 @@ func TestHandlePortConflicts(t *testing.T) {
// The newer pod should be rejected.
notfittingPod := pods[0]
fittingPod := pods[1]
kl.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
pods[0].UID: true,
pods[1].UID: true,
}
kl.HandlePodAdditions(pods)
@ -904,6 +890,10 @@ func TestHandleMemExceeded(t *testing.T) {
// The newer pod should be rejected.
notfittingPod := pods[0]
fittingPod := pods[1]
kl.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
pods[0].UID: true,
pods[1].UID: true,
}
kl.HandlePodAdditions(pods)
@ -1230,11 +1220,7 @@ func TestCreateMirrorPod(t *testing.T) {
pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = "file"
pods := []*v1.Pod{pod}
kl.podManager.SetPods(pods)
err := kl.syncPod(syncPodOptions{
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: updateType,
})
err := kl.syncPod(context.Background(), updateType, pod, nil, &kubecontainer.PodStatus{})
assert.NoError(t, err)
podFullName := kubecontainer.GetPodFullName(pod)
assert.True(t, manager.HasPod(podFullName), "Expected mirror pod %q to be created", podFullName)
@ -1266,12 +1252,7 @@ func TestDeleteOutdatedMirrorPod(t *testing.T) {
pods := []*v1.Pod{pod, mirrorPod}
kl.podManager.SetPods(pods)
err := kl.syncPod(syncPodOptions{
pod: pod,
mirrorPod: mirrorPod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodUpdate,
})
err := kl.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, mirrorPod, &kubecontainer.PodStatus{})
assert.NoError(t, err)
name := kubecontainer.GetPodFullName(pod)
creates, deletes := manager.GetCounts(name)
@ -1309,17 +1290,41 @@ func TestDeleteOrphanedMirrorPods(t *testing.T) {
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
UID: "12345670",
Name: "pod3",
Namespace: "ns",
Annotations: map[string]string{
kubetypes.ConfigSourceAnnotationKey: "api",
kubetypes.ConfigMirrorAnnotationKey: "mirror",
},
},
},
}
kl.podManager.SetPods(orphanPods)
// a static pod that is terminating will not be deleted
kl.podWorkers.(*fakePodWorkers).terminatingStaticPods = map[string]bool{
kubecontainer.GetPodFullName(orphanPods[2]): true,
}
// Sync with an empty pod list to delete all mirror pods.
kl.HandlePodCleanups()
assert.Len(t, manager.GetPods(), 0, "Expected 0 mirror pods")
for _, pod := range orphanPods {
for i, pod := range orphanPods {
name := kubecontainer.GetPodFullName(pod)
creates, deletes := manager.GetCounts(name)
if creates != 0 || deletes != 1 {
t.Errorf("expected 0 creation and one deletion of %q, got %d, %d", name, creates, deletes)
switch i {
case 2:
if creates != 0 || deletes != 0 {
t.Errorf("expected 0 creation and 0 deletion of %q, got %d, %d", name, creates, deletes)
}
default:
if creates != 0 || deletes != 1 {
t.Errorf("expected 0 creation and one deletion of %q, got %d, %d", name, creates, deletes)
}
}
}
}
@ -1404,20 +1409,12 @@ func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
})
kubelet.podManager.SetPods([]*v1.Pod{pod})
err := kubelet.syncPod(syncPodOptions{
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodUpdate,
})
err := kubelet.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, nil, &kubecontainer.PodStatus{})
assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error")
pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
pod.Spec.HostNetwork = true
err = kubelet.syncPod(syncPodOptions{
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodUpdate,
})
err = kubelet.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, nil, &kubecontainer.PodStatus{})
assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
}
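As a reading aid for the rewritten calls above: syncPod now takes positional arguments instead of a syncPodOptions struct. The sketch below is illustrative only (the type alias and helper name are invented here); the parameter list mirrors the closure signature handed to newPodWorkers later in this diff.

package sketch // illustrative, not part of this change

import (
	"context"

	v1 "k8s.io/api/core/v1"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// syncPodFn mirrors the positional syncPod shape exercised by the updated tests:
// (ctx, update type, pod, mirror pod, runtime status) replaces syncPodOptions.
type syncPodFn func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error

// syncOnce performs a single update sync with an empty runtime status, the same
// call pattern used by TestCreateMirrorPod and TestNetworkErrorsWithoutHostNetwork.
func syncOnce(sync syncPodFn, pod *v1.Pod) error {
	return sync(context.Background(), kubetypes.SyncPodUpdate, pod, nil, &kubecontainer.PodStatus{})
}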
@ -1442,6 +1439,12 @@ func TestFilterOutTerminatedPods(t *testing.T) {
pods[3].Status.Phase = v1.PodPending
pods[4].Status.Phase = v1.PodRunning
kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
pods[2].UID: true,
pods[3].UID: true,
pods[4].UID: true,
}
expected := []*v1.Pod{pods[2], pods[3], pods[4]}
kubelet.podManager.SetPods(pods)
actual := kubelet.filterOutTerminatedPods(pods)
@ -1586,17 +1589,12 @@ func TestDeletePodDirsForDeletedPods(t *testing.T) {
}
func syncAndVerifyPodDir(t *testing.T, testKubelet *TestKubelet, pods []*v1.Pod, podsToCheck []*v1.Pod, shouldExist bool) {
t.Helper()
kl := testKubelet.kubelet
kl.podManager.SetPods(pods)
kl.HandlePodSyncs(pods)
kl.HandlePodCleanups()
// The first time HandlePodCleanups() is run the pod is placed into the
// podKiller, and bypasses the pod directory cleanup. The pod is
// already killed in the second run to HandlePodCleanups() and will
// cleanup the directories.
time.Sleep(2 * time.Second)
kl.HandlePodCleanups()
for i, pod := range podsToCheck {
exist := dirExists(kl.getPodDir(pod.UID))
assert.Equal(t, shouldExist, exist, "directory of pod %d", i)
@ -1612,7 +1610,6 @@ func TestDoesNotDeletePodDirsForTerminatedPods(t *testing.T) {
podWithUIDNameNs("12345679", "pod2", "ns"),
podWithUIDNameNs("12345680", "pod3", "ns"),
}
syncAndVerifyPodDir(t, testKubelet, pods, pods, true)
// Pod 1 failed, and pod 2 succeeded. None of the pod directories should be
// deleted.
@ -1634,6 +1631,7 @@ func TestDoesNotDeletePodDirsIfContainerIsRunning(t *testing.T) {
// Sync once to create pod directory; confirm that the pod directory has
// already been created.
pods := []*v1.Pod{apiPod}
testKubelet.kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{apiPod.UID: true}
syncAndVerifyPodDir(t, testKubelet, pods, []*v1.Pod{apiPod}, true)
// Pretend the pod is deleted from apiserver, but is still active on the node.
@ -1646,6 +1644,7 @@ func TestDoesNotDeletePodDirsIfContainerIsRunning(t *testing.T) {
// should be removed.
pods = []*v1.Pod{}
testKubelet.fakeRuntime.PodList = []*containertest.FakePod{}
testKubelet.kubelet.podWorkers.(*fakePodWorkers).running = nil
syncAndVerifyPodDir(t, testKubelet, pods, []*v1.Pod{apiPod}, false)
}
@ -2233,7 +2232,7 @@ func TestGenerateAPIPodStatusInvokesPodSyncHandlers(t *testing.T) {
require.Equal(t, "because", apiStatus.Message)
}
func TestSyncPodKillPod(t *testing.T) {
func TestSyncTerminatingPodKillPod(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kl := testKubelet.kubelet
@ -2246,21 +2245,12 @@ func TestSyncPodKillPod(t *testing.T) {
}
pods := []*v1.Pod{pod}
kl.podManager.SetPods(pods)
podStatus := &kubecontainer.PodStatus{ID: pod.UID}
gracePeriodOverride := int64(0)
err := kl.syncPod(syncPodOptions{
pod: pod,
podStatus: &kubecontainer.PodStatus{},
updateType: kubetypes.SyncPodKill,
killPodOptions: &KillPodOptions{
PodStatusFunc: func(p *v1.Pod, podStatus *kubecontainer.PodStatus) v1.PodStatus {
return v1.PodStatus{
Phase: v1.PodFailed,
Reason: "reason",
Message: "message",
}
},
PodTerminationGracePeriodSecondsOverride: &gracePeriodOverride,
},
err := kl.syncTerminatingPod(context.Background(), pod, podStatus, nil, &gracePeriodOverride, func(podStatus *v1.PodStatus) {
podStatus.Phase = v1.PodFailed
podStatus.Reason = "reason"
podStatus.Message = "message"
})
require.NoError(t, err)
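The kill path follows the same callback style: callers no longer hand over a pre-built v1.PodStatus but pass a mutation function, as the killPodsFunc stubs in the node-shutdown tests further down illustrate. A minimal sketch, with invented type and helper names:

package sketch // illustrative, not part of this change

import (
	v1 "k8s.io/api/core/v1"
)

// killPodFn mirrors the reworked kill signature used throughout this change:
// (pod, evict flag, grace period override, status mutation callback).
type killPodFn func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error

// failPod is a hypothetical caller: it requests termination with a zero grace
// period and records why the pod failed by mutating the status in place.
func failPod(kill killPodFn, pod *v1.Pod, reason, message string) error {
	grace := int64(0)
	return kill(pod, false /* not an eviction */, &grace, func(status *v1.PodStatus) {
		status.Phase = v1.PodFailed
		status.Reason = reason
		status.Message = message
	})
}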


@ -186,11 +186,6 @@ func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*v1.Pod, runningPods []*kubecon
if allPods.Has(string(uid)) {
continue
}
// if the pod is within termination grace period, we shouldn't cleanup the underlying volumes
if kl.podKiller.IsPodPendingTerminationByUID(uid) {
klog.V(3).InfoS("Pod is pending termination", "podUID", uid)
continue
}
// If volumes have not been unmounted/detached, do not delete directory.
// Doing so may result in corruption of data.
// TODO: getMountedVolumePathListFromDisk() call may be redundant with


@ -315,9 +315,14 @@ func TestVolumeUnmountAndDetachControllerDisabled(t *testing.T) {
1 /* expectedSetUpCallCount */, testKubelet.volumePlugin))
// Remove pod
// TODO: this may not be threadsafe (technically waitForVolumeUnmount)
kubelet.podWorkers.(*fakePodWorkers).removeRuntime = map[types.UID]bool{pod.UID: true}
kubelet.podManager.SetPods([]*v1.Pod{})
assert.NoError(t, waitForVolumeUnmount(kubelet.volumeManager, pod))
assert.NoError(t, kubelet.volumeManager.WaitForUnmount(pod))
if actual := kubelet.volumeManager.GetMountedVolumesForPod(util.GetUniquePodName(pod)); len(actual) > 0 {
t.Fatalf("expected volume unmount to wait for no volumes: %v", actual)
}
// Verify volumes unmounted
podVolumes = kubelet.volumeManager.GetMountedVolumesForPod(
@ -499,6 +504,7 @@ func TestVolumeUnmountAndDetachControllerEnabled(t *testing.T) {
1 /* expectedSetUpCallCount */, testKubelet.volumePlugin))
// Remove pod
kubelet.podWorkers.(*fakePodWorkers).removeRuntime = map[types.UID]bool{pod.UID: true}
kubelet.podManager.SetPods([]*v1.Pod{})
assert.NoError(t, waitForVolumeUnmount(kubelet.volumeManager, pod))


@ -56,25 +56,25 @@ func (f *fakeHTTP) Get(url string) (*http.Response, error) {
}
type fakePodStateProvider struct {
existingPods map[types.UID]struct{}
runningPods map[types.UID]struct{}
terminated map[types.UID]struct{}
removed map[types.UID]struct{}
}
func newFakePodStateProvider() *fakePodStateProvider {
return &fakePodStateProvider{
existingPods: make(map[types.UID]struct{}),
runningPods: make(map[types.UID]struct{}),
terminated: make(map[types.UID]struct{}),
removed: make(map[types.UID]struct{}),
}
}
func (f *fakePodStateProvider) IsPodDeleted(uid types.UID) bool {
_, found := f.existingPods[uid]
return !found
func (f *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid types.UID) bool {
_, found := f.terminated[uid]
return found
}
func (f *fakePodStateProvider) IsPodTerminated(uid types.UID) bool {
_, found := f.runningPods[uid]
return !found
func (f *fakePodStateProvider) ShouldPodContentBeRemoved(uid types.UID) bool {
_, found := f.removed[uid]
return found
}
func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) {


@ -220,7 +220,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE
}
// evict all containers that are evictable
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
// Separate containers by evict units.
evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
if err != nil {
@ -230,7 +230,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSour
// Remove deleted pod containers if all sources are ready.
if allSourcesReady {
for key, unit := range evictUnits {
if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
if cgc.podStateProvider.ShouldPodContentBeRemoved(key.uid) || (evictNonDeletedPods && cgc.podStateProvider.ShouldPodRuntimeBeRemoved(key.uid)) {
cgc.removeOldestN(unit, len(unit)) // Remove all.
delete(evictUnits, key)
}
@ -272,7 +272,7 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.GCPolicy, allSour
// 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
@ -311,7 +311,7 @@ func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
if cgc.podStateProvider.ShouldPodContentBeRemoved(podUID) || (evictNonDeletedPods && cgc.podStateProvider.ShouldPodRuntimeBeRemoved(podUID)) {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
@ -337,9 +337,10 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
for _, dir := range dirs {
name := dir.Name()
podUID := parsePodUIDFromLogsDirectory(name)
if !cgc.podStateProvider.IsPodDeleted(podUID) {
if !cgc.podStateProvider.ShouldPodContentBeRemoved(podUID) {
continue
}
klog.V(4).InfoS("Removing pod logs", "podUID", podUID)
err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
if err != nil {
klog.ErrorS(err, "Failed to remove pod logs directory", "path", name)
@ -397,15 +398,15 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contains no containers.
// * removes evictable sandboxes.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
errors := []error{}
// Remove evictable containers
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods); err != nil {
if err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods); err != nil {
errors = append(errors, err)
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(evictTerminatedPods); err != nil {
if err := cgc.evictSandboxes(evictNonDeletedPods); err != nil {
errors = append(errors, err)
}
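With the flag renamed to evictNonDeletedPods, the eviction condition above reads: remove a pod's containers and sandboxes when its content may be removed, or when the flag is set and only its runtime needs to go (terminated but not yet deleted). A small illustrative caller, assuming only the GarbageCollect signature shown above:

package sketch // illustrative, not part of this change

import (
	"time"

	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// garbageCollector is an invented interface capturing the entry point above;
// the real receiver is *containerGC.
type garbageCollector interface {
	GarbageCollect(gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error
}

// collectOnce shows a typical call: all sources are ready, and pods that are
// terminated but not yet deleted keep their containers and sandboxes.
func collectOnce(gc garbageCollector) error {
	policy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
	return gc.GarbageCollect(policy, true /* allSourcesReady */, false /* evictNonDeletedPods */)
}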


@ -24,7 +24,8 @@ import (
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
@ -35,16 +36,14 @@ func TestSandboxGC(t *testing.T) {
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodStateProvider bool, createdAt int64) sandboxTemplate {
if withPodStateProvider {
// initialize the pod getter
podStateProvider.existingPods[pod.UID] = struct{}{}
}
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, hasRunningContainers, isTerminating bool, createdAt int64) sandboxTemplate {
return sandboxTemplate{
pod: pod,
state: state,
attempt: attempt,
createdAt: createdAt,
pod: pod,
state: state,
attempt: attempt,
createdAt: createdAt,
running: hasRunningContainers,
terminating: isTerminating,
}
}
@ -61,133 +60,138 @@ func TestSandboxGC(t *testing.T) {
}),
}
for c, test := range []struct {
description string // description of the test case
sandboxes []sandboxTemplate // templates of sandboxes
containers []containerTemplate // templates of containers
remain []int // template indexes of remaining sandboxes
evictTerminatedPods bool
for _, test := range []struct {
description string // description of the test case
sandboxes []sandboxTemplate // templates of sandboxes
containers []containerTemplate // templates of containers
remain []int // template indexes of remaining sandboxes
evictTerminatingPods bool
}{
{
description: "notready sandboxes without containers for deleted pods should be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, false, 0),
},
containers: []containerTemplate{},
remain: []int{},
evictTerminatedPods: false,
containers: []containerTemplate{},
remain: []int{},
evictTerminatingPods: false,
},
{
description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, 0),
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false, false, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictTerminatedPods: false,
containers: []containerTemplate{},
remain: []int{0},
evictTerminatingPods: false,
},
{
description: "sandboxes for existing pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, false, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
},
containers: []containerTemplate{},
remain: []int{0, 1},
evictTerminatedPods: false,
containers: []containerTemplate{},
remain: []int{0, 1},
evictTerminatingPods: false,
},
{
description: "older exited sandboxes without containers for existing pods should be garbage collected if there are more than one exited sandboxes.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 1),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictTerminatedPods: false,
containers: []containerTemplate{},
remain: []int{0},
evictTerminatingPods: false,
},
{
description: "older exited sandboxes with containers for existing pods should not be garbage collected even if there are more than one exited sandboxes.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 1),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
},
containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], sandboxAttempt: 0, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 1},
evictTerminatedPods: false,
remain: []int{0, 1},
evictTerminatingPods: false,
},
{
description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatedPods is set.",
description: "non-running sandboxes for existing pods should be garbage collected if evictTerminatingPods is set.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, true, 0),
},
containers: []containerTemplate{},
remain: []int{0},
evictTerminatedPods: true,
containers: []containerTemplate{},
remain: []int{0},
evictTerminatingPods: true,
},
{
description: "sandbox with containers should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, false, 0),
},
containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0},
evictTerminatedPods: false,
remain: []int{0},
evictTerminatingPods: false,
},
{
description: "multiple sandboxes should be handled properly.",
sandboxes: []sandboxTemplate{
// running sandbox.
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true, 1),
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true, false, 1),
// exited sandbox without containers.
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
// exited sandbox with containers.
makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 1),
makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 1),
// exited sandbox without containers.
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, 0),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true, false, 0),
// exited sandbox without containers for deleted pods.
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, 0),
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false, true, 0),
},
containers: []containerTemplate{
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 2},
evictTerminatedPods: false,
remain: []int{0, 2},
evictTerminatingPods: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
fakeSandboxes := makeFakePodSandboxes(t, m, test.sandboxes)
fakeContainers := makeFakeContainers(t, m, test.containers)
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
t.Run(test.description, func(t *testing.T) {
podStateProvider.removed = make(map[types.UID]struct{})
podStateProvider.terminated = make(map[types.UID]struct{})
fakeSandboxes := makeFakePodSandboxes(t, m, test.sandboxes)
fakeContainers := makeFakeContainers(t, m, test.containers)
for _, s := range test.sandboxes {
if !s.running && s.pod.Name == "deleted" {
podStateProvider.removed[s.pod.UID] = struct{}{}
}
if s.terminating {
podStateProvider.terminated[s.pod.UID] = struct{}{}
}
}
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
err := m.containerGC.evictSandboxes(test.evictTerminatedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.PodSandboxStatus(fakeSandboxes[remain].Id)
err := m.containerGC.evictSandboxes(test.evictTerminatingPods)
assert.NoError(t, err)
assert.Equal(t, &fakeSandboxes[remain].PodSandboxStatus, status)
}
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.PodSandboxStatus(fakeSandboxes[remain].Id)
assert.NoError(t, err)
assert.Equal(t, &fakeSandboxes[remain].PodSandboxStatus, status)
}
})
}
}
func makeGCContainer(podStateProvider *fakePodStateProvider, podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
func makeGCContainer(podName, containerName string, attempt int, createdAt int64, state runtimeapi.ContainerState) containerTemplate {
container := makeTestContainer(containerName, "test-image")
pod := makeTestPod(podName, "test-ns", podName, []v1.Container{container})
if podName == "running" {
podStateProvider.runningPods[pod.UID] = struct{}{}
}
if podName != "deleted" {
podStateProvider.existingPods[pod.UID] = struct{}{}
}
return containerTemplate{
pod: pod,
container: &container,
@ -204,201 +208,212 @@ func TestContainerGC(t *testing.T) {
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
defaultGCPolicy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 2, MaxContainers: 6}
for c, test := range []struct {
description string // description of the test case
containers []containerTemplate // templates of containers
policy *kubecontainer.GCPolicy // container gc policy
remain []int // template indexes of remaining containers
evictTerminatedPods bool
allSourcesReady bool
for _, test := range []struct {
description string // description of the test case
containers []containerTemplate // templates of containers
policy *kubecontainer.GCPolicy // container gc policy
remain []int // template indexes of remaining containers
evictTerminatingPods bool
allSourcesReady bool
}{
{
description: "all containers should be removed when max container limit is 0",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
evictTerminatedPods: false,
allSourcesReady: true,
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: 1, MaxContainers: 0},
remain: []int{},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "max containers should be complied when no max per pod container limit is set",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 4, 4, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 3, 3, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
evictTerminatedPods: false,
allSourcesReady: true,
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: 4},
remain: []int{0, 1, 2, 3},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "no containers should be removed if both max container and per pod container limits are not set",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
allSourcesReady: true,
policy: &kubecontainer.GCPolicy{MinAge: time.Minute, MaxPerPodContainer: -1, MaxContainers: -1},
remain: []int{0, 1, 2},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "recently started containers should not be removed",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 1, 2},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "oldest containers should be removed when per pod container limit exceeded",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 1},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "running containers should not be removed",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 1, 2},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "no containers should be removed when limits are not exceeded",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 1},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "max container count should apply per (UID, container) pair",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "baz", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 3, 4, 6, 7},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 1, 3, 4, 6, 7},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "max limit should apply and try to keep from every pod",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 2, 4, 6, 8},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "oldest pods should be removed if limit exceeded",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo2", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo4", "bar4", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo5", "bar5", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo6", "bar6", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo7", "bar7", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 2, 4, 6, 8, 9},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 2, 4, 6, 8, 9},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "all non-running containers should be removed when evictTerminatedPods is set",
description: "all non-running containers should be removed when evictTerminatingPods is set",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
makeGCContainer("foo", "bar", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 2, 2, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo1", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("running", "bar2", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo3", "bar3", 0, 0, runtimeapi.ContainerState_CONTAINER_RUNNING),
},
remain: []int{4, 5},
evictTerminatedPods: true,
allSourcesReady: true,
remain: []int{4, 5},
evictTerminatingPods: true,
allSourcesReady: true,
},
{
description: "containers for deleted pods should be removed",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
// deleted pods still respect MinAge.
makeGCContainer(podStateProvider, "deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 2, time.Now().UnixNano(), runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 1, 1, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0, 1, 2},
evictTerminatedPods: false,
allSourcesReady: true,
remain: []int{0, 1, 2},
evictTerminatingPods: false,
allSourcesReady: true,
},
{
description: "containers for deleted pods may not be removed if allSourcesReady is set false ",
containers: []containerTemplate{
makeGCContainer(podStateProvider, "deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
makeGCContainer("deleted", "bar1", 0, 0, runtimeapi.ContainerState_CONTAINER_EXITED),
},
remain: []int{0},
evictTerminatedPods: true,
allSourcesReady: false,
remain: []int{0},
evictTerminatingPods: true,
allSourcesReady: false,
},
} {
t.Logf("TestCase #%d: %+v", c, test)
fakeContainers := makeFakeContainers(t, m, test.containers)
fakeRuntime.SetFakeContainers(fakeContainers)
t.Run(test.description, func(t *testing.T) {
podStateProvider.removed = make(map[types.UID]struct{})
podStateProvider.terminated = make(map[types.UID]struct{})
fakeContainers := makeFakeContainers(t, m, test.containers)
for _, s := range test.containers {
if s.pod.Name == "deleted" {
podStateProvider.removed[s.pod.UID] = struct{}{}
}
if s.pod.Name != "running" {
podStateProvider.terminated[s.pod.UID] = struct{}{}
}
}
fakeRuntime.SetFakeContainers(fakeContainers)
if test.policy == nil {
test.policy = &defaultGCPolicy
}
err := m.containerGC.evictContainers(*test.policy, test.allSourcesReady, test.evictTerminatedPods)
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.ContainerStatus(fakeContainers[remain].Id)
if test.policy == nil {
test.policy = &defaultGCPolicy
}
err := m.containerGC.evictContainers(*test.policy, test.allSourcesReady, test.evictTerminatingPods)
assert.NoError(t, err)
assert.Equal(t, &fakeContainers[remain].ContainerStatus, status)
}
realRemain, err := fakeRuntime.ListContainers(nil)
assert.NoError(t, err)
assert.Len(t, realRemain, len(test.remain))
for _, remain := range test.remain {
status, err := fakeRuntime.ContainerStatus(fakeContainers[remain].Id)
assert.NoError(t, err)
assert.Equal(t, &fakeContainers[remain].ContainerStatus, status)
}
})
}
}
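A note on the restructured tables above: each case now runs in its own t.Run subtest and resets the shared fakePodStateProvider maps before seeding them from that case's templates, so earlier cases cannot leak state into later ones. A stripped-down sketch of the pattern (the provider type and case fields are invented stand-ins):

package sketch // illustrative, not part of this change

import (
	"testing"

	"k8s.io/apimachinery/pkg/types"
)

// fakeProvider models only the two maps the tests above reset per case.
type fakeProvider struct {
	removed    map[types.UID]struct{}
	terminated map[types.UID]struct{}
}

func runCases(t *testing.T, provider *fakeProvider) {
	cases := []struct {
		name    string
		removed []types.UID
	}{
		{name: "example", removed: []types.UID{"deleted-pod"}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Reset shared fake state before seeding it for this case.
			provider.removed = make(map[types.UID]struct{})
			provider.terminated = make(map[types.UID]struct{})
			for _, uid := range tc.removed {
				provider.removed[uid] = struct{}{}
			}
		})
	}
}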
@ -410,18 +425,15 @@ func TestPodLogDirectoryGC(t *testing.T) {
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
// pod log directories without corresponding pods should be removed.
podStateProvider.existingPods["123"] = struct{}{}
podStateProvider.existingPods["456"] = struct{}{}
podStateProvider.existingPods["321"] = struct{}{}
podStateProvider.runningPods["123"] = struct{}{}
podStateProvider.runningPods["456"] = struct{}{}
podStateProvider.existingPods["321"] = struct{}{}
files := []string{"123", "456", "789", "012", "name_namespace_321", "name_namespace_654"}
removed := []string{
filepath.Join(podLogsRootDirectory, "789"),
filepath.Join(podLogsRootDirectory, "012"),
filepath.Join(podLogsRootDirectory, "name_namespace_654"),
}
podStateProvider.removed["012"] = struct{}{}
podStateProvider.removed["789"] = struct{}{}
podStateProvider.removed["654"] = struct{}{}
ctrl := gomock.NewController(t)
defer ctrl.Finish()
@ -452,11 +464,11 @@ func TestUnknownStateContainerGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
// podStateProvider := m.containerGC.podStateProvider.(*fakePodStateProvider)
defaultGCPolicy := kubecontainer.GCPolicy{MinAge: time.Hour, MaxPerPodContainer: 0, MaxContainers: 0}
fakeContainers := makeFakeContainers(t, m, []containerTemplate{
makeGCContainer(podStateProvider, "foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN),
makeGCContainer("foo", "bar", 0, 0, runtimeapi.ContainerState_CONTAINER_UNKNOWN),
})
fakeRuntime.SetFakeContainers(fakeContainers)


@ -75,10 +75,11 @@ var (
ErrVersionNotSupported = errors.New("runtime api version is not supported")
)
// podStateProvider can determine if a pod is deleted or terminated
// podStateProvider can determine if none of the elements are necessary to retain (pod content)
// or if none of the runtime elements are necessary to retain (containers)
type podStateProvider interface {
IsPodDeleted(kubetypes.UID) bool
IsPodTerminated(kubetypes.UID) bool
ShouldPodContentBeRemoved(kubetypes.UID) bool
ShouldPodRuntimeBeRemoved(kubetypes.UID) bool
}
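The split replaces the old deleted/terminated checks with questions about what may be cleaned up: runtime artifacts (containers and sandboxes) versus all pod content, including logs. A hedged sketch of a consumer, matching the interface above but with an invented cleanup helper:

package sketch // illustrative, not part of this change

import (
	kubetypes "k8s.io/apimachinery/pkg/types"
)

// podStateProvider matches the interface declared above.
type podStateProvider interface {
	ShouldPodContentBeRemoved(kubetypes.UID) bool
	ShouldPodRuntimeBeRemoved(kubetypes.UID) bool
}

// cleanupPod is an invented consumer showing the intended split: content
// removal means everything can go; runtime removal alone leaves logs and
// other per-pod content in place.
func cleanupPod(p podStateProvider, uid kubetypes.UID, removeRuntime, removeContent func(kubetypes.UID)) {
	if p.ShouldPodContentBeRemoved(uid) {
		removeContent(uid) // logs, sandboxes, containers
		return
	}
	if p.ShouldPodRuntimeBeRemoved(uid) {
		removeRuntime(uid) // containers and sandboxes only
	}
}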
type kubeGenericRuntimeManager struct {
@ -804,7 +805,8 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
// or CRI if the Pod has been deleted while the POD is
// being created. If the pod has been deleted then it's
// not a real error.
if m.podStateProvider.IsPodDeleted(pod.UID) {
// TODO: this is probably not needed now that termination is part of the sync loop
if m.podStateProvider.ShouldPodContentBeRemoved(pod.UID) {
klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
return
}


@ -70,10 +70,12 @@ func customTestRuntimeManager(keyring *credentialprovider.BasicDockerKeyring) (*
// sandboxTemplate is a sandbox template to create fake sandbox.
type sandboxTemplate struct {
pod *v1.Pod
attempt uint32
createdAt int64
state runtimeapi.PodSandboxState
pod *v1.Pod
attempt uint32
createdAt int64
state runtimeapi.PodSandboxState
running bool
terminating bool
}
// containerTemplate is a container template to create fake container.
@ -1401,6 +1403,7 @@ func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) {
}
backOff := flowcontrol.NewBackOff(time.Second, time.Minute)
m.podStateProvider.(*fakePodStateProvider).removed = map[types.UID]struct{}{pod.UID: {}}
// GetPodStatus and the following SyncPod will not return errors in the
// case where the pod has been deleted. We are not adding any pods into


@ -62,7 +62,7 @@ type Manager struct {
shutdownGracePeriodCriticalPods time.Duration
getPods eviction.ActivePodsFunc
killPod eviction.KillPodFunc
killPodFunc eviction.KillPodFunc
syncNodeStatus func()
dbusCon dbusInhibiter
@ -78,7 +78,7 @@ type Manager struct {
func NewManager(getPodsFunc eviction.ActivePodsFunc, killPodFunc eviction.KillPodFunc, syncNodeStatus func(), shutdownGracePeriodRequested, shutdownGracePeriodCriticalPods time.Duration) (*Manager, lifecycle.PodAdmitHandler) {
manager := &Manager{
getPods: getPodsFunc,
killPod: killPodFunc,
killPodFunc: killPodFunc,
syncNodeStatus: syncNodeStatus,
shutdownGracePeriodRequested: shutdownGracePeriodRequested,
shutdownGracePeriodCriticalPods: shutdownGracePeriodCriticalPods,
@ -268,15 +268,10 @@ func (m *Manager) processShutdownEvent() error {
}
klog.V(1).InfoS("Shutdown manager killing pod with gracePeriod", "pod", klog.KObj(pod), "gracePeriod", gracePeriodOverride)
status := v1.PodStatus{
Phase: v1.PodFailed,
Reason: nodeShutdownReason,
Message: nodeShutdownMessage,
}
err := m.killPod(pod, status, &gracePeriodOverride)
if err != nil {
if err := m.killPodFunc(pod, false, &gracePeriodOverride, func(status *v1.PodStatus) {
status.Message = nodeShutdownMessage
status.Reason = nodeShutdownReason
}); err != nil {
klog.V(1).InfoS("Shutdown manager failed killing pod", "pod", klog.KObj(pod), "err", err)
} else {
klog.V(1).InfoS("Shutdown manager finished killing pod", "pod", klog.KObj(pod))


@ -212,7 +212,7 @@ func TestManager(t *testing.T) {
}
podKillChan := make(chan PodKillInfo, 1)
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
killPodsFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(podStatus *v1.PodStatus)) error {
var gracePeriod int64
if gracePeriodOverride != nil {
gracePeriod = *gracePeriodOverride
@ -297,7 +297,7 @@ func TestFeatureEnabled(t *testing.T) {
activePodsFunc := func() []*v1.Pod {
return nil
}
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
killPodsFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
return nil
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, tc.featureGateEnabled)()
@ -323,7 +323,7 @@ func TestRestart(t *testing.T) {
activePodsFunc := func() []*v1.Pod {
return nil
}
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
killPodsFunc := func(pod *v1.Pod, isEvicted bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
return nil
}
syncNodeStatus := func() {}


@ -19,11 +19,13 @@ package testing
import (
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)
import mock "github.com/stretchr/testify/mock"
import types "k8s.io/apimachinery/pkg/types"
import v1 "k8s.io/api/core/v1"
mock "github.com/stretchr/testify/mock"
types "k8s.io/apimachinery/pkg/types"
v1 "k8s.io/api/core/v1"
)
// MockManager is an autogenerated mock type for the Manager type
type MockManager struct {
@ -77,6 +79,20 @@ func (_m *MockManager) GetOrphanedMirrorPodNames() []string {
return r0
}
func (_m *MockManager) GetOrphanedMirrorPodNamesAndUIDs() map[string]kubelettypes.ResolvedPodUID {
ret := _m.Called()
var r0 map[string]kubelettypes.ResolvedPodUID
if rf, ok := ret.Get(0).(func() map[string]kubelettypes.ResolvedPodUID); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(map[string]kubelettypes.ResolvedPodUID)
}
}
return r0
}
// DeletePod provides a mock function with given fields: _a0
func (_m *MockManager) DeletePod(_a0 *v1.Pod) {
_m.Called(_a0)

File diff suppressed because it is too large.


@ -17,6 +17,7 @@ limitations under the License.
package kubelet
import (
"context"
"reflect"
"strconv"
"sync"
@ -38,30 +39,82 @@ import (
// fakePodWorkers runs sync pod function in serial, so we can have
// deterministic behaviour in testing.
type fakePodWorkers struct {
lock sync.Mutex
syncPodFn syncPodFnType
cache kubecontainer.Cache
t TestingInterface
triggeredDeletion []types.UID
statusLock sync.Mutex
running map[types.UID]bool
terminating map[types.UID]bool
terminationRequested map[types.UID]bool
removeRuntime map[types.UID]bool
removeContent map[types.UID]bool
terminatingStaticPods map[string]bool
}
func (f *fakePodWorkers) UpdatePod(options *UpdatePodOptions) {
status, err := f.cache.Get(options.Pod.UID)
func (f *fakePodWorkers) UpdatePod(options UpdatePodOptions) {
f.lock.Lock()
defer f.lock.Unlock()
var uid types.UID
switch {
case options.Pod != nil:
uid = options.Pod.UID
case options.RunningPod != nil:
uid = options.RunningPod.ID
default:
return
}
status, err := f.cache.Get(uid)
if err != nil {
f.t.Errorf("Unexpected error: %v", err)
}
if err := f.syncPodFn(syncPodOptions{
mirrorPod: options.MirrorPod,
pod: options.Pod,
podStatus: status,
updateType: options.UpdateType,
killPodOptions: options.KillPodOptions,
}); err != nil {
f.t.Errorf("Unexpected error: %v", err)
switch options.UpdateType {
case kubetypes.SyncPodKill:
f.triggeredDeletion = append(f.triggeredDeletion, uid)
default:
if err := f.syncPodFn(context.Background(), options.UpdateType, options.Pod, options.MirrorPod, status); err != nil {
f.t.Errorf("Unexpected error: %v", err)
}
}
}
func (f *fakePodWorkers) ForgetNonExistingPodWorkers(desiredPods map[types.UID]sets.Empty) {}
func (f *fakePodWorkers) SyncKnownPods(desiredPods []*v1.Pod) map[types.UID]PodWorkType {
return nil
}
func (f *fakePodWorkers) ForgetWorker(uid types.UID) {}
func (f *fakePodWorkers) CouldHaveRunningContainers(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.running[uid]
}
func (f *fakePodWorkers) IsPodTerminationRequested(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminationRequested[uid]
}
func (f *fakePodWorkers) ShouldPodContainersBeTerminating(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminating[uid]
}
func (f *fakePodWorkers) ShouldPodRuntimeBeRemoved(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.removeRuntime[uid]
}
func (f *fakePodWorkers) ShouldPodContentBeRemoved(uid types.UID) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.removeContent[uid]
}
func (f *fakePodWorkers) IsPodForMirrorPodTerminatingByFullName(podFullname string) bool {
f.statusLock.Lock()
defer f.statusLock.Unlock()
return f.terminatingStaticPods[podFullname]
}
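Kubelet tests earlier in this diff exercise these accessors by seeding the maps directly before calling into the kubelet. A compressed sketch of that setup (the UIDs and wrapper type are invented; the field names match the fakePodWorkers struct above):

package sketch // illustrative, not part of this change

import (
	"k8s.io/apimachinery/pkg/types"
)

// workerState models the subset of fakePodWorkers fields the tests populate.
type workerState struct {
	running               map[types.UID]bool
	removeRuntime         map[types.UID]bool
	terminatingStaticPods map[string]bool
}

// seedForTest mirrors how tests such as TestHandlePortConflicts,
// TestVolumeUnmountAndDetachControllerDisabled, and TestDeleteOrphanedMirrorPods
// prime the fake before invoking kubelet behavior.
func seedForTest() workerState {
	return workerState{
		running:               map[types.UID]bool{"pod-a": true},      // may still have containers
		removeRuntime:         map[types.UID]bool{"pod-b": true},      // runtime may be torn down
		terminatingStaticPods: map[string]bool{"static-pod_ns": true}, // keyed by pod full name
	}
}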
type TestingInterface interface {
Errorf(format string, args ...interface{})
@ -80,6 +133,8 @@ func newPod(uid, name string) *v1.Pod {
type syncPodRecord struct {
name string
updateType kubetypes.SyncPodType
runningPod *kubecontainer.Pod
terminated bool
}
func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) {
@ -88,15 +143,38 @@ func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) {
fakeRecorder := &record.FakeRecorder{}
fakeRuntime := &containertest.FakeRuntime{}
fakeCache := containertest.NewFakeCache(fakeRuntime)
podWorkers := newPodWorkers(
func(options syncPodOptions) error {
w := newPodWorkers(
func(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
func() {
lock.Lock()
defer lock.Unlock()
pod := options.pod
pod := pod
processed[pod.UID] = append(processed[pod.UID], syncPodRecord{
name: pod.Name,
updateType: options.updateType,
updateType: updateType,
})
}()
return nil
},
func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
func() {
lock.Lock()
defer lock.Unlock()
processed[pod.UID] = append(processed[pod.UID], syncPodRecord{
name: pod.Name,
updateType: kubetypes.SyncPodKill,
runningPod: runningPod,
})
}()
return nil
},
func(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
func() {
lock.Lock()
defer lock.Unlock()
processed[pod.UID] = append(processed[pod.UID], syncPodRecord{
name: pod.Name,
terminated: true,
})
}()
return nil
@ -107,7 +185,7 @@ func createPodWorkers() (*podWorkers, map[types.UID][]syncPodRecord) {
time.Second,
fakeCache,
)
return podWorkers, processed
return w.(*podWorkers), processed
}
func drainWorkers(podWorkers *podWorkers, numPods int) {
@ -115,8 +193,27 @@ func drainWorkers(podWorkers *podWorkers, numPods int) {
stillWorking := false
podWorkers.podLock.Lock()
for i := 0; i < numPods; i++ {
if podWorkers.isWorking[types.UID(strconv.Itoa(i))] {
if s, ok := podWorkers.podSyncStatuses[types.UID(strconv.Itoa(i))]; ok && s.working {
stillWorking = true
break
}
}
podWorkers.podLock.Unlock()
if !stillWorking {
break
}
time.Sleep(50 * time.Millisecond)
}
}
func drainAllWorkers(podWorkers *podWorkers) {
for {
stillWorking := false
podWorkers.podLock.Lock()
for _, worker := range podWorkers.podSyncStatuses {
if worker.working {
stillWorking = true
break
}
}
podWorkers.podLock.Unlock()
@ -133,7 +230,7 @@ func TestUpdatePod(t *testing.T) {
numPods := 20
for i := 0; i < numPods; i++ {
for j := i; j < numPods; j++ {
podWorkers.UpdatePod(&UpdatePodOptions{
podWorkers.UpdatePod(UpdatePodOptions{
Pod: newPod(strconv.Itoa(j), strconv.Itoa(i)),
UpdateType: kubetypes.SyncPodCreate,
})
@ -165,20 +262,51 @@ func TestUpdatePod(t *testing.T) {
}
}
func TestUpdatePodForRuntimePod(t *testing.T) {
podWorkers, processed := createPodWorkers()
// ignores running pod of wrong sync type
podWorkers.UpdatePod(UpdatePodOptions{
UpdateType: kubetypes.SyncPodCreate,
RunningPod: &kubecontainer.Pod{ID: "1", Name: "1", Namespace: "test"},
})
drainAllWorkers(podWorkers)
if len(processed) != 0 {
t.Fatalf("Not all pods processed: %v", len(processed))
}
// creates synthetic pod
podWorkers.UpdatePod(UpdatePodOptions{
UpdateType: kubetypes.SyncPodKill,
RunningPod: &kubecontainer.Pod{ID: "1", Name: "1", Namespace: "test"},
})
drainAllWorkers(podWorkers)
if len(processed) != 1 {
t.Fatalf("Not all pods processed: %v", processed)
}
updates := processed["1"]
if len(updates) != 1 {
t.Fatalf("unexpected updates: %v", updates)
}
if updates[0].runningPod == nil || updates[0].updateType != kubetypes.SyncPodKill || updates[0].name != "1" {
t.Fatalf("unexpected update: %v", updates)
}
}
func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) {
podWorkers, processed := createPodWorkers()
numPods := 20
for i := 0; i < numPods; i++ {
pod := newPod(strconv.Itoa(i), strconv.Itoa(i))
podWorkers.UpdatePod(&UpdatePodOptions{
podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodCreate,
})
podWorkers.UpdatePod(&UpdatePodOptions{
podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodKill,
})
podWorkers.UpdatePod(&UpdatePodOptions{
podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodUpdate,
})
@ -205,12 +333,12 @@ func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) {
}
}
func TestForgetNonExistingPodWorkers(t *testing.T) {
func TestSyncKnownPods(t *testing.T) {
podWorkers, _ := createPodWorkers()
numPods := 20
for i := 0; i < numPods; i++ {
podWorkers.UpdatePod(&UpdatePodOptions{
podWorkers.UpdatePod(UpdatePodOptions{
Pod: newPod(strconv.Itoa(i), "name"),
UpdateType: kubetypes.SyncPodUpdate,
})
@ -224,7 +352,72 @@ func TestForgetNonExistingPodWorkers(t *testing.T) {
desiredPods := map[types.UID]sets.Empty{}
desiredPods[types.UID("2")] = sets.Empty{}
desiredPods[types.UID("14")] = sets.Empty{}
podWorkers.ForgetNonExistingPodWorkers(desiredPods)
desiredPodList := []*v1.Pod{newPod("2", "name"), newPod("14", "name")}
// kill all but the requested pods
for i := 0; i < numPods; i++ {
pod := newPod(strconv.Itoa(i), "name")
if _, ok := desiredPods[pod.UID]; ok {
continue
}
if (i % 2) == 0 {
now := metav1.Now()
pod.DeletionTimestamp = &now
}
podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodKill,
})
}
drainWorkers(podWorkers, numPods)
if !podWorkers.ShouldPodContainersBeTerminating(types.UID("0")) {
t.Errorf("Expected pod to be terminating")
}
if !podWorkers.ShouldPodContainersBeTerminating(types.UID("1")) {
t.Errorf("Expected pod to be terminating")
}
if podWorkers.ShouldPodContainersBeTerminating(types.UID("2")) {
t.Errorf("Expected pod to not be terminating")
}
if !podWorkers.IsPodTerminationRequested(types.UID("0")) {
t.Errorf("Expected pod to be terminating")
}
if podWorkers.IsPodTerminationRequested(types.UID("2")) {
t.Errorf("Expected pod to not be terminating")
}
if podWorkers.CouldHaveRunningContainers(types.UID("0")) {
t.Errorf("Expected pod to be terminated (deleted and terminated)")
}
if podWorkers.CouldHaveRunningContainers(types.UID("1")) {
t.Errorf("Expected pod to be terminated")
}
if !podWorkers.CouldHaveRunningContainers(types.UID("2")) {
t.Errorf("Expected pod to not be terminated")
}
if !podWorkers.ShouldPodContentBeRemoved(types.UID("0")) {
t.Errorf("Expected pod to be suitable for removal (deleted and terminated)")
}
if podWorkers.ShouldPodContentBeRemoved(types.UID("1")) {
t.Errorf("Expected pod to not be suitable for removal (terminated but not deleted)")
}
if podWorkers.ShouldPodContentBeRemoved(types.UID("2")) {
t.Errorf("Expected pod to not be suitable for removal (not terminated)")
}
if podWorkers.ShouldPodContainersBeTerminating(types.UID("abc")) {
t.Errorf("Expected pod to not be known to be terminating (does not exist but not yet synced)")
}
if !podWorkers.CouldHaveRunningContainers(types.UID("abc")) {
t.Errorf("Expected pod to potentially have running containers (does not exist but not yet synced)")
}
if podWorkers.ShouldPodContentBeRemoved(types.UID("abc")) {
t.Errorf("Expected pod to not be suitable for removal (does not exist but not yet synced)")
}
podWorkers.SyncKnownPods(desiredPodList)
if len(podWorkers.podUpdates) != 2 {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
}
@ -234,34 +427,52 @@ func TestForgetNonExistingPodWorkers(t *testing.T) {
if _, exists := podWorkers.podUpdates[types.UID("14")]; !exists {
t.Errorf("No updates channel for pod 14")
}
if podWorkers.IsPodTerminationRequested(types.UID("2")) {
t.Errorf("Expected pod termination request to be cleared after sync")
}
podWorkers.ForgetNonExistingPodWorkers(map[types.UID]sets.Empty{})
if len(podWorkers.podUpdates) != 0 {
if !podWorkers.ShouldPodContainersBeTerminating(types.UID("abc")) {
t.Errorf("Expected pod to be expected to terminate containers (does not exist and synced at least once)")
}
if podWorkers.CouldHaveRunningContainers(types.UID("abc")) {
t.Errorf("Expected pod to be known not to have running containers (does not exist and synced at least once)")
}
if !podWorkers.ShouldPodContentBeRemoved(types.UID("abc")) {
t.Errorf("Expected pod to be suitable for removal (does not exist and synced at least once)")
}
// verify workers that are not terminated stay open even if config no longer
// sees them
podWorkers.SyncKnownPods(nil)
if len(podWorkers.podUpdates) != 2 {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
}
}
if len(podWorkers.podSyncStatuses) != 2 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.podSyncStatuses)
}
if len(podWorkers.lastUndeliveredWorkUpdate) != 0 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.lastUndeliveredWorkUpdate)
}
func TestIsWorkingClearedAfterForgetPodWorkers(t *testing.T) {
podWorkers, _ := createPodWorkers()
numPods := 20
for i := 0; i < numPods; i++ {
podWorkers.UpdatePod(&UpdatePodOptions{
Pod: newPod(strconv.Itoa(i), "name"),
UpdateType: kubetypes.SyncPodUpdate,
for uid := range desiredPods {
pod := newPod(string(uid), "name")
podWorkers.UpdatePod(UpdatePodOptions{
Pod: pod,
UpdateType: kubetypes.SyncPodKill,
})
}
drainWorkers(podWorkers, numPods)
if len(podWorkers.podUpdates) != numPods {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
}
podWorkers.ForgetNonExistingPodWorkers(map[types.UID]sets.Empty{})
// verify once those pods terminate (via some other flow) the workers are cleared
podWorkers.SyncKnownPods(nil)
if len(podWorkers.podUpdates) != 0 {
t.Errorf("Incorrect number of open channels %v", len(podWorkers.podUpdates))
}
if len(podWorkers.isWorking) != 0 {
t.Errorf("Incorrect number of isWorking %v", len(podWorkers.isWorking))
if len(podWorkers.podSyncStatuses) != 0 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.podSyncStatuses)
}
if len(podWorkers.lastUndeliveredWorkUpdate) != 0 {
t.Errorf("Incorrect number of tracked statuses: %#v", podWorkers.lastUndeliveredWorkUpdate)
}
}
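The assertions above exercise the read-side lifecycle queries that other kubelet components consult after SyncKnownPods has run. The sketch below summarizes how a consumer is expected to interpret the three answers; the interface name and the decideCleanup helper are illustrative and not part of this change — only the three method signatures mirror the real podWorkers API.

// Minimal sketch of a consumer of the pod worker lifecycle queries verified above.
package example

import (
	"k8s.io/apimachinery/pkg/types"
)

type lifecycleQueries interface {
	ShouldPodContainersBeTerminating(uid types.UID) bool
	CouldHaveRunningContainers(uid types.UID) bool
	ShouldPodContentBeRemoved(uid types.UID) bool
}

// decideCleanup reports what a hypothetical cleanup loop would do for a pod UID.
func decideCleanup(q lifecycleQueries, uid types.UID) string {
	switch {
	case q.ShouldPodContentBeRemoved(uid):
		// terminated and deleted (or orphaned): directories and volumes may go
		return "remove pod content"
	case !q.CouldHaveRunningContainers(uid):
		// terminated but still present in config: keep content, no further syncs needed
		return "keep content, stop syncing"
	case q.ShouldPodContainersBeTerminating(uid):
		// termination requested: never start new containers
		return "stop starting containers"
	default:
		return "pod may still be starting or running"
	}
}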
@ -272,17 +483,25 @@ type simpleFakeKubelet struct {
wg sync.WaitGroup
}
func (kl *simpleFakeKubelet) syncPod(options syncPodOptions) error {
kl.pod, kl.mirrorPod, kl.podStatus = options.pod, options.mirrorPod, options.podStatus
func (kl *simpleFakeKubelet) syncPod(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
kl.pod, kl.mirrorPod, kl.podStatus = pod, mirrorPod, podStatus
return nil
}
func (kl *simpleFakeKubelet) syncPodWithWaitGroup(options syncPodOptions) error {
kl.pod, kl.mirrorPod, kl.podStatus = options.pod, options.mirrorPod, options.podStatus
func (kl *simpleFakeKubelet) syncPodWithWaitGroup(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
kl.pod, kl.mirrorPod, kl.podStatus = pod, mirrorPod, podStatus
kl.wg.Done()
return nil
}
func (kl *simpleFakeKubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, runningPod *kubecontainer.Pod, gracePeriod *int64, podStatusFn func(*v1.PodStatus)) error {
return nil
}
func (kl *simpleFakeKubelet) syncTerminatedPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) error {
return nil
}
// TestFakePodWorkers verifies that the fakePodWorkers behaves the same way as the real podWorkers
// for their invocation of the syncPodFn.
func TestFakePodWorkers(t *testing.T) {
@ -293,8 +512,16 @@ func TestFakePodWorkers(t *testing.T) {
kubeletForRealWorkers := &simpleFakeKubelet{}
kubeletForFakeWorkers := &simpleFakeKubelet{}
realPodWorkers := newPodWorkers(kubeletForRealWorkers.syncPodWithWaitGroup, fakeRecorder, queue.NewBasicWorkQueue(&clock.RealClock{}), time.Second, time.Second, fakeCache)
fakePodWorkers := &fakePodWorkers{kubeletForFakeWorkers.syncPod, fakeCache, t}
realPodWorkers := newPodWorkers(
kubeletForRealWorkers.syncPodWithWaitGroup,
kubeletForRealWorkers.syncTerminatingPod,
kubeletForRealWorkers.syncTerminatedPod,
fakeRecorder, queue.NewBasicWorkQueue(&clock.RealClock{}), time.Second, time.Second, fakeCache)
fakePodWorkers := &fakePodWorkers{
syncPodFn: kubeletForFakeWorkers.syncPod,
cache: fakeCache,
t: t,
}
tests := []struct {
pod *v1.Pod
@ -316,12 +543,12 @@ func TestFakePodWorkers(t *testing.T) {
for i, tt := range tests {
kubeletForRealWorkers.wg.Add(1)
realPodWorkers.UpdatePod(&UpdatePodOptions{
realPodWorkers.UpdatePod(UpdatePodOptions{
Pod: tt.pod,
MirrorPod: tt.mirrorPod,
UpdateType: kubetypes.SyncPodUpdate,
})
fakePodWorkers.UpdatePod(&UpdatePodOptions{
fakePodWorkers.UpdatePod(UpdatePodOptions{
Pod: tt.pod,
MirrorPod: tt.mirrorPod,
UpdateType: kubetypes.SyncPodUpdate,
@ -350,19 +577,26 @@ func TestKillPodNowFunc(t *testing.T) {
killPodFunc := killPodNow(podWorkers, fakeRecorder)
pod := newPod("test", "test")
gracePeriodOverride := int64(0)
err := killPodFunc(pod, v1.PodStatus{Phase: v1.PodFailed, Reason: "reason", Message: "message"}, &gracePeriodOverride)
err := killPodFunc(pod, false, &gracePeriodOverride, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = "reason"
status.Message = "message"
})
if err != nil {
t.Errorf("Unexpected error: %v", err)
t.Fatalf("Unexpected error: %v", err)
}
drainAllWorkers(podWorkers)
if len(processed) != 1 {
t.Errorf("len(processed) expected: %v, actual: %v", 1, len(processed))
return
t.Fatalf("len(processed) expected: %v, actual: %#v", 1, processed)
}
syncPodRecords := processed[pod.UID]
if len(syncPodRecords) != 1 {
t.Errorf("Pod processed %v times, but expected %v", len(syncPodRecords), 1)
if len(syncPodRecords) != 2 {
t.Fatalf("Pod processed expected %v times, got %#v", 1, syncPodRecords)
}
if syncPodRecords[0].updateType != kubetypes.SyncPodKill {
t.Errorf("Pod update type was %v, but expected %v", syncPodRecords[0].updateType, kubetypes.SyncPodKill)
}
if !syncPodRecords[1].terminated {
t.Errorf("Pod terminated %v, but expected %v", syncPodRecords[1].terminated, true)
}
}

View File

@ -20,7 +20,7 @@ import (
"fmt"
"math"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/api/v1/resource"
@ -94,18 +94,16 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
if err != nil {
return fmt.Errorf("preemption: error finding a set of pods to preempt: %v", err)
}
klog.InfoS("Attempting to evict pods in order to free up resources", "pods", podsToPreempt,
"insufficientResources", insufficientResources.toString())
for _, pod := range podsToPreempt {
status := v1.PodStatus{
Phase: v1.PodFailed,
Message: message,
Reason: events.PreemptContainer,
}
// record that we are evicting the pod
c.recorder.Eventf(pod, v1.EventTypeWarning, events.PreemptContainer, message)
// this is a blocking call and should only return when the pod and its containers are killed.
err := c.killPodFunc(pod, status, nil)
klog.V(3).InfoS("Preempting pod to free up resources", "pod", klog.KObj(pod), "podUID", pod.UID, "insufficientResources", insufficientResources.toString())
err := c.killPodFunc(pod, true, nil, func(status *v1.PodStatus) {
status.Phase = v1.PodFailed
status.Reason = events.PreemptContainer
status.Message = message
})
if err != nil {
klog.ErrorS(err, "Failed to evict pod", "pod", klog.KObj(pod))
// In future syncPod loops, the kubelet will retry the pod deletion steps that it was stuck on.

View File

@ -20,7 +20,7 @@ import (
"fmt"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
@ -57,7 +57,7 @@ func (f *fakePodKiller) getKilledPods() []*v1.Pod {
return f.killedPods
}
func (f *fakePodKiller) killPodNow(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
func (f *fakePodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(status *v1.PodStatus)) error {
if f.errDuringPodKilling {
f.killedPods = []*v1.Pod{}
return fmt.Errorf("problem killing pod %v", pod)

View File

@ -17,11 +17,12 @@ limitations under the License.
package kubelet
import (
"context"
"fmt"
"os"
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -130,12 +131,7 @@ func (kl *Kubelet) runPod(pod *v1.Pod, retryDelay time.Duration) error {
klog.ErrorS(err, "Failed creating a mirror pod", "pod", klog.KObj(pod))
}
mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
if err = kl.syncPod(syncPodOptions{
pod: pod,
mirrorPod: mirrorPod,
podStatus: status,
updateType: kubetypes.SyncPodUpdate,
}); err != nil {
if err = kl.syncPod(context.Background(), kubetypes.SyncPodUpdate, pod, mirrorPod, status); err != nil {
return fmt.Errorf("error syncing pod %q: %v", format.Pod(pod), err)
}
if retry >= runOnceMaxRetries {

View File

@ -79,6 +79,7 @@ func TestRunOnce(t *testing.T) {
nodeLister: testNodeLister{},
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}),
podManager: podManager,
podWorkers: &fakePodWorkers{},
os: &containertest.FakeOS{},
containerRuntime: fakeRuntime,
reasonCache: NewReasonCache(),
@ -101,7 +102,7 @@ func TestRunOnce(t *testing.T) {
true,
kb.nodeName,
kb.podManager,
kb.statusManager,
kb.podWorkers,
kb.kubeClient,
kb.volumePluginMgr,
fakeRuntime,
@ -122,7 +123,7 @@ func TestRunOnce(t *testing.T) {
UID: types.UID(kb.nodeName),
Namespace: "",
}
fakeKillPodFunc := func(pod *v1.Pod, podStatus v1.PodStatus, gracePeriodOverride *int64) error {
fakeKillPodFunc := func(pod *v1.Pod, evict bool, gracePeriodOverride *int64, fn func(*v1.PodStatus)) error {
return nil
}
fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false }

View File

@ -20,6 +20,7 @@ import (
"context"
"fmt"
"sort"
"strings"
"sync"
"time"
@ -356,6 +357,7 @@ func (m *manager) TerminatePod(pod *v1.Pod) {
}
}
klog.V(5).InfoS("TerminatePod calling updateStatusInternal", "pod", klog.KObj(pod), "podUID", pod.UID)
m.updateStatusInternal(pod, status, true)
}
@ -431,10 +433,43 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp
}
normalizeStatus(pod, &status)
// Perform some more extensive logging of container termination state to assist in
// debugging production races (generally not needed).
if klog.V(5).Enabled() {
var containers []string
for _, s := range append(append([]v1.ContainerStatus(nil), status.InitContainerStatuses...), status.ContainerStatuses...) {
var current, previous string
switch {
case s.State.Running != nil:
current = "running"
case s.State.Waiting != nil:
current = "waiting"
case s.State.Terminated != nil:
current = fmt.Sprintf("terminated=%d", s.State.Terminated.ExitCode)
default:
current = "unknown"
}
switch {
case s.LastTerminationState.Running != nil:
previous = "running"
case s.LastTerminationState.Waiting != nil:
previous = "waiting"
case s.LastTerminationState.Terminated != nil:
previous = fmt.Sprintf("terminated=%d", s.LastTerminationState.Terminated.ExitCode)
default:
previous = "<none>"
}
containers = append(containers, fmt.Sprintf("(%s state=%s previous=%s)", s.Name, current, previous))
}
sort.Strings(containers)
klog.InfoS("updateStatusInternal", "version", cachedStatus.version+1, "pod", klog.KObj(pod), "podUID", pod.UID, "containers", strings.Join(containers, " "))
}
// The intent here is to prevent concurrent updates to a pod's status from
// clobbering each other so the phase of a pod progresses monotonically.
if isCached && isPodStatusByKubeletEqual(&cachedStatus.status, &status) && !forceUpdate {
klog.V(5).InfoS("Ignoring same status for pod", "pod", klog.KObj(pod), "status", status)
klog.V(3).InfoS("Ignoring same status for pod", "pod", klog.KObj(pod), "status", status)
return false // No new status.
}
@ -583,7 +618,7 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
oldStatus := pod.Status.DeepCopy()
newPod, patchBytes, unchanged, err := statusutil.PatchPodStatus(m.kubeClient, pod.Namespace, pod.Name, pod.UID, *oldStatus, mergePodStatus(*oldStatus, status.status))
klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "patchBytes", patchBytes)
klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "patch", string(patchBytes))
if err != nil {
klog.InfoS("Failed to update status for pod", "pod", klog.KObj(pod), "err", err)

View File

@ -120,8 +120,8 @@ const (
SyncPodUpdate
// SyncPodCreate is when the pod is created from source
SyncPodCreate
// SyncPodKill is when the pod is killed based on a trigger internal to the kubelet for eviction.
// If a SyncPodKill request is made to pod workers, the request is never dropped, and will always be processed.
// SyncPodKill is when the pod should have no running containers. A pod stopped in this way could be
// restarted in the future due to config changes.
SyncPodKill
)

View File

@ -40,7 +40,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/csimigration"
@ -70,6 +69,12 @@ type DesiredStateOfWorldPopulator interface {
HasAddedPods() bool
}
// podStateProvider can determine if a pod is going to be terminated.
type podStateProvider interface {
ShouldPodContainersBeTerminating(types.UID) bool
ShouldPodRuntimeBeRemoved(types.UID) bool
}
// NewDesiredStateOfWorldPopulator returns a new instance of
// DesiredStateOfWorldPopulator.
//
@ -84,7 +89,7 @@ func NewDesiredStateOfWorldPopulator(
loopSleepDuration time.Duration,
getPodStatusRetryDuration time.Duration,
podManager pod.Manager,
podStatusProvider status.PodStatusProvider,
podStateProvider podStateProvider,
desiredStateOfWorld cache.DesiredStateOfWorld,
actualStateOfWorld cache.ActualStateOfWorld,
kubeContainerRuntime kubecontainer.Runtime,
@ -97,7 +102,7 @@ func NewDesiredStateOfWorldPopulator(
loopSleepDuration: loopSleepDuration,
getPodStatusRetryDuration: getPodStatusRetryDuration,
podManager: podManager,
podStatusProvider: podStatusProvider,
podStateProvider: podStateProvider,
desiredStateOfWorld: desiredStateOfWorld,
actualStateOfWorld: actualStateOfWorld,
pods: processedPods{
@ -117,7 +122,7 @@ type desiredStateOfWorldPopulator struct {
loopSleepDuration time.Duration
getPodStatusRetryDuration time.Duration
podManager pod.Manager
podStatusProvider status.PodStatusProvider
podStateProvider podStateProvider
desiredStateOfWorld cache.DesiredStateOfWorld
actualStateOfWorld cache.ActualStateOfWorld
pods processedPods
@ -177,14 +182,6 @@ func (dswp *desiredStateOfWorldPopulator) populatorLoop() {
dswp.findAndRemoveDeletedPods()
}
func (dswp *desiredStateOfWorldPopulator) isPodTerminated(pod *v1.Pod) bool {
podStatus, found := dswp.podStatusProvider.GetPodStatus(pod.UID)
if !found {
podStatus = pod.Status
}
return util.IsPodTerminated(pod, podStatus)
}
// Iterate through all pods and add to desired state of world if they don't
// exist but should
func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
@ -203,8 +200,8 @@ func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
processedVolumesForFSResize := sets.NewString()
for _, pod := range dswp.podManager.GetPods() {
if dswp.isPodTerminated(pod) {
// Do not (re)add volumes for terminated pods
if dswp.podStateProvider.ShouldPodContainersBeTerminating(pod.UID) {
// Do not (re)add volumes for pods that can't also be starting containers
continue
}
dswp.processPodVolumes(pod, mountedVolumesForPod, processedVolumesForFSResize)
@ -214,9 +211,6 @@ func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
// Iterate through all pods in desired state of world, and remove if they no
// longer exist
func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
var runningPods []*kubecontainer.Pod
runningPodsFetched := false
for _, volumeToMount := range dswp.desiredStateOfWorld.GetVolumesToMount() {
pod, podExists := dswp.podManager.GetPodByUID(volumeToMount.Pod.UID)
if podExists {
@ -234,8 +228,8 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
}
}
// Skip running pods
if !dswp.isPodTerminated(pod) {
// Exclude known pods that we expect to be running
if !dswp.podStateProvider.ShouldPodRuntimeBeRemoved(pod.UID) {
continue
}
if dswp.keepTerminatedPodVolumes {
@ -245,36 +239,9 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
// Once a pod has been deleted from kubelet pod manager, do not delete
// it immediately from volume manager. Instead, check the kubelet
// containerRuntime to verify that all containers in the pod have been
// pod state provider to verify that all containers in the pod have been
// terminated.
if !runningPodsFetched {
var getPodsErr error
runningPods, getPodsErr = dswp.kubeContainerRuntime.GetPods(false)
if getPodsErr != nil {
klog.ErrorS(getPodsErr, "kubeContainerRuntime.findAndRemoveDeletedPods returned error")
continue
}
runningPodsFetched = true
dswp.timeOfLastGetPodStatus = time.Now()
}
runningContainers := false
for _, runningPod := range runningPods {
if runningPod.ID == volumeToMount.Pod.UID {
// runningPod.Containers only include containers in the running state,
// excluding containers in the creating process.
// By adding a non-empty judgment for runningPod.Sandboxes,
// ensure that all containers of the pod have been terminated.
if len(runningPod.Sandboxes) > 0 || len(runningPod.Containers) > 0 {
runningContainers = true
}
break
}
}
if runningContainers {
if !dswp.podStateProvider.ShouldPodRuntimeBeRemoved(volumeToMount.Pod.UID) {
klog.V(4).InfoS("Pod still has one or more containers in the non-exited state and will not be removed from desired state", "pod", klog.KObj(volumeToMount.Pod))
continue
}

View File

@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
kubetypes "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
@ -34,13 +35,10 @@ import (
csitrans "k8s.io/csi-translation-lib"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/configmap"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
"k8s.io/kubernetes/pkg/kubelet/secret"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/csimigration"
@ -81,7 +79,7 @@ func prepareDswpWithVolume(t *testing.T) (*desiredStateOfWorldPopulator, kubepod
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
return dswp, fakePodManager
}
@ -156,7 +154,7 @@ func TestFindAndAddNewPods_WithVolumeRetrievalError(t *testing.T) {
}
func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) {
dswp, fakeRuntime, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t)
dswp, fakePodState, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t)
podName := util.GetUniquePodName(pod)
//let the pod be terminated
@ -167,29 +165,15 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) {
podGet.Status.Phase = v1.PodFailed
dswp.podManager.DeletePod(pod)
fakeRuntime.PodList = []*containertest.FakePod{
{
Pod: &kubecontainer.Pod{
Name: pod.Name,
ID: pod.UID,
Sandboxes: []*kubecontainer.Container{
{
Name: "dswp-test-pod-sandbox",
},
},
},
},
}
dswp.findAndRemoveDeletedPods()
if !dswp.pods.processedPods[podName] {
t.Fatalf("Pod should not been removed from desired state of world since sandbox exist")
t.Fatalf("Pod should not been removed from desired state of world since pod state still thinks it exists")
}
fakeRuntime.PodList = nil
fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted
// the pod state is marked as removed, so here findAndRemoveDeletedPods() will remove the pod and the volumes it has mounted
dswp.findAndRemoveDeletedPods()
if dswp.pods.processedPods[podName] {
t.Fatalf("Failed to remove pods from desired state of world since they no longer exist")
@ -221,7 +205,8 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) {
}
func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) {
dswp, _, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t)
dswp, fakePodState, pod, expectedVolumeName, _ := prepareDSWPWithPodPV(t)
fakeASW := dswp.actualStateOfWorld
podName := util.GetUniquePodName(pod)
//let the pod be terminated
@ -251,7 +236,19 @@ func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) {
// reconcile with actual state so that volume is added into the actual state
// desired state populator now can successfully delete the pod and volume
fakeASW := dswp.actualStateOfWorld
reconcileASW(fakeASW, dswp.desiredStateOfWorld, t)
dswp.findAndRemoveDeletedPods()
if !dswp.desiredStateOfWorld.VolumeExists(expectedVolumeName) {
t.Fatalf(
"VolumeExists(%q) failed. Expected: <false> Actual: <%v>",
expectedVolumeName,
volumeExists)
}
fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// reconcile with actual state so that volume is added into the actual state
// desired state populator now can successfully delete the pod and volume
reconcileASW(fakeASW, dswp.desiredStateOfWorld, t)
dswp.findAndRemoveDeletedPods()
volumeExists = dswp.desiredStateOfWorld.VolumeExists(expectedVolumeName)
@ -271,7 +268,7 @@ func TestFindAndRemoveDeletedPodsWithActualState(t *testing.T) {
}
func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
dswp, fakeRuntime, pod, expectedVolumeName, pv := prepareDSWPWithPodPV(t)
dswp, fakePodState, pod, expectedVolumeName, pv := prepareDSWPWithPodPV(t)
podName := util.GetUniquePodName(pod)
//let the pod be terminated
@ -281,7 +278,7 @@ func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
}
podGet.Status.Phase = v1.PodFailed
dswp.podManager.DeletePod(pod)
fakeRuntime.PodList = nil
fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// Add the volume to ASW by reconciling.
fakeASW := dswp.actualStateOfWorld
@ -302,7 +299,7 @@ func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
t.Fatalf("Failed to set the volume as uncertain: %s", err)
}
// fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted
// the pod state now lists the pod as removed, so here findAndRemoveDeletedPods() will remove the pod and the volumes it has mounted
dswp.findAndRemoveDeletedPods()
if dswp.pods.processedPods[podName] {
t.Fatalf("Failed to remove pods from desired state of world since they no longer exist")
@ -333,7 +330,7 @@ func TestFindAndRemoveDeletedPodsWithUncertain(t *testing.T) {
}
}
func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *containertest.FakeRuntime, *v1.Pod, v1.UniqueVolumeName, *v1.PersistentVolume) {
func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *fakePodStateProvider, *v1.Pod, v1.UniqueVolumeName, *v1.PersistentVolume) {
// create dswp
mode := v1.PersistentVolumeFilesystem
pv := &v1.PersistentVolume{
@ -353,7 +350,7 @@ func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *contain
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, fakesDSW, fakeRuntime := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, fakesDSW, _, fakePodState := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -399,7 +396,7 @@ func prepareDSWPWithPodPV(t *testing.T) (*desiredStateOfWorldPopulator, *contain
verifyVolumeExistsInVolumesToMount(
t, v1.UniqueVolumeName(generatedVolumeName), false /* expectReportedInUse */, fakesDSW)
return dswp, fakeRuntime, pod, expectedVolumeName, pv
return dswp, fakePodState, pod, expectedVolumeName, pv
}
func TestFindAndRemoveNonattachableVolumes(t *testing.T) {
@ -424,7 +421,7 @@ func TestFindAndRemoveNonattachableVolumes(t *testing.T) {
}
fakeVolumePluginMgr, fakeVolumePlugin := volumetesting.GetTestKubeletVolumePluginMgr(t)
dswp, fakePodManager, fakesDSW, _ := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr)
dswp, fakePodManager, fakesDSW, _, _ := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr)
// create pod
containers := []v1.Container{
@ -487,7 +484,7 @@ func TestEphemeralVolumeOwnerCheck(t *testing.T) {
// create dswp
pod, pv, pvc := createEphemeralVolumeObjects("dswp-test-pod", "dswp-test-volume-name", false /* not owned */)
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
fakePodManager.AddPod(pod)
podName := util.GetUniquePodName(pod)
@ -505,7 +502,7 @@ func TestEphemeralVolumeOwnerCheck(t *testing.T) {
func TestEphemeralVolumeEnablement(t *testing.T) {
// create dswp
pod, pv, pvc := createEphemeralVolumeObjects("dswp-test-pod", "dswp-test-volume-name", true /* owned */)
dswp, fakePodManager, fakesDSW, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, fakesDSW, _, fakePodState := createDswpWithVolume(t, pv, pvc)
fakePodManager.AddPod(pod)
podName := util.GetUniquePodName(pod)
@ -555,7 +552,7 @@ func TestEphemeralVolumeEnablement(t *testing.T) {
if !exist {
t.Fatalf("Failed to get pod by pod name: %s and namespace: %s", pod.Name, pod.Namespace)
}
podGet.Status.Phase = v1.PodFailed
fakePodState.removed = map[kubetypes.UID]struct{}{podGet.UID: {}}
// Pretend again that the feature is disabled.
// Removal of the pod and volumes is expected to work.
@ -620,7 +617,7 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods_Valid_Block_VolumeDevices(t
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, fakesDSW, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, fakesDSW, _, fakePodState := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -674,9 +671,10 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods_Valid_Block_VolumeDevices(t
}
podGet.Status.Phase = v1.PodFailed
fakePodManager.DeletePod(pod)
//pod is added to fakePodManager but fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted
dswp.findAndRemoveDeletedPods()
fakePodState.removed = map[kubetypes.UID]struct{}{pod.UID: {}}
// pod is added to fakePodManager but pod state knows the pod is removed, so here findAndRemoveDeletedPods() will remove the pod and the volumes it has mounted
dswp.findAndRemoveDeletedPods()
if dswp.pods.processedPods[podName] {
t.Fatalf("Failed to remove pods from desired state of world since they no longer exist")
}
@ -727,7 +725,7 @@ func TestCreateVolumeSpec_Valid_File_VolumeMounts(t *testing.T) {
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -773,7 +771,7 @@ func TestCreateVolumeSpec_Valid_Nil_VolumeMounts(t *testing.T) {
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -819,7 +817,7 @@ func TestCreateVolumeSpec_Valid_Block_VolumeDevices(t *testing.T) {
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -865,7 +863,7 @@ func TestCreateVolumeSpec_Invalid_File_VolumeDevices(t *testing.T) {
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -911,7 +909,7 @@ func TestCreateVolumeSpec_Invalid_Block_VolumeMounts(t *testing.T) {
Phase: v1.ClaimBound,
},
}
dswp, fakePodManager, _, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, _, _, _ := createDswpWithVolume(t, pv, pvc)
// create pod
containers := []v1.Container{
@ -1068,7 +1066,7 @@ func TestCheckVolumeFSResize(t *testing.T) {
},
}
dswp, fakePodManager, fakeDSW, _ := createDswpWithVolume(t, pv, pvc)
dswp, fakePodManager, fakeDSW, _, _ := createDswpWithVolume(t, pv, pvc)
fakeASW := dswp.actualStateOfWorld
containers := []v1.Container{}
@ -1290,14 +1288,29 @@ func createEphemeralVolumeObjects(podName, volumeName string, owned bool) (pod *
return
}
func createDswpWithVolume(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime) {
func createDswpWithVolume(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime, *fakePodStateProvider) {
fakeVolumePluginMgr, _ := volumetesting.GetTestKubeletVolumePluginMgr(t)
dswp, fakePodManager, fakesDSW, fakeRuntime := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr)
return dswp, fakePodManager, fakesDSW, fakeRuntime
dswp, fakePodManager, fakesDSW, fakeRuntime, fakeStateProvider := createDswpWithVolumeWithCustomPluginMgr(t, pv, pvc, fakeVolumePluginMgr)
return dswp, fakePodManager, fakesDSW, fakeRuntime, fakeStateProvider
}
type fakePodStateProvider struct {
terminating map[kubetypes.UID]struct{}
removed map[kubetypes.UID]struct{}
}
func (p *fakePodStateProvider) ShouldPodContainersBeTerminating(uid kubetypes.UID) bool {
_, ok := p.terminating[uid]
return ok
}
func (p *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid kubetypes.UID) bool {
_, ok := p.removed[uid]
return ok
}
func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolume, pvc *v1.PersistentVolumeClaim,
fakeVolumePluginMgr *volume.VolumePluginMgr) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime) {
fakeVolumePluginMgr *volume.VolumePluginMgr) (*desiredStateOfWorldPopulator, kubepod.Manager, cache.DesiredStateOfWorld, *containertest.FakeRuntime, *fakePodStateProvider) {
fakeClient := &fake.Clientset{}
fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) {
return true, pvc, nil
@ -1314,8 +1327,7 @@ func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolu
fakesDSW := cache.NewDesiredStateOfWorld(fakeVolumePluginMgr)
fakeASW := cache.NewActualStateOfWorld("fake", fakeVolumePluginMgr)
fakeRuntime := &containertest.FakeRuntime{}
fakeStatusManager := status.NewManager(fakeClient, fakePodManager, &statustest.FakePodDeletionSafetyProvider{})
fakeStateProvider := &fakePodStateProvider{}
csiTranslator := csitrans.New()
dswp := &desiredStateOfWorldPopulator{
@ -1323,7 +1335,7 @@ func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolu
loopSleepDuration: 100 * time.Millisecond,
getPodStatusRetryDuration: 2 * time.Second,
podManager: fakePodManager,
podStatusProvider: fakeStatusManager,
podStateProvider: fakeStateProvider,
desiredStateOfWorld: fakesDSW,
actualStateOfWorld: fakeASW,
pods: processedPods{
@ -1334,5 +1346,5 @@ func createDswpWithVolumeWithCustomPluginMgr(t *testing.T, pv *v1.PersistentVolu
intreeToCSITranslator: csiTranslator,
volumePluginMgr: fakeVolumePluginMgr,
}
return dswp, fakePodManager, fakesDSW, fakeRuntime
return dswp, fakePodManager, fakesDSW, fakeRuntime, fakeStateProvider
}

View File

@ -39,7 +39,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/status"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/metrics"
"k8s.io/kubernetes/pkg/kubelet/volumemanager/populator"
@ -106,6 +105,13 @@ type VolumeManager interface {
// the duration defined in podAttachAndMountTimeout.
WaitForAttachAndMount(pod *v1.Pod) error
// WaitForUnmount processes the volumes referenced in the specified
// pod and blocks until they are all unmounted (reflected in the actual
// state of the world).
// An error is returned if all volumes are not unmounted within
// the duration defined in podAttachAndMountTimeout.
WaitForUnmount(pod *v1.Pod) error
// GetMountedVolumesForPod returns a VolumeMap containing the volumes
// referenced by the specified pod that are successfully attached and
// mounted. The key in the map is the OuterVolumeSpecName (i.e.
@ -149,6 +155,12 @@ type VolumeManager interface {
MarkVolumesAsReportedInUse(volumesReportedAsInUse []v1.UniqueVolumeName)
}
// podStateProvider can determine if a pod is going to be terminated
type podStateProvider interface {
ShouldPodContainersBeTerminating(k8stypes.UID) bool
ShouldPodRuntimeBeRemoved(k8stypes.UID) bool
}
// NewVolumeManager returns a new concrete instance implementing the
// VolumeManager interface.
//
@ -160,7 +172,7 @@ func NewVolumeManager(
controllerAttachDetachEnabled bool,
nodeName k8stypes.NodeName,
podManager pod.Manager,
podStatusProvider status.PodStatusProvider,
podStateProvider podStateProvider,
kubeClient clientset.Interface,
volumePluginMgr *volume.VolumePluginMgr,
kubeContainerRuntime container.Runtime,
@ -195,7 +207,7 @@ func NewVolumeManager(
desiredStateOfWorldPopulatorLoopSleepPeriod,
desiredStateOfWorldPopulatorGetPodStatusRetryDuration,
podManager,
podStatusProvider,
podStateProvider,
vm.desiredStateOfWorld,
vm.actualStateOfWorld,
kubeContainerRuntime,
@ -426,6 +438,42 @@ func (vm *volumeManager) WaitForAttachAndMount(pod *v1.Pod) error {
return nil
}
func (vm *volumeManager) WaitForUnmount(pod *v1.Pod) error {
if pod == nil {
return nil
}
klog.V(3).InfoS("Waiting for volumes to unmount for pod", "pod", klog.KObj(pod))
uniquePodName := util.GetUniquePodName(pod)
vm.desiredStateOfWorldPopulator.ReprocessPod(uniquePodName)
err := wait.PollImmediate(
podAttachAndMountRetryInterval,
podAttachAndMountTimeout,
vm.verifyVolumesUnmountedFunc(uniquePodName))
if err != nil {
var mountedVolumes []string
for _, v := range vm.actualStateOfWorld.GetMountedVolumesForPod(uniquePodName) {
mountedVolumes = append(mountedVolumes, v.OuterVolumeSpecName)
}
sort.Strings(mountedVolumes)
if len(mountedVolumes) == 0 {
return nil
}
return fmt.Errorf(
"mounted volumes=%v: %s",
mountedVolumes,
err)
}
klog.V(3).InfoS("All volumes are unmounted for pod", "pod", klog.KObj(pod))
return nil
}
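For readers unfamiliar with the wait helpers, a condensed sketch of the polling pattern WaitForUnmount builds on is shown below; the interval, timeout, and volumesRemaining helper are illustrative stand-ins rather than the kubelet's actual condition function.

// Sketch only: mirrors the shape of the wait loop above with a stand-in condition.
package example

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// waitForNoMountedVolumes blocks until volumesRemaining reports zero mounted
// volumes for the pod, or the timeout expires.
func waitForNoMountedVolumes(volumesRemaining func() int) error {
	err := wait.PollImmediate(300*time.Millisecond, 2*time.Minute, func() (bool, error) {
		// done when nothing is left mounted; returning an error would abort the poll early
		return volumesRemaining() == 0, nil
	})
	if err != nil {
		return fmt.Errorf("timed out waiting for volumes to unmount: %v", err)
	}
	return nil
}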
// getUnattachedVolumes returns a list of the volumes that are expected to be attached but
// are not currently attached to the node
func (vm *volumeManager) getUnattachedVolumes(expectedVolumes []string) []string {
@ -449,6 +497,17 @@ func (vm *volumeManager) verifyVolumesMountedFunc(podName types.UniquePodName, e
}
}
// verifyVolumesUnmountedFunc returns a condition function that returns true when
// there are no mounted volumes for this pod.
func (vm *volumeManager) verifyVolumesUnmountedFunc(podName types.UniquePodName) wait.ConditionFunc {
return func() (done bool, err error) {
if errs := vm.desiredStateOfWorld.PopPodErrors(podName); len(errs) > 0 {
return true, errors.New(strings.Join(errs, "; "))
}
return len(vm.actualStateOfWorld.GetMountedVolumesForPod(podName)) == 0, nil
}
}
// getUnmountedVolumes fetches the current list of mounted volumes from
// the actual state of the world, and uses it to process the list of
// expectedVolumes. It returns a list of unmounted volumes.

View File

@ -50,6 +50,11 @@ func (f *FakeVolumeManager) WaitForAttachAndMount(pod *v1.Pod) error {
return nil
}
// WaitForUnmount is not implemented
func (f *FakeVolumeManager) WaitForUnmount(pod *v1.Pod) error {
return nil
}
// GetMountedVolumesForPod is not implemented
func (f *FakeVolumeManager) GetMountedVolumesForPod(podName types.UniquePodName) container.VolumeMap {
return nil

View File

@ -28,6 +28,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
@ -40,8 +41,6 @@ import (
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
podtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
"k8s.io/kubernetes/pkg/kubelet/secret"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/volume"
volumetest "k8s.io/kubernetes/pkg/volume/testing"
"k8s.io/kubernetes/pkg/volume/util"
@ -269,19 +268,34 @@ func TestGetExtraSupplementalGroupsForPod(t *testing.T) {
}
}
type fakePodStateProvider struct {
shouldRemove map[kubetypes.UID]struct{}
terminating map[kubetypes.UID]struct{}
}
func (p *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid kubetypes.UID) bool {
_, ok := p.shouldRemove[uid]
return ok
}
func (p *fakePodStateProvider) ShouldPodContainersBeTerminating(uid kubetypes.UID) bool {
_, ok := p.terminating[uid]
return ok
}
func newTestVolumeManager(t *testing.T, tmpDir string, podManager kubepod.Manager, kubeClient clientset.Interface) VolumeManager {
plug := &volumetest.FakeVolumePlugin{PluginName: "fake", Host: nil}
fakeRecorder := &record.FakeRecorder{}
plugMgr := &volume.VolumePluginMgr{}
// TODO (#51147) inject mock prober
plugMgr.InitPlugins([]volume.VolumePlugin{plug}, nil /* prober */, volumetest.NewFakeKubeletVolumeHost(t, tmpDir, kubeClient, nil))
statusManager := status.NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{})
stateProvider := &fakePodStateProvider{}
fakePathHandler := volumetest.NewBlockVolumePathHandler()
vm := NewVolumeManager(
true,
testHostname,
podManager,
statusManager,
stateProvider,
kubeClient,
plugMgr,
&containertest.FakeRuntime{},

View File

@ -381,7 +381,10 @@ func GetUniqueVolumeNameFromSpec(
// IsPodTerminated checks if pod is terminated
func IsPodTerminated(pod *v1.Pod, podStatus v1.PodStatus) bool {
return podStatus.Phase == v1.PodFailed || podStatus.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(podStatus.ContainerStatuses))
// TODO: the guarantees provided by kubelet status are not sufficient to guarantee it's safe to ignore a deleted pod,
// even if everything is notRunning (kubelet does not guarantee that when pod status is waiting that it isn't trying
// to start a container).
return podStatus.Phase == v1.PodFailed || podStatus.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(podStatus.InitContainerStatuses) && notRunning(podStatus.ContainerStatuses) && notRunning(podStatus.EphemeralContainerStatuses))
}
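One consequence of the broadened check, sketched below under the assumption that this helper is the one exported from k8s.io/kubernetes/pkg/volume/util: a deleted pod whose ephemeral container is still running is no longer reported as terminated. The pod fixture here is made up for illustration.

// Illustrative only: the final comment describes the new behavior relative to the
// pre-change check, which consulted only regular container statuses.
package example

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	volumeutil "k8s.io/kubernetes/pkg/volume/util"
)

func deletedPodWithRunningEphemeralContainer() bool {
	now := metav1.Now()
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}
	status := v1.PodStatus{
		Phase: v1.PodRunning,
		// every regular container has exited...
		ContainerStatuses: []v1.ContainerStatus{
			{State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{ExitCode: 0}}},
		},
		// ...but an ephemeral (debug) container is still running
		EphemeralContainerStatuses: []v1.ContainerStatus{
			{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}},
		},
	}
	// now false: the expanded check also requires ephemeral (and init) container
	// statuses to be not-running before a deleted pod counts as terminated
	return volumeutil.IsPodTerminated(pod, status)
}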
// notRunning returns true if every status is terminated or waiting, or the status list

View File

@ -756,6 +756,10 @@ var _ = SIGDescribe("StatefulSet", func() {
}
pod, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(context.TODO(), pod, metav1.CreateOptions{})
framework.ExpectNoError(err)
ginkgo.By("Waiting until pod " + podName + " will start running in namespace " + f.Namespace.Name)
if err := e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, podName, f.Namespace.Name); err != nil {
framework.Failf("Pod %v did not start running: %v", podName, err)
}
ginkgo.By("Creating statefulset with conflicting port in namespace " + f.Namespace.Name)
ss := e2estatefulset.NewStatefulSet(ssName, f.Namespace.Name, headlessSvcName, 1, nil, nil, labels)
@ -765,11 +769,6 @@ var _ = SIGDescribe("StatefulSet", func() {
_, err = f.ClientSet.AppsV1().StatefulSets(f.Namespace.Name).Create(context.TODO(), ss, metav1.CreateOptions{})
framework.ExpectNoError(err)
ginkgo.By("Waiting until pod " + podName + " will start running in namespace " + f.Namespace.Name)
if err := e2epod.WaitForPodNameRunningInNamespace(f.ClientSet, podName, f.Namespace.Name); err != nil {
framework.Failf("Pod %v did not start running: %v", podName, err)
}
var initialStatefulPodUID types.UID
ginkgo.By("Waiting until stateful pod " + statefulPodName + " will be recreated and deleted at least once in namespace " + f.Namespace.Name)

View File

@ -387,6 +387,8 @@ var _ = SIGDescribe("Pods Extended", func() {
// pod volume teardown races with container start in CRI, which reports a failure
framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
default:
data, _ := json.MarshalIndent(pod.Status, "", " ")
framework.Logf("pod %s on node %s had incorrect final status:\n%s", string(data))
return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
}
switch {