From de9cdab5ae3a2615dec4ad93854176d592670e62 Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Tue, 13 Jul 2021 11:06:13 -0400
Subject: [PATCH] kubelet: Prevent runtime-only pods from going into terminated phase

If a pod is already in the terminated phase and the housekeeping loop
sees an out-of-date cache entry for a running container, the pod worker
should ignore that runtime-only termination request. Once the worker
completes, a subsequent housekeeping invocation will then invoke the
terminating flow, because the worker is no longer processing any pod
with that UID.

This does leave open the possibility of syncTerminatedPod being blocked
if a container in the pod is started after killPod successfully
completes but before syncTerminatedPod can exit successfully, perhaps
because the terminated flow (detach volumes) is blocked on that running
container. A future change will address that issue.
---
 pkg/kubelet/pod_workers.go      |  8 ++++++++
 pkg/kubelet/pod_workers_test.go | 29 +++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/pkg/kubelet/pod_workers.go b/pkg/kubelet/pod_workers.go
index 9cfb169aca9..297900cca1c 100644
--- a/pkg/kubelet/pod_workers.go
+++ b/pkg/kubelet/pod_workers.go
@@ -537,6 +537,14 @@ func (p *podWorkers) UpdatePod(options UpdatePodOptions) {
 	var wasGracePeriodShortened bool
 	switch {
 	case status.IsTerminated():
+		// A terminated pod may still be waiting for cleanup - if we receive a runtime pod kill request
+		// due to housekeeping seeing an older cached version of the runtime pod, simply ignore it until
+		// after the pod worker completes.
+		if isRuntimePod {
+			klog.V(3).InfoS("Pod is waiting for termination, ignoring runtime-only kill until after pod worker is fully terminated", "pod", klog.KObj(pod), "podUID", pod.UID)
+			return
+		}
+
 		workType = TerminatedPodWork
 
 		if options.KillPodOptions != nil {
diff --git a/pkg/kubelet/pod_workers_test.go b/pkg/kubelet/pod_workers_test.go
index a23daa8ca6b..50705275ecd 100644
--- a/pkg/kubelet/pod_workers_test.go
+++ b/pkg/kubelet/pod_workers_test.go
@@ -293,6 +293,35 @@ func TestUpdatePodForRuntimePod(t *testing.T) {
 	}
 }
 
+func TestUpdatePodForTerminatedRuntimePod(t *testing.T) {
+	podWorkers, processed := createPodWorkers()
+
+	now := time.Now()
+	podWorkers.podSyncStatuses[types.UID("1")] = &podSyncStatus{
+		startedTerminating: true,
+		terminatedAt:       now.Add(-time.Second),
+		terminatingAt:      now.Add(-2 * time.Second),
+		gracePeriod:        1,
+	}
+
+	// Send a runtime-only (synthetic) pod kill for the already terminated pod.
+	podWorkers.UpdatePod(UpdatePodOptions{
+		UpdateType: kubetypes.SyncPodKill,
+		RunningPod: &kubecontainer.Pod{ID: "1", Name: "1", Namespace: "test"},
+	})
+	drainAllWorkers(podWorkers)
+	if len(processed) != 0 {
+		t.Fatalf("unexpected processed pods: %v", processed)
+	}
+	updates := processed["1"]
+	if len(updates) != 0 {
+		t.Fatalf("unexpected updates: %v", updates)
+	}
+	if len(podWorkers.lastUndeliveredWorkUpdate) != 0 {
+		t.Fatalf("unexpected undelivered work")
+	}
+}
+
 func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) {
 	podWorkers, processed := createPodWorkers()
 	numPods := 20
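
As a quick illustration of the guard this patch adds, here is a minimal,
self-contained Go sketch. The names below (a time-based podSyncStatus, its
IsTerminated method, and the shouldIgnoreRuntimeKill helper) are simplified
stand-ins, not the kubelet's real types: the decision reduces to "drop the
update when it carries only a runtime pod and the worker has already reached
the terminated phase".

package main

import (
	"fmt"
	"time"
)

// podSyncStatus is a simplified stand-in for the kubelet's internal
// per-pod tracking structure; only the fields the guard needs are modeled.
type podSyncStatus struct {
	terminatingAt time.Time // when termination began (zero if not started)
	terminatedAt  time.Time // when all containers stopped (zero if not reached)
}

// IsTerminated reports whether the worker has reached the terminated
// phase, mirroring the status check the patch switches on.
func (s podSyncStatus) IsTerminated() bool { return !s.terminatedAt.IsZero() }

// shouldIgnoreRuntimeKill is a hypothetical helper expressing the guard:
// an update that carries only a runtime pod (no matching API pod) is
// dropped while the worker is still finishing termination; the next
// housekeeping pass retries once the worker has completed and the UID is
// no longer tracked.
func shouldIgnoreRuntimeKill(status podSyncStatus, isRuntimePod bool) bool {
	return status.IsTerminated() && isRuntimePod
}

func main() {
	now := time.Now()
	status := podSyncStatus{
		terminatingAt: now.Add(-2 * time.Second),
		terminatedAt:  now.Add(-time.Second),
	}
	// Housekeeping saw a stale runtime cache entry and requested a kill.
	if shouldIgnoreRuntimeKill(status, true) {
		fmt.Println("runtime-only kill ignored; housekeeping will retry after the worker completes")
	}
}

The new test in pkg/kubelet/pod_workers_test.go exercises exactly this path;
from the repository root it should run with something like:

go test ./pkg/kubelet/ -run TestUpdatePodForTerminatedRuntimePod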