kubelet: Prevent runtime-only pods from going into terminated phase

If a pod is already in the terminated phase and the housekeeping loop
sees an out-of-date cache entry for a running container, the pod worker
should ignore that runtime-only pod termination request. Once the worker
completes, a subsequent housekeeping invocation will then start
termination of the running pod, because the worker is no longer
processing any pod with that UID.

This does leave the possibility of syncTerminatedPod being blocked
if a container in the pod is started after killPod successfully
completes but before syncTerminatedPod can exit successfully,
perhaps because the terminated flow (detach volumes) is blocked on
that running container. A future change will address that issue.
This commit is contained in:
Clayton Coleman 2021-07-13 11:06:13 -04:00
parent 234d731182
commit de9cdab5ae
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
2 changed files with 37 additions and 0 deletions

View File

@ -537,6 +537,14 @@ func (p *podWorkers) UpdatePod(options UpdatePodOptions) {
var wasGracePeriodShortened bool
switch {
case status.IsTerminated():
// A terminated pod may still be waiting for cleanup - if we receive a runtime pod kill request
// due to housekeeping seeing an older cached version of the runtime pod simply ignore it until
// after the pod worker completes.
if isRuntimePod {
klog.V(3).InfoS("Pod is waiting for termination, ignoring runtime-only kill until after pod worker is fully terminated", "pod", klog.KObj(pod), "podUID", pod.UID)
return
}
workType = TerminatedPodWork
if options.KillPodOptions != nil {

View File

@ -293,6 +293,35 @@ func TestUpdatePodForRuntimePod(t *testing.T) {
}
}
// TestUpdatePodForTerminatedRuntimePod checks that a runtime-only SyncPodKill
// request is expected to be ignored when the pod worker state for that UID
// already records a termination time: no sync may be processed and no update
// may be left queued as undelivered work.
func TestUpdatePodForTerminatedRuntimePod(t *testing.T) {
	podWorkers, processed := createPodWorkers()

	// Pre-populate the worker state for UID "1" with terminating and
	// terminated timestamps that both lie in the past, so the worker state
	// looks like a pod that has finished terminating before the kill request
	// arrives.
	now := time.Now()
	podWorkers.podSyncStatuses[types.UID("1")] = &podSyncStatus{
		startedTerminating: true,
		terminatedAt:       now.Add(-time.Second),
		terminatingAt:      now.Add(-2 * time.Second),
		gracePeriod:        1,
	}

	// Send a kill request that carries only a runtime pod (no API pod) for the
	// same UID. The assertions below require that it produces no work.
	podWorkers.UpdatePod(UpdatePodOptions{
		UpdateType: kubetypes.SyncPodKill,
		RunningPod: &kubecontainer.Pod{ID: "1", Name: "1", Namespace: "test"},
	})
	drainAllWorkers(podWorkers)

	// Check the targeted UID first so that a failure reports the specific
	// updates that were wrongly delivered; the broader check afterwards
	// catches work recorded under any other UID.
	if updates := processed["1"]; len(updates) != 0 {
		t.Fatalf("unexpected updates: %v", updates)
	}
	if len(processed) != 0 {
		t.Fatalf("expected no pods to be processed, got: %v", processed)
	}
	if len(podWorkers.lastUndeliveredWorkUpdate) != 0 {
		t.Fatal("Unexpected undelivered work")
	}
}
func TestUpdatePodDoesNotForgetSyncPodKill(t *testing.T) {
podWorkers, processed := createPodWorkers()
numPods := 20