Fix the deletion of rejected pods

parent 3e28404008
commit e3ee9b9adc
@@ -1173,6 +1173,16 @@ func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
 	metrics.RestartedPodTotal.WithLabelValues("true").Add(float64(restartCountStatic))
 	metrics.RestartedPodTotal.WithLabelValues("").Add(float64(restartCount))
 
+	// Complete termination of deleted pods that are not runtime pods (don't have
+	// running containers), are terminal, and are not known to pod workers.
+	// An example is pods rejected during kubelet admission that have never
+	// started before (i.e. does not have an orphaned pod).
+	// Triggering TerminatePod allows to proceed with force-deletion of such pods.
+	for _, pod := range kl.filterTerminalPodsToDelete(allPods, runningRuntimePods, workingPods) {
+		klog.V(3).InfoS("Completing termination of terminal pods scheduled for deletion", "pod", klog.KObj(pod), "podUID", pod.UID)
+		kl.statusManager.TerminatePod(pod)
+	}
+
 	// Finally, terminate any pods that are observed in the runtime but not present in the list of
 	// known running pods from config. If we do terminate running runtime pods that will happen
 	// asynchronously in the background and those will be processed in the next invocation of
@@ -1245,6 +1255,36 @@ func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
 	return nil
 }
 
+// Filters terminal pods with deletion timestamp which are not runtime pods
+func (kl *Kubelet) filterTerminalPodsToDelete(allPods []*v1.Pod, runningRuntimePods []*kubecontainer.Pod, workingPods map[types.UID]PodWorkerSync) map[types.UID]*v1.Pod {
+	terminalPodsToDelete := make(map[types.UID]*v1.Pod)
+	for _, pod := range allPods {
+		if pod.DeletionTimestamp == nil {
+			// skip pods which don't have a deletion timestamp
+			continue
+		}
+		if !kl.isAdmittedPodTerminal(pod) {
+			// skip non-terminal pods
+			continue
+		}
+		if _, knownPod := workingPods[pod.UID]; knownPod {
+			// skip pods known to pod workers
+			continue
+		}
+		if _, knownPod := kl.statusManager.GetPodStatus(pod.UID); !knownPod {
+			// skip pods unknown to pod status manager
+			continue
+		}
+		terminalPodsToDelete[pod.UID] = pod
+	}
+	for _, runningRuntimePod := range runningRuntimePods {
+		// skip running runtime pods - they are handled by a dedicated routine
+		// which terminates the containers
+		delete(terminalPodsToDelete, runningRuntimePod.ID)
+	}
+	return terminalPodsToDelete
+}
+
 // splitPodsByStatic separates a list of desired pods from the pod manager into
 // regular or static pods. Mirror pods are not valid config sources (a mirror pod
 // being created cannot cause the Kubelet to start running a static pod) and are
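
For orientation, below is a minimal, self-contained sketch of the filtering rule that the new filterTerminalPodsToDelete helper applies: keep only deleted, terminal pods that pod workers don't know about but the status manager does, and drop anything still backed by a running runtime pod. The types and maps here (podInfo, uid, the runtime/worker/status sets) are simplified stand-ins invented for illustration, not the kubelet's real types.

package main

import "fmt"

type uid string

// podInfo is a hypothetical, flattened view of the fields the filter cares about.
type podInfo struct {
	UID      uid
	Deleted  bool // DeletionTimestamp != nil
	Terminal bool // phase is Succeeded or Failed
}

func filterTerminalPodsToDelete(all []podInfo, runningRuntime, workers, statusKnown map[uid]bool) map[uid]podInfo {
	out := make(map[uid]podInfo)
	for _, p := range all {
		if !p.Deleted {
			continue // no deletion timestamp yet
		}
		if !p.Terminal {
			continue // not terminal, so normal termination handling applies
		}
		if workers[p.UID] {
			continue // pod workers will finish termination on their own
		}
		if !statusKnown[p.UID] {
			continue // status manager has nothing to terminate
		}
		out[p.UID] = p
	}
	for id := range runningRuntime {
		delete(out, id) // running runtime pods are handled by the dedicated termination path
	}
	return out
}

func main() {
	rejected := podInfo{UID: "rejected", Deleted: true, Terminal: true}
	stillRunning := podInfo{UID: "running", Deleted: true, Terminal: false}
	got := filterTerminalPodsToDelete(
		[]podInfo{rejected, stillRunning},
		map[uid]bool{},                 // nothing running in the container runtime
		map[uid]bool{},                 // nothing known to pod workers
		map[uid]bool{"rejected": true}, // status manager saw the rejected pod
	)
	fmt.Println(got) // only the rejected pod remains and gets TerminatePod called on it
}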
@@ -897,14 +897,16 @@ func (m *manager) canBeDeleted(pod *v1.Pod, status v1.PodStatus, podIsFinished b
 	if pod.DeletionTimestamp == nil || kubetypes.IsMirrorPod(pod) {
 		return false
 	}
-	// Delay deletion of pods until the phase is terminal.
+	// Delay deletion of pods until the phase is terminal, based on pod.Status
+	// which comes from pod manager.
 	if !podutil.IsPodPhaseTerminal(pod.Status.Phase) {
-		klog.V(3).InfoS("Delaying pod deletion as the phase is non-terminal", "phase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
+		// For debugging purposes we also log the kubelet's local phase, when the deletion is delayed.
+		klog.V(3).InfoS("Delaying pod deletion as the phase is non-terminal", "phase", pod.Status.Phase, "localPhase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
 		return false
 	}
 	// If this is an update completing pod termination then we know the pod termination is finished.
 	if podIsFinished {
-		klog.V(3).InfoS("The pod termination is finished as SyncTerminatedPod completes its execution", "phase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
+		klog.V(3).InfoS("The pod termination is finished as SyncTerminatedPod completes its execution", "phase", pod.Status.Phase, "localPhase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
 		return true
 	}
 	return false
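
The status-manager hunk above switches the deletion gate to the API server's copy of the pod phase (pod.Status.Phase) and keeps the kubelet-local phase (status.Phase) only as debug logging. Below is a hedged, standalone sketch of that gate; the names (apiPod, phase, canBeDeleted's parameters) are illustrative placeholders, not the real status manager API.

package main

import "fmt"

type phase string

const (
	pending   phase = "Pending"
	succeeded phase = "Succeeded"
	failed    phase = "Failed"
)

// apiPod is a hypothetical, minimal view of the pod as seen through the pod manager.
type apiPod struct {
	Deleted  bool  // DeletionTimestamp != nil
	Mirror   bool  // mirror pods are never force-deleted this way
	APIPhase phase // pod.Status.Phase from the API server
}

func isTerminal(p phase) bool { return p == succeeded || p == failed }

// canBeDeleted reports whether the pod object may be removed: it must be deleted,
// not a mirror pod, terminal according to the API phase, and its termination
// (SyncTerminatedPod) must have finished.
func canBeDeleted(p apiPod, localPhase phase, podIsFinished bool) bool {
	if !p.Deleted || p.Mirror {
		return false
	}
	if !isTerminal(p.APIPhase) {
		// deletion is delayed; the kubelet-local phase is only logged for debugging
		fmt.Printf("delaying deletion: apiPhase=%s localPhase=%s\n", p.APIPhase, localPhase)
		return false
	}
	return podIsFinished
}

func main() {
	rejected := apiPod{Deleted: true, APIPhase: failed}
	fmt.Println(canBeDeleted(rejected, failed, true))  // true: terminal and termination finished
	fmt.Println(canBeDeleted(rejected, failed, false)) // false: SyncTerminatedPod has not completed yet
}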
@@ -381,6 +381,145 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
 			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
 		})
+		// Regression test for https://issues.k8s.io/118472
+		ginkgo.It("should force-delete non-admissible pods created and deleted during kubelet restart", func(ctx context.Context) {
+			podName := "rejected-deleted-pod" + string(uuid.NewUUID())
+			gracePeriod := int64(30)
+			nodeName := getNodeName(ctx, f)
+			podSpec := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      podName,
+					Namespace: f.Namespace.Name,
+				},
+				Spec: v1.PodSpec{
+					NodeName: nodeName,
+					NodeSelector: map[string]string{
+						"this-label": "does-not-exist-on-any-nodes",
+					},
+					TerminationGracePeriodSeconds: &gracePeriod,
+					RestartPolicy:                 v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Name:  podName,
+							Image: imageutils.GetPauseImageName(),
+						},
+					},
+				},
+			})
+			ginkgo.By("Stopping the kubelet")
+			startKubelet := stopKubelet()
+
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
+
+			// Create the pod bound to the node. It will remain in the Pending
+			// phase as Kubelet is down.
+			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
+			pod := e2epod.NewPodClient(f).Create(ctx, podSpec)
+
+			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
+			err := e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
+			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
+
+			// Restart Kubelet so that it proceeds with deletion
+			ginkgo.By("Starting the kubelet")
+			startKubelet()
+
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
+
+			// Wait for the Kubelet to be ready.
+			gomega.Eventually(ctx, func(ctx context.Context) bool {
+				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
+				framework.ExpectNoError(err)
+				return nodes == 1
+			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
+
+			ginkgo.By(fmt.Sprintf("After the kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
+			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
+		})
+		// Regression test for an extended scenario for https://issues.k8s.io/118472
+		ginkgo.It("should force-delete non-admissible pods that was admitted and running before kubelet restart", func(ctx context.Context) {
+			nodeLabelKey := "custom-label-key-required"
+			nodeLabelValueRequired := "custom-label-value-required-for-admission"
+			podName := "rejected-deleted-run" + string(uuid.NewUUID())
+			gracePeriod := int64(30)
+			nodeName := getNodeName(ctx, f)
+			pod := e2epod.MustMixinRestrictedPodSecurity(&v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      podName,
+					Namespace: f.Namespace.Name,
+				},
+				Spec: v1.PodSpec{
+					NodeSelector: map[string]string{
+						nodeLabelKey: nodeLabelValueRequired,
+					},
+					NodeName:                      nodeName,
+					TerminationGracePeriodSeconds: &gracePeriod,
+					RestartPolicy:                 v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Name:  podName,
+							Image: imageutils.GetPauseImageName(),
+						},
+					},
+				},
+			})
+
+			ginkgo.By(fmt.Sprintf("Adding node label for node (%v) to allow admission of pod (%v/%v)", nodeName, f.Namespace.Name, podName))
+			e2enode.AddOrUpdateLabelOnNode(f.ClientSet, nodeName, nodeLabelKey, nodeLabelValueRequired)
+			ginkgo.DeferCleanup(func() { e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey) })
+
+			// Create the pod bound to the node. It will start, but will be rejected after kubelet restart.
+			ginkgo.By(fmt.Sprintf("Creating a pod (%v/%v)", f.Namespace.Name, podName))
+			pod = e2epod.NewPodClient(f).Create(ctx, pod)
+
+			ginkgo.By(fmt.Sprintf("Waiting for the pod (%v/%v) to be running", f.Namespace.Name, pod.Name))
+			err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, f.Namespace.Name)
+			framework.ExpectNoError(err, "Failed to await for the pod to be running: (%v/%v)", f.Namespace.Name, pod.Name)
+
+			ginkgo.By("Stopping the kubelet")
+			startKubelet := stopKubelet()
+
+			// wait until the kubelet health check will fail
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
+
+			ginkgo.By(fmt.Sprintf("Deleting the pod (%v/%v) to set a deletion timestamp", pod.Namespace, pod.Name))
+			err = e2epod.NewPodClient(f).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod})
+			framework.ExpectNoError(err, "Failed to delete the pod: %q", pod.Name)
+
+			ginkgo.By(fmt.Sprintf("Removing node label for node (%v) to ensure the pod (%v/%v) is rejected after kubelet restart", nodeName, f.Namespace.Name, podName))
+			e2enode.RemoveLabelOffNode(f.ClientSet, nodeName, nodeLabelKey)
+
+			// Restart Kubelet so that it proceeds with deletion
+			ginkgo.By("Starting the kubelet")
+			startKubelet()
+
+			// wait until the kubelet health check will succeed
+			gomega.Eventually(ctx, func() bool {
+				return kubeletHealthCheck(kubeletHealthCheckURL)
+			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
+
+			// Wait for the Kubelet to be ready.
+			gomega.Eventually(ctx, func(ctx context.Context) bool {
+				nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
+				framework.ExpectNoError(err)
+				return nodes == 1
+			}, time.Minute, f.Timeouts.Poll).Should(gomega.BeTrue())
+
+			ginkgo.By(fmt.Sprintf("Once Kubelet is restarted, verify the pod (%v/%v) is deleted by kubelet", pod.Namespace, pod.Name))
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
+			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
+		})
 	})
 
 })
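
The e2e tests above repeatedly poll the kubelet health check while stopping and restarting it. As a rough standalone illustration of that polling pattern only (assuming a plain HTTP healthz endpoint and none of the e2e framework helpers such as kubeletHealthCheck or gomega.Eventually), a minimal sketch might look like this:

package main

import (
	"fmt"
	"net/http"
	"time"
)

// healthy returns true when the endpoint answers 200 OK.
func healthy(url string) bool {
	resp, err := http.Get(url)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// waitFor polls until the health state matches want or the timeout expires.
func waitFor(url string, want bool, timeout, interval time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if healthy(url) == want {
			return nil
		}
		time.Sleep(interval)
	}
	return fmt.Errorf("health check did not become %v within %v", want, timeout)
}

func main() {
	// 10248 is the kubelet's default healthz port; adjust for your setup.
	const url = "http://127.0.0.1:10248/healthz"
	if err := waitFor(url, true, time.Minute, 2*time.Second); err != nil {
		fmt.Println(err)
	}
}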