diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 89eff1e5cc7..e5fe6c5e078 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1713,6 +1713,9 @@ func (kl *Kubelet) syncPod(ctx context.Context, updateType kubetypes.SyncPodType
 	// Fetch the pull secrets for the pod
 	pullSecrets := kl.getPullSecretsForPod(pod)
 
+	// Ensure the pod is being probed
+	kl.probeManager.AddPod(pod)
+
 	// Call the container runtime's SyncPod callback
 	result := kl.containerRuntime.SyncPod(pod, podStatus, pullSecrets, kl.backOff)
 	kl.reasonCache.Update(pod.UID, result)
@@ -1766,10 +1769,16 @@ func (kl *Kubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatu
 	kl.statusManager.SetPodStatus(pod, apiPodStatus)
 	if gracePeriod != nil {
+		if *gracePeriod <= 1 {
+			// If we plan to terminate quickly, stop probes immediately; otherwise we will wait until the pod is completely done
+			kl.probeManager.RemovePod(pod)
+		}
 		klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", *gracePeriod)
 	} else {
 		klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", nil)
 	}
 
+	kl.probeManager.StopLivenessAndStartup(pod)
+
 	p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
 	if err := kl.killPod(pod, p, gracePeriod); err != nil {
 		kl.recorder.Eventf(pod, v1.EventTypeWarning, events.FailedToKillPod, "error killing pod: %v", err)
@@ -1778,6 +1787,12 @@ func (kl *Kubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatu
 		return err
 	}
 
+	// Once the containers are stopped, we can stop probing for liveness and readiness.
+	// TODO: once a pod is terminal, certain probes (liveness exec) could be stopped immediately after
+	//   the detection of a container shutdown or (for readiness) after the first failure. Tracked as
+	//   https://github.com/kubernetes/kubernetes/issues/107894 although may not be worth optimizing.
+	kl.probeManager.RemovePod(pod)
+
 	// Guard against consistency issues in KillPod implementations by checking that there are no
 	// running containers. This method is invoked infrequently so this is effectively free and can
 	// catch race conditions introduced by callers updating pod status out of order.
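The syncPod and syncTerminatingPod hunks above move probe lifecycle management into the pod sync loop itself: AddPod now runs on every syncPod (which, per the TODO removed below, requires it to be reentrant), liveness and startup probes stop as soon as termination begins, and readiness probing continues until killPod has returned and the containers are confirmed stopped. Below is a minimal sketch of that call ordering only; the probeLifecycle interface and recordingManager type are invented for illustration and are not the kubelet's real types.

package main

import "fmt"

// probeLifecycle captures the three prober calls used by the sync path in
// this patch (signatures simplified for the sketch).
type probeLifecycle interface {
	AddPod(pod string)
	StopLivenessAndStartup(pod string)
	RemovePod(pod string)
}

// recordingManager records the order in which lifecycle calls arrive.
type recordingManager struct{ calls []string }

func (m *recordingManager) AddPod(pod string) {
	m.calls = append(m.calls, "AddPod("+pod+")")
}
func (m *recordingManager) StopLivenessAndStartup(pod string) {
	m.calls = append(m.calls, "StopLivenessAndStartup("+pod+")")
}
func (m *recordingManager) RemovePod(pod string) {
	m.calls = append(m.calls, "RemovePod("+pod+")")
}

// syncPod sketches the new steady-state behavior: probing is (re)registered
// on every sync instead of only in HandlePodAdditions.
func syncPod(pm probeLifecycle, pod string) {
	pm.AddPod(pod) // expected to be a no-op for workers that already exist
	// ... the container runtime's SyncPod would run here ...
}

// syncTerminatingPod sketches the termination ordering in the hunks above:
// all probing stops up front for very short grace periods, liveness/startup
// stop before the containers are killed, and readiness stops once the
// containers are confirmed stopped.
func syncTerminatingPod(pm probeLifecycle, pod string, gracePeriodSeconds int64) {
	if gracePeriodSeconds <= 1 {
		pm.RemovePod(pod) // mirrors the fast path for short grace periods
	}
	pm.StopLivenessAndStartup(pod)
	// ... killPod would run here ...
	pm.RemovePod(pod)
}

func main() {
	m := &recordingManager{}
	syncPod(m, "web-0")
	syncTerminatingPod(m, "web-0", 30)
	fmt.Println(m.calls)
}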
@@ -2233,9 +2248,6 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
 		}
 		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
 		kl.dispatchWork(pod, kubetypes.SyncPodCreate, mirrorPod, start)
-		// TODO: move inside syncPod and make reentrant
-		// https://github.com/kubernetes/kubernetes/issues/105014
-		kl.probeManager.AddPod(pod)
 	}
 }
 
@@ -2269,10 +2281,6 @@ func (kl *Kubelet) HandlePodRemoves(pods []*v1.Pod) {
 		if err := kl.deletePod(pod); err != nil {
 			klog.V(2).InfoS("Failed to delete pod", "pod", klog.KObj(pod), "err", err)
 		}
-		// TODO: move inside syncTerminatingPod|syncTerminatedPod (we should stop probing
-		// once the pod kill is acknowledged and during eviction)
-		// https://github.com/kubernetes/kubernetes/issues/105014
-		kl.probeManager.RemovePod(pod)
 	}
 }
 
diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go
index a6d11d2ae48..0cdf20d2ba5 100644
--- a/pkg/kubelet/kubelet_pods.go
+++ b/pkg/kubelet/kubelet_pods.go
@@ -1205,9 +1205,6 @@ func (kl *Kubelet) HandlePodCleanups() error {
 		mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
 		klog.V(3).InfoS("Pod is restartable after termination due to UID reuse", "pod", klog.KObj(pod), "podUID", pod.UID)
 		kl.dispatchWork(pod, kubetypes.SyncPodCreate, mirrorPod, start)
-		// TODO: move inside syncPod and make reentrant
-		// https://github.com/kubernetes/kubernetes/issues/105014
-		kl.probeManager.AddPod(pod)
 	}
 
 	return nil
diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
index 6974d82acf4..a9733b70210 100644
--- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
+++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
@@ -364,9 +364,6 @@ func (m *managerImpl) processShutdownEvent() error {
 
 				gracePeriodOverride := group.ShutdownGracePeriodSeconds
 
-				// Stop probes for the pod
-				m.probeManager.RemovePod(pod)
-
 				// If the pod's spec specifies a termination gracePeriod which is less than the gracePeriodOverride calculated, use the pod spec termination gracePeriod.
 				if pod.Spec.TerminationGracePeriodSeconds != nil && *pod.Spec.TerminationGracePeriodSeconds <= gracePeriodOverride {
 					gracePeriodOverride = *pod.Spec.TerminationGracePeriodSeconds
diff --git a/pkg/kubelet/prober/prober_manager.go b/pkg/kubelet/prober/prober_manager.go
index 37282748f65..d8295930383 100644
--- a/pkg/kubelet/prober/prober_manager.go
+++ b/pkg/kubelet/prober/prober_manager.go
@@ -57,6 +57,9 @@ type Manager interface {
 	// pod created.
 	AddPod(pod *v1.Pod)
 
+	// StopLivenessAndStartup handles stopping liveness and startup probes during termination.
+	StopLivenessAndStartup(pod *v1.Pod)
+
 	// RemovePod handles cleaning up the removed pod state, including terminating probe workers and
 	// deleting cached results.
 	RemovePod(pod *v1.Pod)
@@ -195,6 +198,22 @@ func (m *manager) AddPod(pod *v1.Pod) {
 	}
 }
 
+func (m *manager) StopLivenessAndStartup(pod *v1.Pod) {
+	m.workerLock.RLock()
+	defer m.workerLock.RUnlock()
+
+	key := probeKey{podUID: pod.UID}
+	for _, c := range pod.Spec.Containers {
+		key.containerName = c.Name
+		for _, probeType := range [...]probeType{liveness, startup} {
+			key.probeType = probeType
+			if worker, ok := m.workers[key]; ok {
+				worker.stop()
+			}
+		}
+	}
+}
+
 func (m *manager) RemovePod(pod *v1.Pod) {
 	m.workerLock.RLock()
 	defer m.workerLock.RUnlock()
diff --git a/pkg/kubelet/prober/testing/fake_manager.go b/pkg/kubelet/prober/testing/fake_manager.go
index 41910382d38..500cfcd9962 100644
--- a/pkg/kubelet/prober/testing/fake_manager.go
+++ b/pkg/kubelet/prober/testing/fake_manager.go
@@ -33,6 +33,9 @@ func (FakeManager) AddPod(_ *v1.Pod) {}
 
 // RemovePod simulates removing a Pod.
 func (FakeManager) RemovePod(_ *v1.Pod) {}
 
+// StopLivenessAndStartup simulates stopping liveness and startup probes.
+func (FakeManager) StopLivenessAndStartup(_ *v1.Pod) {}
+
 // CleanupPods simulates cleaning up Pods.
 func (FakeManager) CleanupPods(_ map[types.UID]sets.Empty) {}
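For reference, the new manager.StopLivenessAndStartup walks the worker map keyed by (pod UID, container name, probe type) and stops only the liveness and startup workers, leaving any readiness worker running until RemovePod. The standalone sketch below re-implements that lookup pattern with simplified stand-in types: probeKey, probeType, worker, and the workers map here are local to the example, while the real ones live in pkg/kubelet/prober, and the real method also holds workerLock, which the sketch omits.

package main

import "fmt"

// Simplified stand-ins for the prober package's internals.
type probeType int

const (
	liveness probeType = iota
	readiness
	startup
)

type probeKey struct {
	podUID        string
	containerName string
	probeType     probeType
}

type worker struct{ stopped bool }

func (w *worker) stop() { w.stopped = true }

// stopLivenessAndStartup mirrors the lookup pattern from the patch: for each
// container, only the liveness and startup keys are looked up and stopped,
// so a readiness worker keeps running until RemovePod is called.
func stopLivenessAndStartup(workers map[probeKey]*worker, podUID string, containers []string) {
	key := probeKey{podUID: podUID}
	for _, c := range containers {
		key.containerName = c
		for _, pt := range [...]probeType{liveness, startup} {
			key.probeType = pt
			if w, ok := workers[key]; ok {
				w.stop()
			}
		}
	}
}

func main() {
	workers := map[probeKey]*worker{
		{podUID: "uid-1", containerName: "app", probeType: liveness}:  {},
		{podUID: "uid-1", containerName: "app", probeType: readiness}: {},
		{podUID: "uid-1", containerName: "app", probeType: startup}:   {},
	}
	stopLivenessAndStartup(workers, "uid-1", []string{"app"})

	names := []string{"liveness", "readiness", "startup"}
	for k, w := range workers {
		fmt.Printf("%-9s stopped=%v\n", names[k.probeType], w.stopped)
	}
}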