Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-24 12:15:52 +00:00
kubelet: Don't delete pod until all container status is available
After a pod reaches a terminal state and all of its containers are complete, we can delete the pod from the API server. The dispatchWork method needs to wait for all container status to be available before invoking delete. Even after the worker stops, status updates continue to be delivered and the sync handler continues to sync the pod, so dispatchWork gets multiple opportunities to see status.

The previous code assumed that a pod in the Failed or Succeeded phase had no running containers, but a pod that is evicted or deleted while running can still have running containers whose final status needs to be reported.

This also modifies an earlier test to guarantee that the "fallback" exit code 137 is never reported, matching the expectation that all pods exit with a valid status for all containers (unless some exceptional failure like eviction occurs while the test is running).
This commit is contained in:
parent ad3d8949f0
commit 2364c10e2e
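Before the diff itself, the scenario the commit message describes can be made concrete with a small, purely illustrative Go snippet (not part of this commit; the pod literal and names are invented for the example). It builds a pod that has already been marked for deletion and has reached the Failed phase while one container is still reported as running — exactly the case where the kubelet must keep syncing status rather than immediately requesting deletion:

	package main

	import (
		"fmt"
		"time"

		v1 "k8s.io/api/core/v1"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	)

	func main() {
		now := metav1.NewTime(time.Now())

		// A pod deleted (or evicted) while its container was still running: the pod-level
		// phase is already terminal, but the container has not yet reported a terminated state.
		pod := v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: "example", DeletionTimestamp: &now},
			Status: v1.PodStatus{
				Phase: v1.PodFailed, // terminal phase
				ContainerStatuses: []v1.ContainerStatus{
					{
						Name:  "app",
						State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: now}},
					},
				},
			},
		}

		// The old behavior keyed only off the terminal phase; the new code also requires every
		// container to have left the Running state before asking the API server to delete the pod.
		fmt.Printf("phase=%s, container still running=%v\n",
			pod.Status.Phase, pod.Status.ContainerStatuses[0].State.Running != nil)
	}

With the change below, dispatchWork would not ask the status manager to trigger deletion for a pod like this until every container's status has flipped to a terminated (or waiting) state.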
@@ -2001,19 +2001,22 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
 }
 
 // dispatchWork starts the asynchronous sync of the pod in a pod worker.
-// If the pod is terminated, dispatchWork will perform no action.
+// If the pod has completed termination, dispatchWork will perform no action.
 func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
-	if kl.podIsTerminated(pod) {
-		klog.V(4).Infof("Pod %q is terminated, ignoring remaining sync work: %s", format.Pod(pod), syncType)
-		if pod.DeletionTimestamp != nil {
-			// If the pod is in a terminated state, there is no pod worker to
-			// handle the work item. Check if the DeletionTimestamp has been
-			// set, and force a status update to trigger a pod deletion request
-			// to the apiserver.
-			kl.statusManager.TerminatePod(pod)
-		}
-		return
-	}
+	// check whether we are ready to delete the pod from the API server (all status up to date)
+	containersTerminal, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
+	if pod.DeletionTimestamp != nil && containersTerminal {
+		klog.V(4).Infof("Pod %q has completed execution and should be deleted from the API server: %s", format.Pod(pod), syncType)
+		kl.statusManager.TerminatePod(pod)
+		return
+	}
+
+	// optimization: avoid invoking the pod worker if no further changes are possible to the pod definition
+	if podWorkerTerminal {
+		klog.V(4).Infof("Pod %q has completed, ignoring remaining sync work: %s", format.Pod(pod), syncType)
+		return
+	}
+
 	// Run the sync in an async worker.
 	kl.podWorkers.UpdatePod(&UpdatePodOptions{
 		Pod: pod,
@@ -865,8 +865,9 @@ func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
 	return pullSecrets
 }
 
-// podIsTerminated returns true if pod is in the terminated state ("Failed" or "Succeeded").
-func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
+// podStatusIsTerminal reports when the specified pod has no running containers or is no longer accepting
+// spec changes.
+func (kl *Kubelet) podAndContainersAreTerminal(pod *v1.Pod) (containersTerminal, podWorkerTerminal bool) {
 	// Check the cached pod status which was set after the last sync.
 	status, ok := kl.statusManager.GetPodStatus(pod.UID)
 	if !ok {
@@ -875,11 +876,28 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
 		// restarted.
 		status = pod.Status
 	}
-	return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
+	// A pod transitions into failed or succeeded from either container lifecycle (RestartNever container
+	// fails) or due to external events like deletion or eviction. A terminal pod *should* have no running
+	// containers, but to know that the pod has completed its lifecycle you must wait for containers to also
+	// be terminal.
+	containersTerminal = notRunning(status.ContainerStatuses)
+	// The kubelet must accept config changes from the pod spec until it has reached a point where changes would
+	// have no effect on any running container.
+	podWorkerTerminal = status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && containersTerminal)
+	return
 }
 
-// IsPodTerminated returns true if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded")
-// or if the pod has been deleted or removed
+// podIsTerminated returns true if the provided pod is in a terminal phase ("Failed", "Succeeded") or
+// has been deleted and has no running containers. This corresponds to when a pod must accept changes to
+// its pod spec (e.g. terminating containers allow grace period to be shortened).
+func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
+	_, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
+	return podWorkerTerminal
+}
+
+// IsPodTerminated returns true if the pod with the provided UID is in a terminal phase ("Failed",
+// "Succeeded") or has been deleted and has no running containers. This corresponds to when a pod must
+// accept changes to its pod spec (e.g. terminating containers allow grace period to be shortened)
 func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
 	pod, podFound := kl.podManager.GetPodByUID(uid)
 	if !podFound {
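For reference, both hunks gate on the kubelet's unexported notRunning helper, which is not part of this diff. From the surrounding kubelet_pods.go of this era it looks roughly like the sketch below (an approximation recalled from that file, not a verbatim copy; v1 here is k8s.io/api/core/v1):

	// notRunning reports whether every container status is terminated or waiting; an empty
	// list also counts as "not running", so a pod with no reported containers is treated as done.
	func notRunning(statuses []v1.ContainerStatus) bool {
		for _, status := range statuses {
			if status.State.Terminated == nil && status.State.Waiting == nil {
				return false
			}
		}
		return true
	}

This is why containersTerminal in podAndContainersAreTerminal only becomes true once every container has either finished or never started, which in turn is the condition dispatchWork now waits for before asking the API server to delete the pod.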