kubelet: Don't delete pod until all container status is available

After a pod reaches a terminal state and all containers are complete
we can delete the pod from the API server. The dispatchWork method
needs to wait for all container status to be available before invoking
delete. Even after the worker stops, status updates will continue to
be delivered and the sync handler will continue to sync the pods, so
dispatchWork gets multiple opportunities to see status.

The previous code assumed that a pod in Failed or Succeeded had no
running containers, but eviction or deletion of running pods could
still have running containers whose status needed to be reported.

This modifies earlier test to guarantee that the "fallback" exit
code 137 is never reported to match the expectation that all pods
exit with valid status for all containers (unless some exceptional
failure like eviction were to occur while the test is running).
This commit is contained in:
Yu-Ju Hong 2019-05-15 11:30:23 -07:00 committed by Clayton Coleman
parent ad3d8949f0
commit 2364c10e2e
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
2 changed files with 36 additions and 15 deletions

View File

@ -2001,19 +2001,22 @@ func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handle
}
// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod is terminated, dispatchWork will perform no action.
// If the pod has completed termination, dispatchWork will perform no action.
func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
if kl.podIsTerminated(pod) {
klog.V(4).Infof("Pod %q is terminated, ignoring remaining sync work: %s", format.Pod(pod), syncType)
if pod.DeletionTimestamp != nil {
// If the pod is in a terminated state, there is no pod worker to
// handle the work item. Check if the DeletionTimestamp has been
// set, and force a status update to trigger a pod deletion request
// to the apiserver.
kl.statusManager.TerminatePod(pod)
}
// check whether we are ready to delete the pod from the API server (all status up to date)
containersTerminal, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
if pod.DeletionTimestamp != nil && containersTerminal {
klog.V(4).Infof("Pod %q has completed execution and should be deleted from the API server: %s", format.Pod(pod), syncType)
kl.statusManager.TerminatePod(pod)
return
}
// optimization: avoid invoking the pod worker if no further changes are possible to the pod definition
if podWorkerTerminal {
klog.V(4).Infof("Pod %q has completed, ignoring remaining sync work: %s", format.Pod(pod), syncType)
return
}
// Run the sync in an async worker.
kl.podWorkers.UpdatePod(&UpdatePodOptions{
Pod: pod,

View File

@ -865,8 +865,9 @@ func (kl *Kubelet) getPullSecretsForPod(pod *v1.Pod) []v1.Secret {
return pullSecrets
}
// podIsTerminated returns true if pod is in the terminated state ("Failed" or "Succeeded").
func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
// podStatusIsTerminal reports when the specified pod has no running containers or is no longer accepting
// spec changes.
func (kl *Kubelet) podAndContainersAreTerminal(pod *v1.Pod) (containersTerminal, podWorkerTerminal bool) {
// Check the cached pod status which was set after the last sync.
status, ok := kl.statusManager.GetPodStatus(pod.UID)
if !ok {
@ -875,11 +876,28 @@ func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
// restarted.
status = pod.Status
}
return status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && notRunning(status.ContainerStatuses))
// A pod transitions into failed or succeeded from either container lifecycle (RestartNever container
// fails) or due to external events like deletion or eviction. A terminal pod *should* have no running
// containers, but to know that the pod has completed its lifecycle you must wait for containers to also
// be terminal.
containersTerminal = notRunning(status.ContainerStatuses)
// The kubelet must accept config changes from the pod spec until it has reached a point where changes would
// have no effect on any running container.
podWorkerTerminal = status.Phase == v1.PodFailed || status.Phase == v1.PodSucceeded || (pod.DeletionTimestamp != nil && containersTerminal)
return
}
// IsPodTerminated returns true if the pod with the provided UID is in a terminated state ("Failed" or "Succeeded")
// or if the pod has been deleted or removed
// podIsTerminated returns true if the provided pod is in a terminal phase ("Failed", "Succeeded") or
// has been deleted and has no running containers. This corresponds to when a pod must accept changes to
// its pod spec (e.g. terminating containers allow grace period to be shortened).
func (kl *Kubelet) podIsTerminated(pod *v1.Pod) bool {
_, podWorkerTerminal := kl.podAndContainersAreTerminal(pod)
return podWorkerTerminal
}
// IsPodTerminated returns true if the pod with the provided UID is in a terminal phase ("Failed",
// "Succeeded") or has been deleted and has no running containers. This corresponds to when a pod must
// accept changes to its pod spec (e.g. terminating containers allow grace period to be shortened)
func (kl *Kubelet) IsPodTerminated(uid types.UID) bool {
pod, podFound := kl.podManager.GetPodByUID(uid)
if !podFound {