diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go index 0388d579e55..4bffea17eea 100644 --- a/pkg/kubelet/container/helpers.go +++ b/pkg/kubelet/container/helpers.go @@ -397,3 +397,27 @@ func MakePortMappings(container *v1.Container) (ports []PortMapping) { } return } + +// HasAnyRegularContainerStarted returns true if any regular container has +// started, which indicates all init containers have been initialized. +func HasAnyRegularContainerStarted(spec *v1.PodSpec, statuses []v1.ContainerStatus) bool { + if len(statuses) == 0 { + return false + } + + containerNames := make(map[string]struct{}) + for _, c := range spec.Containers { + containerNames[c.Name] = struct{}{} + } + + for _, status := range statuses { + if _, ok := containerNames[status.Name]; !ok { + continue + } + if status.State.Running != nil || status.State.Terminated != nil { + return true + } + } + + return false +} diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index d1e5d8e3247..84e5207da95 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -1450,7 +1450,16 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod spec := pod.Spec pendingInitialization := 0 failedInitialization := 0 + + // regular init containers for _, container := range spec.InitContainers { + if kubetypes.IsRestartableInitContainer(&container) { + // Skip the restartable init containers here to handle them separately as + // they are slightly different from the init containers in terms of the + // pod phase. + continue + } + containerStatus, ok := podutil.GetContainerStatus(info, container.Name) if !ok { pendingInitialization++ @@ -1477,11 +1486,48 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod } } + // counters for restartable init and regular containers unknown := 0 running := 0 waiting := 0 stopped := 0 succeeded := 0 + + // restartable init containers + for _, container := range spec.InitContainers { + if !kubetypes.IsRestartableInitContainer(&container) { + // Skip the regular init containers, as they have been handled above. + continue + } + containerStatus, ok := podutil.GetContainerStatus(info, container.Name) + if !ok { + unknown++ + continue + } + + switch { + case containerStatus.State.Running != nil: + if containerStatus.Started == nil || !*containerStatus.Started { + pendingInitialization++ + } + running++ + case containerStatus.State.Terminated != nil: + // Do nothing here, as terminated restartable init containers are not + // taken into account for the pod phase. + case containerStatus.State.Waiting != nil: + if containerStatus.LastTerminationState.Terminated != nil { + // Do nothing here, as terminated restartable init containers are not + // taken into account for the pod phase. + } else { + pendingInitialization++ + waiting++ + } + default: + pendingInitialization++ + unknown++ + } + } + for _, container := range spec.Containers { containerStatus, ok := podutil.GetContainerStatus(info, container.Name) if !ok { @@ -1513,7 +1559,11 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod } switch { - case pendingInitialization > 0: + case pendingInitialization > 0 && + // This is needed to handle the case where the pod has been initialized but + // the restartable init containers are restarting and the pod should not be + // placed back into v1.PodPending since the regular containers have run. 
+ !kubecontainer.HasAnyRegularContainerStarted(&spec, info): fallthrough case waiting > 0: klog.V(5).InfoS("Pod waiting > 0, pending") @@ -1529,7 +1579,8 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod if podIsTerminal { // TODO(#116484): Also assign terminal phase to static pods. if !kubetypes.IsStaticPod(pod) { - // All containers are terminated in success + // All regular containers are terminated in success and all restartable + // init containers are stopped. if stopped == succeeded { return v1.PodSucceeded } @@ -1543,8 +1594,8 @@ func getPhase(pod *v1.Pod, info []v1.ContainerStatus, podIsTerminal bool) v1.Pod return v1.PodRunning } if stopped == succeeded { - // RestartPolicy is not Always, and all - // containers are terminated in success + // RestartPolicy is not Always, all containers are terminated in success + // and all restartable init containers are stopped. return v1.PodSucceeded } if spec.RestartPolicy == v1.RestartPolicyNever { @@ -1649,7 +1700,7 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po } // ensure the probe managers have up to date status for containers - kl.probeManager.UpdatePodStatus(pod.UID, s) + kl.probeManager.UpdatePodStatus(pod, s) // preserve all conditions not owned by the kubelet s.Conditions = make([]v1.PodCondition, 0, len(pod.Status.Conditions)+1) @@ -1674,7 +1725,7 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po if utilfeature.DefaultFeatureGate.Enabled(features.PodReadyToStartContainersCondition) { s.Conditions = append(s.Conditions, status.GeneratePodReadyToStartContainersCondition(pod, podStatus)) } - s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(&pod.Spec, s.InitContainerStatuses, s.Phase)) + s.Conditions = append(s.Conditions, status.GeneratePodInitializedCondition(&pod.Spec, append(s.InitContainerStatuses, s.ContainerStatuses...), s.Phase)) s.Conditions = append(s.Conditions, status.GeneratePodReadyCondition(&pod.Spec, s.Conditions, s.ContainerStatuses, s.Phase)) s.Conditions = append(s.Conditions, status.GenerateContainersReadyCondition(&pod.Spec, s.ContainerStatuses, s.Phase)) s.Conditions = append(s.Conditions, v1.PodCondition{ diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 7d958234820..dd8e7a0ea06 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -60,6 +60,8 @@ import ( netutils "k8s.io/utils/net" ) +var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways + func TestNodeHostsFileContent(t *testing.T) { testCases := []struct { hostsFileName string @@ -2080,6 +2082,16 @@ func runningState(cName string) v1.ContainerStatus { }, } } +func startedState(cName string) v1.ContainerStatus { + started := true + return v1.ContainerStatus{ + Name: cName, + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + Started: &started, + } +} func runningStateWithStartedAt(cName string, startedAt time.Time) v1.ContainerStatus { return v1.ContainerStatus{ Name: cName, @@ -2384,6 +2396,218 @@ func TestPodPhaseWithRestartAlwaysInitContainers(t *testing.T) { } } +func TestPodPhaseWithRestartAlwaysRestartableInitContainers(t *testing.T) { + desiredState := v1.PodSpec{ + NodeName: "machine", + InitContainers: []v1.Container{ + {Name: "containerX", RestartPolicy: &containerRestartPolicyAlways}, + }, + Containers: []v1.Container{ + {Name: "containerA"}, + {Name: "containerB"}, + }, + RestartPolicy: v1.RestartPolicyAlways, + } + 
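// Editor's illustrative sketch (not part of the original patch): the
// HasAnyRegularContainerStarted helper added earlier in this diff is what keeps
// a restarting sidecar from pushing an already-initialized pod back to
// v1.PodPending. With no regular-container statuses it reports false; once
// containerA has a running or terminated status it reports true. This assumes
// the test file imports the kubecontainer alias for
// "k8s.io/kubernetes/pkg/kubelet/container".
if !kubecontainer.HasAnyRegularContainerStarted(&desiredState, []v1.ContainerStatus{runningState("containerA")}) {
	t.Fatal("expected a running regular container to mark the pod as initialized")
}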
+ tests := []struct { + pod *v1.Pod + podIsTerminal bool + status v1.PodPhase + test string + }{ + {&v1.Pod{Spec: desiredState, Status: v1.PodStatus{}}, false, v1.PodPending, "empty, waiting"}, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + runningState("containerX"), + }, + }, + }, + false, + v1.PodPending, + "restartable init container running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + stoppedState("containerX"), + }, + }, + }, + false, + v1.PodPending, + "restartable init container stopped", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithLastTermination("containerX"), + }, + }, + }, + false, + v1.PodPending, + "restartable init container waiting, terminated zero", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithNonZeroTermination("containerX"), + }, + }, + }, + false, + v1.PodPending, + "restartable init container waiting, terminated non-zero", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingState("containerX"), + }, + }, + }, + false, + v1.PodPending, + "restartable init container waiting, not terminated", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + startedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + }, + }, + }, + false, + v1.PodPending, + "restartable init container started, 1/2 regular container running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + startedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + false, + v1.PodRunning, + "restartable init container started, all regular containers running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + runningState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + false, + v1.PodRunning, + "restartable init container running, all regular containers running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + stoppedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + false, + v1.PodRunning, + "restartable init container stopped, all regular containers running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithLastTermination("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + runningState("containerB"), + }, + }, + }, + false, + v1.PodRunning, + "backoff crashloop restartable init container, all regular containers running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + failedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + true, + v1.PodSucceeded, + "all regular containers succeeded 
and restartable init container failed with restart always, but the pod is terminal", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + succeededState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + true, + v1.PodSucceeded, + "all regular containers succeeded and restartable init container succeeded with restart always, but the pod is terminal", + }, + } + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)() + for _, test := range tests { + statusInfo := append(test.pod.Status.InitContainerStatuses[:], test.pod.Status.ContainerStatuses[:]...) + status := getPhase(test.pod, statusInfo, test.podIsTerminal) + assert.Equal(t, test.status, status, "[test %s]", test.test) + } +} + func TestPodPhaseWithRestartNever(t *testing.T) { desiredState := v1.PodSpec{ NodeName: "machine", @@ -2587,6 +2811,205 @@ func TestPodPhaseWithRestartNeverInitContainers(t *testing.T) { } } +func TestPodPhaseWithRestartNeverRestartableInitContainers(t *testing.T) { + desiredState := v1.PodSpec{ + NodeName: "machine", + InitContainers: []v1.Container{ + {Name: "containerX", RestartPolicy: &containerRestartPolicyAlways}, + }, + Containers: []v1.Container{ + {Name: "containerA"}, + {Name: "containerB"}, + }, + RestartPolicy: v1.RestartPolicyNever, + } + + tests := []struct { + pod *v1.Pod + status v1.PodPhase + test string + }{ + {&v1.Pod{Spec: desiredState, Status: v1.PodStatus{}}, v1.PodPending, "empty, waiting"}, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + runningState("containerX"), + }, + }, + }, + v1.PodPending, + "restartable init container running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + stoppedState("containerX"), + }, + }, + }, + v1.PodPending, + "restartable init container stopped", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithLastTermination("containerX"), + }, + }, + }, + v1.PodPending, + "restartable init container waiting, terminated zero", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithNonZeroTermination("containerX"), + }, + }, + }, + v1.PodPending, + "restartable init container waiting, terminated non-zero", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingState("containerX"), + }, + }, + }, + v1.PodPending, + "restartable init container waiting, not terminated", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + startedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + runningState("containerA"), + }, + }, + }, + v1.PodPending, + "restartable init container started, one main container running", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + startedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + v1.PodRunning, + "restartable init container started, main containers succeeded", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + 
InitContainerStatuses: []v1.ContainerStatus{ + runningState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + v1.PodRunning, + "restartable init container running, main containers succeeded", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + succeededState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + v1.PodSucceeded, + "all containers succeeded", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + failedState("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + v1.PodSucceeded, + "restartable init container terminated non-zero, main containers succeeded", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithLastTermination("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + v1.PodSucceeded, + "backoff crashloop restartable init container, main containers succeeded", + }, + { + &v1.Pod{ + Spec: desiredState, + Status: v1.PodStatus{ + InitContainerStatuses: []v1.ContainerStatus{ + waitingStateWithNonZeroTermination("containerX"), + }, + ContainerStatuses: []v1.ContainerStatus{ + succeededState("containerA"), + succeededState("containerB"), + }, + }, + }, + v1.PodSucceeded, + "backoff crashloop with non-zero restartable init container, main containers succeeded", + }, + } + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)() + for _, test := range tests { + statusInfo := append(test.pod.Status.InitContainerStatuses[:], test.pod.Status.ContainerStatuses[:]...) + status := getPhase(test.pod, statusInfo, false) + assert.Equal(t, test.status, status, "[test %s]", test.test) + } +} + func TestPodPhaseWithRestartOnFailure(t *testing.T) { desiredState := v1.PodSpec{ NodeName: "machine", diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 5c5d727776c..5628620d72b 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -52,6 +52,7 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/cri/remote" "k8s.io/kubernetes/pkg/kubelet/events" + proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/util/tail" @@ -848,61 +849,203 @@ func (m *kubeGenericRuntimeManager) purgeInitContainers(ctx context.Context, pod } } -// findNextInitContainerToRun returns the status of the last failed container, the -// index of next init container to start, or done if there are no further init containers. -// Status is only returned if an init container is failed, in which case next will -// point to the current container. 
-func findNextInitContainerToRun(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (status *kubecontainer.Status, next *v1.Container, done bool) { +// hasAnyRegularContainerCreated returns true if any regular container has been +// created, which indicates all init containers have been initialized. +func hasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool { + for _, container := range pod.Spec.Containers { + status := podStatus.FindContainerStatusByName(container.Name) + if status == nil { + continue + } + switch status.State { + case kubecontainer.ContainerStateCreated, + kubecontainer.ContainerStateRunning, + kubecontainer.ContainerStateExited: + return true + default: + // Ignore other states + } + } + return false +} + +// computeInitContainerActions sets the actions on the given changes that need +// to be taken for the init containers. This includes actions to initialize the +// init containers and actions to keep restartable init containers running. +// computeInitContainerActions returns true if pod has been initialized. +// +// The actions include: +// - Start the first init container that has not been started. +// - Restart all restartable init containers that have started but are not running. +// - Kill the restartable init containers that have failed the startup probe. +func (m *kubeGenericRuntimeManager) computeInitContainerActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus, changes *podActions) bool { if len(pod.Spec.InitContainers) == 0 { - return nil, nil, true + return true } // If any of the main containers have status and are Running, then all init containers must // have been executed at some point in the past. However, they could have been removed // from the container runtime now, and if we proceed, it would appear as if they - // never ran and will re-execute improperly. - for i := range pod.Spec.Containers { - container := &pod.Spec.Containers[i] - status := podStatus.FindContainerStatusByName(container.Name) - if status != nil && status.State == kubecontainer.ContainerStateRunning { - return nil, nil, true - } - } + // never ran and will re-execute improperly except for the restartable init containers. + podHasInitialized := hasAnyRegularContainerCreated(pod, podStatus) - // If there are failed containers, return the status of the last failed one. - for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { - container := &pod.Spec.InitContainers[i] - status := podStatus.FindContainerStatusByName(container.Name) - if status != nil && isInitContainerFailed(status) { - return status, container, false - } - } - - // There are no failed containers now. + // isPreviouslyInitialized indicates if the current init container is + // previously initialized. + isPreviouslyInitialized := podHasInitialized + restartOnFailure := shouldRestartOnFailure(pod) + + // Note that we iterate through the init containers in reverse order to find + // the next init container to run, as the completed init containers may get + // removed from container runtime for various reasons. Therefore the kubelet + // should rely on the minimal number of init containers - the last one. + // + // Once we find the next init container to run, iterate through the rest to + // find the restartable init containers to restart. 
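// Editor's note (illustrative example, not part of this patch): with init
// containers [init-0 (regular), sidecar-1, sidecar-2] where only sidecar-2
// still has a status and it is running past any startup probe, the reverse
// walk marks the pod as previously initialized at index 2, schedules a
// restart only for sidecar-1 (whose status was lost), leaves init-0 alone,
// and reports the pod as initialized so the regular containers can be synced.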
for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- { container := &pod.Spec.InitContainers[i] status := podStatus.FindContainerStatusByName(container.Name) + klog.V(4).InfoS("Computing init container action", "pod", klog.KObj(pod), "container", container.Name, "status", status) if status == nil { + // If the container is previously initialized but its status is not + // found, it means its last status is removed for some reason. + // Restart it if it is a restartable init container. + if isPreviouslyInitialized && types.IsRestartableInitContainer(container) { + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + } continue } - // container is still running, return not done. - if status.State == kubecontainer.ContainerStateRunning { - return nil, nil, false + if isPreviouslyInitialized && !types.IsRestartableInitContainer(container) { + // after initialization, only restartable init containers need to be kept + // running + continue } - if status.State == kubecontainer.ContainerStateExited { - // all init containers successful - if i == (len(pod.Spec.InitContainers) - 1) { - return nil, nil, true + switch status.State { + case kubecontainer.ContainerStateCreated: + // nothing to do but wait for it to start + + case kubecontainer.ContainerStateRunning: + if !types.IsRestartableInitContainer(container) { + break } - // all containers up to i successful, go to i+1 - return nil, &pod.Spec.InitContainers[i+1], false + if types.IsRestartableInitContainer(container) { + if container.StartupProbe != nil { + startup, found := m.startupManager.Get(status.ID) + if !found { + // If the startup probe has not been run, wait for it. + break + } + if startup != proberesults.Success { + if startup == proberesults.Failure { + // If the restartable init container failed the startup probe, + // restart it. + changes.ContainersToKill[status.ID] = containerToKillInfo{ + name: container.Name, + container: container, + message: fmt.Sprintf("Init container %s failed startup probe", container.Name), + reason: reasonStartupProbe, + } + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + } + break + } + } + + klog.V(4).InfoS("Init container has been initialized", "pod", klog.KObj(pod), "container", container.Name) + if i == (len(pod.Spec.InitContainers) - 1) { + podHasInitialized = true + } else if !isPreviouslyInitialized { + // this init container is initialized for the first time, start the next one + changes.InitContainersToStart = append(changes.InitContainersToStart, i+1) + } + } else { // init container + // nothing do to but wait for it to finish + break + } + + // If the init container failed and the restart policy is Never, the pod is terminal. + // Otherwise, restart the init container. 
+ case kubecontainer.ContainerStateExited: + if types.IsRestartableInitContainer(container) { + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + } else { // init container + if isInitContainerFailed(status) { + if !restartOnFailure { + changes.KillPod = true + changes.InitContainersToStart = nil + return false + } + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + break + } + + klog.V(4).InfoS("Init container has been initialized", "pod", klog.KObj(pod), "container", container.Name) + if i == (len(pod.Spec.InitContainers) - 1) { + podHasInitialized = true + } else { + // this init container is initialized for the first time, start the next one + changes.InitContainersToStart = append(changes.InitContainersToStart, i+1) + } + } + + default: // kubecontainer.ContainerStatusUnknown or other unknown states + if types.IsRestartableInitContainer(container) { + // If the restartable init container is in unknown state, restart it. + changes.ContainersToKill[status.ID] = containerToKillInfo{ + name: container.Name, + container: container, + message: fmt.Sprintf("Init container is in %q state, try killing it before restart", + status.State), + reason: reasonUnknown, + } + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + } else { // init container + if !isInitContainerFailed(status) { + klog.V(4).InfoS("This should not happen, init container is in unknown state but not failed", "pod", klog.KObj(pod), "containerStatus", status) + } + + if !restartOnFailure { + changes.KillPod = true + changes.InitContainersToStart = nil + return false + } + + // If the init container is in unknown state, restart it. + changes.ContainersToKill[status.ID] = containerToKillInfo{ + name: container.Name, + container: container, + message: fmt.Sprintf("Init container is in %q state, try killing it before restart", + status.State), + reason: reasonUnknown, + } + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + } + } + + if !isPreviouslyInitialized { + // the one before this init container has been initialized + isPreviouslyInitialized = true } } - return nil, &pod.Spec.InitContainers[0], false + // this means no init containers have been started, + // start the first one + if !isPreviouslyInitialized { + changes.InitContainersToStart = append(changes.InitContainersToStart, 0) + } + + // reverse the InitContainersToStart, as the above loop iterated through the + // init containers backwards, but we want to start them as per the order in + // the pod spec. + l := len(changes.InitContainersToStart) + for i := 0; i < l/2; i++ { + changes.InitContainersToStart[i], changes.InitContainersToStart[l-1-i] = + changes.InitContainersToStart[l-1-i], changes.InitContainersToStart[i] + } + + return podHasInitialized } // GetContainerLogs returns logs of a specific container. diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 16c08f8d2ec..317f62af65f 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -492,11 +492,13 @@ type podActions struct { // The attempt number of creating sandboxes for the pod. Attempt uint32 - // The next init container to start. - NextInitContainerToStart *v1.Container + // InitContainersToStart keeps a list of indexes for the init containers to + // start, where the index is the index of the specific init container in the + // pod spec (pod.Spec.InitContainers). 
+ InitContainersToStart []int // ContainersToStart keeps a list of indexes for the containers to start, // where the index is the index of the specific container in the pod spec ( - // pod.Spec.Containers. + // pod.Spec.Containers). ContainersToStart []int // ContainersToKill keeps a map of containers that need to be killed, note that // the key is the container ID of the container, while @@ -827,7 +829,7 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * if createPodSandbox { if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 { // Should not restart the pod, just return. - // we should not create a sandbox for a pod if it is already done. + // we should not create a sandbox, and just kill the pod if it is already done. // if all containers are done and should not be started, there is no need to create a new sandbox. // this stops confusing logs on pods whose containers all have exit codes, but we recreate a sandbox before terminating it. // @@ -846,18 +848,22 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * } containersToStart = append(containersToStart, idx) } - // We should not create a sandbox for a Pod if initialization is done and there is no container to start. - if len(containersToStart) == 0 { - _, _, done := findNextInitContainerToRun(pod, podStatus) - if done { - changes.CreateSandbox = false - return changes - } + + // If there is any regular container, it means all init containers have + // been initialized. + hasInitialized := hasAnyRegularContainerCreated(pod, podStatus) + // We should not create a sandbox, and just kill the pod if initialization + // is done and there is no container to start. + if hasInitialized && len(containersToStart) == 0 { + changes.CreateSandbox = false + return changes } + // If we are creating a pod sandbox, we should restart from the initial + // state. if len(pod.Spec.InitContainers) != 0 { // Pod has init containers, return the first one. - changes.NextInitContainerToStart = &pod.Spec.InitContainers[0] + changes.InitContainersToStart = []int{0} return changes } changes.ContainersToStart = containersToStart @@ -874,27 +880,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * } } - // Check initialization progress. - initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus) - if !done { - if next != nil { - initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus) - if initFailed && !shouldRestartOnFailure(pod) { - changes.KillPod = true - } else { - // Always try to stop containers in unknown state first. - if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown { - changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{ - name: next.Name, - container: next, - message: fmt.Sprintf("Init container is in %q state, try killing it before restart", - initLastStatus.State), - reason: reasonUnknown, - } - } - changes.NextInitContainerToStart = next - } - } + hasInitialized := m.computeInitContainerActions(pod, podStatus, &changes) + if changes.KillPod || !hasInitialized { // Initialization failed or still in progress. Skip inspecting non-init // containers. 
return changes @@ -993,6 +980,9 @@ func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod * if keepCount == 0 && len(changes.ContainersToStart) == 0 { changes.KillPod = true + // To prevent the restartable init containers to keep pod alive, we should + // not restart them. + changes.InitContainersToStart = nil } return changes @@ -1225,10 +1215,16 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po start(ctx, "ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx])) } - // Step 6: start the init container. - if container := podContainerChanges.NextInitContainerToStart; container != nil { + // Step 6: start init containers. + for _, idx := range podContainerChanges.InitContainersToStart { + container := &pod.Spec.InitContainers[idx] // Start the next init container. if err := start(ctx, "init container", metrics.InitContainer, containerStartSpec(container)); err != nil { + if types.IsRestartableInitContainer(container) { + klog.V(4).InfoS("Failed to start the restartable init container for the pod, skipping", "initContainerName", container.Name, "pod", klog.KObj(pod)) + continue + } + klog.V(4).InfoS("Failed to initialize the pod, as the init container failed to start, aborting", "initContainerName", container.Name, "pod", klog.KObj(pod)) return } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 120d4d14174..f9fe3a9c666 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -53,7 +53,8 @@ import ( ) var ( - fakeCreatedAt int64 = 1 + fakeCreatedAt int64 = 1 + containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways ) func createTestRuntimeManager() (*apitest.FakeRuntimeService, *apitest.FakeImageService, *kubeGenericRuntimeManager, error) { @@ -1244,6 +1245,17 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, + "no init containers have been started; start the first one": { + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses = nil + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, "initialization in progress; do nothing": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, mutateStatusFn: func(status *kubecontainer.PodStatus) { @@ -1257,13 +1269,13 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY }, actions: podActions{ - KillPod: true, - CreateSandbox: true, - SandboxID: baseStatus.SandboxStatuses[0].Id, - Attempt: uint32(1), - NextInitContainerToStart: &basePod.Spec.InitContainers[0], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization failed; restart the last init container if RestartPolicy == Always": { @@ -1272,10 +1284,10 @@ func 
TestComputePodActionsWithInitContainers(t *testing.T) { status.ContainerStatuses[2].ExitCode = 137 }, actions: podActions{ - SandboxID: baseStatus.SandboxStatuses[0].Id, - NextInitContainerToStart: &basePod.Spec.InitContainers[2], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization failed; restart the last init container if RestartPolicy == OnFailure": { @@ -1284,10 +1296,10 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { status.ContainerStatuses[2].ExitCode = 137 }, actions: podActions{ - SandboxID: baseStatus.SandboxStatuses[0].Id, - NextInitContainerToStart: &basePod.Spec.InitContainers[2], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "initialization failed; kill pod if RestartPolicy == Never": { @@ -1308,10 +1320,10 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown }, actions: podActions{ - SandboxID: baseStatus.SandboxStatuses[0].Id, - NextInitContainerToStart: &basePod.Spec.InitContainers[2], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), }, }, "init container state unknown; kill and recreate the last init container if RestartPolicy == OnFailure": { @@ -1320,10 +1332,10 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown }, actions: podActions{ - SandboxID: baseStatus.SandboxStatuses[0].Id, - NextInitContainerToStart: &basePod.Spec.InitContainers[2], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), }, }, "init container state unknown; kill pod if RestartPolicy == Never": { @@ -1359,13 +1371,13 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { status.ContainerStatuses = []*kubecontainer.Status{} }, actions: podActions{ - KillPod: true, - CreateSandbox: true, - SandboxID: baseStatus.SandboxStatuses[0].Id, - Attempt: uint32(1), - NextInitContainerToStart: &basePod.Spec.InitContainers[0], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "Pod sandbox not ready, init container failed, and RestartPolicy == OnFailure; create a new pod sandbox": { @@ -1375,13 +1387,26 @@ func TestComputePodActionsWithInitContainers(t *testing.T) { 
status.ContainerStatuses[2].ExitCode = 137 }, actions: podActions{ - KillPod: true, - CreateSandbox: true, - SandboxID: baseStatus.SandboxStatuses[0].Id, - Attempt: uint32(1), - NextInitContainerToStart: &basePod.Spec.InitContainers[0], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "some of the init container statuses are missing but the last init container is running, don't restart preceding ones": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateRunning + status.ContainerStatuses = status.ContainerStatuses[2:] + }, + actions: podActions{ + KillPod: false, + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, } { @@ -1436,6 +1461,345 @@ func makeBasePodAndStatusWithInitContainers() (*v1.Pod, *kubecontainer.PodStatus return pod, status } +func TestComputePodActionsWithRestartableInitContainers(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)() + _, _, m, err := createTestRuntimeManager() + require.NoError(t, err) + + // Creating a pair reference pod and status for the test cases to refer + // the specific fields. + basePod, baseStatus := makeBasePodAndStatusWithRestartableInitContainers() + noAction := podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, + } + + for desc, test := range map[string]struct { + mutatePodFn func(*v1.Pod) + mutateStatusFn func(*v1.Pod, *kubecontainer.PodStatus) + actions podActions + resetStatusFn func(*kubecontainer.PodStatus) + }{ + "initialization completed; start all containers": { + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "no init containers have been started; start the first one": { + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses = nil + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "initialization in progress; do nothing": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateCreated + }, + actions: noAction, + }, + "restartable init container has started; start the next": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses = status.ContainerStatuses[:1] + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{1}, + ContainersToStart: 
[]int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "startupProbe has not been run; do nothing": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + m.startupManager.Remove(status.ContainerStatuses[1].ID) + status.ContainerStatuses = status.ContainerStatuses[:2] + }, + actions: noAction, + }, + "startupProbe in progress; do nothing": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + m.startupManager.Set(status.ContainerStatuses[1].ID, proberesults.Unknown, basePod) + status.ContainerStatuses = status.ContainerStatuses[:2] + }, + actions: noAction, + resetStatusFn: func(status *kubecontainer.PodStatus) { + m.startupManager.Remove(status.ContainerStatuses[1].ID) + }, + }, + "startupProbe has completed; start the next": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses = status.ContainerStatuses[:2] + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "kill and recreate the restartable init container if the startup check has failed": { + mutatePodFn: func(pod *v1.Pod) { + pod.Spec.RestartPolicy = v1.RestartPolicyAlways + pod.Spec.InitContainers[2].StartupProbe = &v1.Probe{} + }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + m.startupManager.Set(status.ContainerStatuses[2].ID, proberesults.Failure, basePod) + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + ContainersToStart: []int{}, + }, + resetStatusFn: func(status *kubecontainer.PodStatus) { + m.startupManager.Remove(status.ContainerStatuses[2].ID) + }, + }, + "restart terminated restartable init container and next init container": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[2].State = kubecontainer.ContainerStateExited + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{0, 2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "restart terminated restartable init container and regular containers": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses[0].State = kubecontainer.ContainerStateExited + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{0}, + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "Pod sandbox not ready, restartable init container failed, but RestartPolicy == Never; kill pod only": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: 
func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + status.ContainerStatuses[2].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + KillPod: true, + CreateSandbox: false, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "Pod sandbox not ready, and RestartPolicy == Never, but no visible restartable init containers; create a new pod sandbox": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + status.ContainerStatuses = []*kubecontainer.Status{} + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "Pod sandbox not ready, restartable init container failed, and RestartPolicy == OnFailure; create a new pod sandbox": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + status.ContainerStatuses[2].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "Pod sandbox not ready, restartable init container failed, and RestartPolicy == Always; create a new pod sandbox": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY + status.ContainerStatuses[2].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "initialization failed; restart the last restartable init container even if pod's RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[2].ExitCode = 137 + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "restartable init container state unknown; kill and recreate the last restartable init container even if pod's RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { 
pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateUnknown + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + }, + }, + "restart restartable init container if regular containers are running even if pod's RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses[2].State = kubecontainer.ContainerStateExited + status.ContainerStatuses[2].ExitCode = 137 + // all main containers are running + for i := 1; i <= 3; i++ { + status.ContainerStatuses = append(status.ContainerStatuses, &kubecontainer.Status{ + ID: kubecontainer.ContainerID{ID: fmt.Sprintf("id%d", i)}, + Name: fmt.Sprintf("foo%d", i), + State: kubecontainer.ContainerStateRunning, + Hash: kubecontainer.HashContainer(&pod.Spec.Containers[i-1]), + }) + } + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "kill the pod if all main containers succeeded if pod's RestartPolicy == Never": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + // all main containers succeeded + for i := 1; i <= 3; i++ { + status.ContainerStatuses = append(status.ContainerStatuses, &kubecontainer.Status{ + ID: kubecontainer.ContainerID{ID: fmt.Sprintf("id%d", i)}, + Name: fmt.Sprintf("foo%d", i), + State: kubecontainer.ContainerStateExited, + ExitCode: 0, + Hash: kubecontainer.HashContainer(&pod.Spec.Containers[i-1]), + }) + } + }, + actions: podActions{ + KillPod: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "some of the init container statuses are missing but the last init container is running, restart restartable init and regular containers": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses = status.ContainerStatuses[2:] + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{0, 1}, + ContainersToStart: []int{0, 1, 2}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + } { + pod, status := makeBasePodAndStatusWithRestartableInitContainers() + m.startupManager.Set(status.ContainerStatuses[1].ID, proberesults.Success, basePod) + m.startupManager.Set(status.ContainerStatuses[2].ID, proberesults.Success, basePod) + if test.mutatePodFn != nil { + test.mutatePodFn(pod) + } + if test.mutateStatusFn != nil { + test.mutateStatusFn(pod, status) + } + ctx := context.Background() + actions := m.computePodActions(ctx, pod, status) + verifyActions(t, &test.actions, &actions, desc) + if test.resetStatusFn != nil { + test.resetStatusFn(status) + } + } +} + +func makeBasePodAndStatusWithRestartableInitContainers() (*v1.Pod, *kubecontainer.PodStatus) { + pod, status := makeBasePodAndStatus() + 
pod.Spec.InitContainers = []v1.Container{ + { + Name: "restartable-init-1", + Image: "bar-image", + RestartPolicy: &containerRestartPolicyAlways, + }, + { + Name: "restartable-init-2", + Image: "bar-image", + RestartPolicy: &containerRestartPolicyAlways, + StartupProbe: &v1.Probe{}, + }, + { + Name: "restartable-init-3", + Image: "bar-image", + RestartPolicy: &containerRestartPolicyAlways, + StartupProbe: &v1.Probe{}, + }, + } + // Replace the original statuses of the containers with those for the init + // containers. + status.ContainerStatuses = []*kubecontainer.Status{ + { + ID: kubecontainer.ContainerID{ID: "initid1"}, + Name: "restartable-init-1", State: kubecontainer.ContainerStateRunning, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + { + ID: kubecontainer.ContainerID{ID: "initid2"}, + Name: "restartable-init-2", State: kubecontainer.ContainerStateRunning, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + { + ID: kubecontainer.ContainerID{ID: "initid3"}, + Name: "restartable-init-3", State: kubecontainer.ContainerStateRunning, + Hash: kubecontainer.HashContainer(&pod.Spec.InitContainers[0]), + }, + } + return pod, status +} + func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) { // Make sure existing test cases pass with feature enabled TestComputePodActions(t) @@ -1517,13 +1881,13 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) { status.ContainerStatuses[0].ExitCode = 137 }, actions: podActions{ - KillPod: true, - CreateSandbox: true, - SandboxID: baseStatus.SandboxStatuses[0].Id, - Attempt: uint32(1), - NextInitContainerToStart: &basePod.Spec.InitContainers[0], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "Kill pod and do not restart ephemeral container if the pod sandbox is dead": { @@ -1532,13 +1896,13 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) { status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY }, actions: podActions{ - KillPod: true, - CreateSandbox: true, - SandboxID: baseStatus.SandboxStatuses[0].Id, - Attempt: uint32(1), - NextInitContainerToStart: &basePod.Spec.InitContainers[0], - ContainersToStart: []int{}, - ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + KillPod: true, + CreateSandbox: true, + SandboxID: baseStatus.SandboxStatuses[0].Id, + Attempt: uint32(1), + InitContainersToStart: []int{0}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, "Kill pod if all containers exited except ephemeral container": { diff --git a/pkg/kubelet/lifecycle/predicate.go b/pkg/kubelet/lifecycle/predicate.go index 96808cf7100..d2067174975 100644 --- a/pkg/kubelet/lifecycle/predicate.go +++ b/pkg/kubelet/lifecycle/predicate.go @@ -21,9 +21,11 @@ import ( "runtime" v1 "k8s.io/api/core/v1" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/component-helpers/scheduling/corev1" "k8s.io/klog/v2" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/scheduler" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ 
-72,6 +74,22 @@ func (w *predicateAdmitHandler) Admit(attrs *PodAdmitAttributes) PodAdmitResult pods := attrs.OtherPods nodeInfo := schedulerframework.NewNodeInfo(pods...) nodeInfo.SetNode(node) + + // TODO: Remove this after the SidecarContainers feature gate graduates to GA. + if !utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) { + for _, c := range admitPod.Spec.InitContainers { + if types.IsRestartableInitContainer(&c) { + message := fmt.Sprintf("Init container %q may not have a non-default restartPolicy", c.Name) + klog.InfoS("Failed to admit pod", "pod", klog.KObj(admitPod), "message", message) + return PodAdmitResult{ + Admit: false, + Reason: "InitContainerRestartPolicyForbidden", + Message: message, + } + } + } + } + // ensure the node has enough plugin resources for that required in pods if err = w.pluginResourceUpdateFunc(nodeInfo, attrs); err != nil { message := fmt.Sprintf("Update plugin resources failed due to %v, which is unexpected.", err) diff --git a/pkg/kubelet/prober/prober_manager.go b/pkg/kubelet/prober/prober_manager.go index 94ba0022dc1..1a92699f075 100644 --- a/pkg/kubelet/prober/prober_manager.go +++ b/pkg/kubelet/prober/prober_manager.go @@ -29,6 +29,8 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/kubelet/status" + kubetypes "k8s.io/kubernetes/pkg/kubelet/types" + kubeutil "k8s.io/kubernetes/pkg/kubelet/util" "k8s.io/utils/clock" ) @@ -84,7 +86,7 @@ type Manager interface { // UpdatePodStatus modifies the given PodStatus with the appropriate Ready state for each // container based on container running status, cached probe results and worker states. - UpdatePodStatus(types.UID, *v1.PodStatus) + UpdatePodStatus(*v1.Pod, *v1.PodStatus) } type manager struct { @@ -166,12 +168,22 @@ func (t probeType) String() string { } } +func getRestartableInitContainers(pod *v1.Pod) []v1.Container { + var restartableInitContainers []v1.Container + for _, c := range pod.Spec.InitContainers { + if kubetypes.IsRestartableInitContainer(&c) { + restartableInitContainers = append(restartableInitContainers, c) + } + } + return restartableInitContainers +} + func (m *manager) AddPod(pod *v1.Pod) { m.workerLock.Lock() defer m.workerLock.Unlock() key := probeKey{podUID: pod.UID} - for _, c := range pod.Spec.Containers { + for _, c := range append(pod.Spec.Containers, getRestartableInitContainers(pod)...) { key.containerName = c.Name if c.StartupProbe != nil { @@ -233,7 +245,7 @@ func (m *manager) RemovePod(pod *v1.Pod) { defer m.workerLock.RUnlock() key := probeKey{podUID: pod.UID} - for _, c := range pod.Spec.Containers { + for _, c := range append(pod.Spec.Containers, getRestartableInitContainers(pod)...) { key.containerName = c.Name for _, probeType := range [...]probeType{readiness, liveness, startup} { key.probeType = probeType @@ -255,48 +267,92 @@ func (m *manager) CleanupPods(desiredPods map[types.UID]sets.Empty) { } } -func (m *manager) UpdatePodStatus(podUID types.UID, podStatus *v1.PodStatus) { +func (m *manager) isContainerStarted(pod *v1.Pod, containerStatus *v1.ContainerStatus) bool { + if containerStatus.State.Running == nil { + return false + } + + if result, ok := m.startupManager.Get(kubecontainer.ParseContainerID(containerStatus.ContainerID)); ok { + return result == results.Success + } + + // if there is a startup probe which hasn't run yet, the container is not + // started. 
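// Editor's note (illustrative, not part of this patch): for a restartable
// init container whose startupProbe has produced no result yet, this branch
// keeps Started=false, so UpdatePodStatus below skips its readiness
// evaluation until the startup probe reports success.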
+ if _, exists := m.getWorker(pod.UID, containerStatus.Name, startup); exists { + return false + } + + // there is no startup probe, so the container is started. + return true +} + +func (m *manager) UpdatePodStatus(pod *v1.Pod, podStatus *v1.PodStatus) { for i, c := range podStatus.ContainerStatuses { - var started bool - if c.State.Running == nil { - started = false - } else if result, ok := m.startupManager.Get(kubecontainer.ParseContainerID(c.ContainerID)); ok { - started = result == results.Success - } else { - // The check whether there is a probe which hasn't run yet. - _, exists := m.getWorker(podUID, c.Name, startup) - started = !exists - } + started := m.isContainerStarted(pod, &podStatus.ContainerStatuses[i]) podStatus.ContainerStatuses[i].Started = &started - if started { - var ready bool - if c.State.Running == nil { - ready = false - } else if result, ok := m.readinessManager.Get(kubecontainer.ParseContainerID(c.ContainerID)); ok && result == results.Success { - ready = true - } else { - // The check whether there is a probe which hasn't run yet. - w, exists := m.getWorker(podUID, c.Name, readiness) - ready = !exists // no readinessProbe -> always ready - if exists { - // Trigger an immediate run of the readinessProbe to update ready state - select { - case w.manualTriggerCh <- struct{}{}: - default: // Non-blocking. - klog.InfoS("Failed to trigger a manual run", "probe", w.probeType.String()) - } + if !started { + continue + } + + var ready bool + if c.State.Running == nil { + ready = false + } else if result, ok := m.readinessManager.Get(kubecontainer.ParseContainerID(c.ContainerID)); ok && result == results.Success { + ready = true + } else { + // The check whether there is a probe which hasn't run yet. + w, exists := m.getWorker(pod.UID, c.Name, readiness) + ready = !exists // no readinessProbe -> always ready + if exists { + // Trigger an immediate run of the readinessProbe to update ready state + select { + case w.manualTriggerCh <- struct{}{}: + default: // Non-blocking. + klog.InfoS("Failed to trigger a manual run", "probe", w.probeType.String()) } } - podStatus.ContainerStatuses[i].Ready = ready } + podStatus.ContainerStatuses[i].Ready = ready } - // init containers are ready if they have exited with success or if a readiness probe has - // succeeded. + for i, c := range podStatus.InitContainerStatuses { + started := m.isContainerStarted(pod, &podStatus.InitContainerStatuses[i]) + podStatus.InitContainerStatuses[i].Started = &started + + initContainer, ok := kubeutil.GetContainerByIndex(pod.Spec.InitContainers, podStatus.InitContainerStatuses, i) + if !ok { + klog.V(4).InfoS("Mismatch between pod spec and status, likely programmer error", "pod", klog.KObj(pod), "containerName", c.Name) + continue + } + if !kubetypes.IsRestartableInitContainer(&initContainer) { + if c.State.Terminated != nil && c.State.Terminated.ExitCode == 0 { + podStatus.InitContainerStatuses[i].Ready = true + } + continue + } + + if !started { + continue + } + var ready bool - if c.State.Terminated != nil && c.State.Terminated.ExitCode == 0 { + if c.State.Running == nil { + ready = false + } else if result, ok := m.readinessManager.Get(kubecontainer.ParseContainerID(c.ContainerID)); ok && result == results.Success { ready = true + } else { + // The check whether there is a probe which hasn't run yet. 
+ w, exists := m.getWorker(pod.UID, c.Name, readiness) + ready = !exists // no readinessProbe -> always ready + if exists { + // Trigger an immediate run of the readinessProbe to update ready state + select { + case w.manualTriggerCh <- struct{}{}: + default: // Non-blocking. + klog.InfoS("Failed to trigger a manual run", "probe", w.probeType.String()) + } + } } podStatus.InitContainerStatuses[i].Ready = ready } diff --git a/pkg/kubelet/prober/prober_manager_test.go b/pkg/kubelet/prober/prober_manager_test.go index 287f2ea2913..13707b4fc17 100644 --- a/pkg/kubelet/prober/prober_manager_test.go +++ b/pkg/kubelet/prober/prober_manager_test.go @@ -27,6 +27,9 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/kubernetes/pkg/features" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/prober/results" "k8s.io/kubernetes/pkg/probe" @@ -130,6 +133,85 @@ func TestAddRemovePods(t *testing.T) { } } +func TestAddRemovePodsWithRestartableInitContainer(t *testing.T) { + m := newTestManager() + defer cleanup(t, m) + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, true)() + if err := expectProbes(m, nil); err != nil { + t.Error(err) + } + + testCases := []struct { + desc string + probePaths []probeKey + enableSidecarContainers bool + }{ + { + desc: "pod with sidecar (no sidecar containers feature enabled)", + probePaths: nil, + enableSidecarContainers: false, + }, + { + desc: "pod with sidecar (sidecar containers feature enabled)", + probePaths: []probeKey{{"sidecar_pod", "sidecar", readiness}}, + enableSidecarContainers: true, + }, + } + + containerRestartPolicy := func(enableSidecarContainers bool) *v1.ContainerRestartPolicy { + if !enableSidecarContainers { + return nil + } + restartPolicy := v1.ContainerRestartPolicyAlways + return &restartPolicy + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, tc.enableSidecarContainers)() + + probePod := v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: "restartable_init_container_pod", + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{{ + Name: "init", + }, { + Name: "restartable-init", + ReadinessProbe: defaultProbe, + RestartPolicy: containerRestartPolicy(tc.enableSidecarContainers), + }}, + Containers: []v1.Container{{ + Name: "main", + }}, + }, + } + + // Adding a pod with probes. + m.AddPod(&probePod) + if err := expectProbes(m, tc.probePaths); err != nil { + t.Error(err) + } + + // Removing probed pod. + m.RemovePod(&probePod) + if err := waitForWorkerExit(t, m, tc.probePaths); err != nil { + t.Fatal(err) + } + if err := expectProbes(m, nil); err != nil { + t.Error(err) + } + + // Removing already removed pods should be a no-op. 
+ m.RemovePod(&probePod) + if err := expectProbes(m, nil); err != nil { + t.Error(err) + } + }) + } +} + func TestCleanupPods(t *testing.T) { m := newTestManager() defer cleanup(t, m) @@ -293,7 +375,22 @@ func TestUpdatePodStatus(t *testing.T) { m.startupManager.Set(kubecontainer.ParseContainerID(startedNoReadiness.ContainerID), results.Success, &v1.Pod{}) m.readinessManager.Set(kubecontainer.ParseContainerID(terminated.ContainerID), results.Success, &v1.Pod{}) - m.UpdatePodStatus(testPodUID, &podStatus) + m.UpdatePodStatus(&v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: testPodUID, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + {Name: unprobed.Name}, + {Name: probedReady.Name}, + {Name: probedPending.Name}, + {Name: probedUnready.Name}, + {Name: notStartedNoReadiness.Name}, + {Name: startedNoReadiness.Name}, + {Name: terminated.Name}, + }, + }, + }, &podStatus) expectedReadiness := map[probeKey]bool{ {testPodUID, unprobed.Name, readiness}: true, @@ -316,6 +413,141 @@ func TestUpdatePodStatus(t *testing.T) { } } +func TestUpdatePodStatusWithInitContainers(t *testing.T) { + notStarted := v1.ContainerStatus{ + Name: "not_started_container", + ContainerID: "test://not_started_container_id", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + } + started := v1.ContainerStatus{ + Name: "started_container", + ContainerID: "test://started_container_id", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + } + terminated := v1.ContainerStatus{ + Name: "terminated_container", + ContainerID: "test://terminated_container_id", + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{}, + }, + } + + m := newTestManager() + // no cleanup: using fake workers. + + // Setup probe "workers" and cached results. 
+ m.workers = map[probeKey]*worker{ + {testPodUID, notStarted.Name, startup}: {}, + {testPodUID, started.Name, startup}: {}, + } + m.startupManager.Set(kubecontainer.ParseContainerID(started.ContainerID), results.Success, &v1.Pod{}) + + testCases := []struct { + desc string + expectedStartup map[probeKey]bool + expectedReadiness map[probeKey]bool + enableSidecarContainers bool + }{ + { + desc: "init containers", + expectedStartup: map[probeKey]bool{ + {testPodUID, notStarted.Name, startup}: false, + {testPodUID, started.Name, startup}: true, + {testPodUID, terminated.Name, startup}: false, + }, + expectedReadiness: map[probeKey]bool{ + {testPodUID, notStarted.Name, readiness}: false, + {testPodUID, started.Name, readiness}: false, + {testPodUID, terminated.Name, readiness}: true, + }, + enableSidecarContainers: false, + }, + { + desc: "init container with SidecarContainers feature", + expectedStartup: map[probeKey]bool{ + {testPodUID, notStarted.Name, startup}: false, + {testPodUID, started.Name, startup}: true, + {testPodUID, terminated.Name, startup}: false, + }, + expectedReadiness: map[probeKey]bool{ + {testPodUID, notStarted.Name, readiness}: false, + {testPodUID, started.Name, readiness}: true, + {testPodUID, terminated.Name, readiness}: false, + }, + enableSidecarContainers: true, + }, + } + + containerRestartPolicy := func(enableSidecarContainers bool) *v1.ContainerRestartPolicy { + if !enableSidecarContainers { + return nil + } + restartPolicy := v1.ContainerRestartPolicyAlways + return &restartPolicy + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, tc.enableSidecarContainers)() + podStatus := v1.PodStatus{ + Phase: v1.PodRunning, + InitContainerStatuses: []v1.ContainerStatus{ + notStarted, started, terminated, + }, + } + + m.UpdatePodStatus(&v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: testPodUID, + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: notStarted.Name, + RestartPolicy: containerRestartPolicy(tc.enableSidecarContainers), + }, + { + Name: started.Name, + RestartPolicy: containerRestartPolicy(tc.enableSidecarContainers), + }, + { + Name: terminated.Name, + RestartPolicy: containerRestartPolicy(tc.enableSidecarContainers), + }, + }, + }, + }, &podStatus) + + for _, c := range podStatus.InitContainerStatuses { + { + expected, ok := tc.expectedStartup[probeKey{testPodUID, c.Name, startup}] + if !ok { + t.Fatalf("Missing expectation for test case: %v", c.Name) + } + if expected != *c.Started { + t.Errorf("Unexpected startup for container %v: Expected %v but got %v", + c.Name, expected, *c.Started) + } + } + { + expected, ok := tc.expectedReadiness[probeKey{testPodUID, c.Name, readiness}] + if !ok { + t.Fatalf("Missing expectation for test case: %v", c.Name) + } + if expected != c.Ready { + t.Errorf("Unexpected readiness for container %v: Expected %v but got %v", + c.Name, expected, c.Ready) + } + } + } + }) + } +} + func (m *manager) extractedReadinessHandling() { update := <-m.readinessManager.Updates() // This code corresponds to an extract from kubelet.syncLoopIteration() diff --git a/pkg/kubelet/prober/testing/fake_manager.go b/pkg/kubelet/prober/testing/fake_manager.go index 500cfcd9962..d3b31c9e793 100644 --- a/pkg/kubelet/prober/testing/fake_manager.go +++ b/pkg/kubelet/prober/testing/fake_manager.go @@ -43,7 +43,7 @@ func (FakeManager) CleanupPods(_ map[types.UID]sets.Empty) {} func (FakeManager) Start() 
{} // UpdatePodStatus simulates updating the Pod Status. -func (FakeManager) UpdatePodStatus(_ types.UID, podStatus *v1.PodStatus) { +func (FakeManager) UpdatePodStatus(_ *v1.Pod, podStatus *v1.PodStatus) { for i := range podStatus.ContainerStatuses { podStatus.ContainerStatuses[i].Ready = true } diff --git a/pkg/kubelet/prober/worker.go b/pkg/kubelet/prober/worker.go index b9ec0053de6..60cfd690fbf 100644 --- a/pkg/kubelet/prober/worker.go +++ b/pkg/kubelet/prober/worker.go @@ -221,10 +221,13 @@ func (w *worker) doProbe(ctx context.Context) (keepGoing bool) { c, ok := podutil.GetContainerStatus(status.ContainerStatuses, w.container.Name) if !ok || len(c.ContainerID) == 0 { - // Either the container has not been created yet, or it was deleted. - klog.V(3).InfoS("Probe target container not found", - "pod", klog.KObj(w.pod), "containerName", w.container.Name) - return true // Wait for more information. + c, ok = podutil.GetContainerStatus(status.InitContainerStatuses, w.container.Name) + if !ok || len(c.ContainerID) == 0 { + // Either the container has not been created yet, or it was deleted. + klog.V(3).InfoS("Probe target container not found", + "pod", klog.KObj(w.pod), "containerName", w.container.Name) + return true // Wait for more information. + } } if w.containerID.String() != c.ContainerID { diff --git a/pkg/kubelet/status/generate.go b/pkg/kubelet/status/generate.go index 0b3fd517158..7c6c1179a20 100644 --- a/pkg/kubelet/status/generate.go +++ b/pkg/kubelet/status/generate.go @@ -143,6 +143,19 @@ func GeneratePodReadyCondition(spec *v1.PodSpec, conditions []v1.PodCondition, c } } +func isInitContainerInitialized(initContainer *v1.Container, containerStatus *v1.ContainerStatus) bool { + if kubetypes.IsRestartableInitContainer(initContainer) { + if containerStatus.Started == nil || !*containerStatus.Started { + return false + } + } else { // regular init container + if !containerStatus.Ready { + return false + } + } + return true +} + // GeneratePodInitializedCondition returns initialized condition if all init containers in a pod are ready, else it // returns an uninitialized condition. func GeneratePodInitializedCondition(spec *v1.PodSpec, containerStatuses []v1.ContainerStatus, podPhase v1.PodPhase) v1.PodCondition { @@ -154,15 +167,17 @@ func GeneratePodInitializedCondition(spec *v1.PodSpec, containerStatuses []v1.Co Reason: UnknownContainerStatuses, } } + unknownContainers := []string{} - unreadyContainers := []string{} + incompleteContainers := []string{} for _, container := range spec.InitContainers { - if containerStatus, ok := podutil.GetContainerStatus(containerStatuses, container.Name); ok { - if !containerStatus.Ready { - unreadyContainers = append(unreadyContainers, container.Name) - } - } else { + containerStatus, ok := podutil.GetContainerStatus(containerStatuses, container.Name) + if !ok { unknownContainers = append(unknownContainers, container.Name) + continue + } + if !isInitContainerInitialized(&container, &containerStatus) { + incompleteContainers = append(incompleteContainers, container.Name) } } @@ -175,12 +190,23 @@ func GeneratePodInitializedCondition(spec *v1.PodSpec, containerStatuses []v1.Co } } - unreadyMessages := []string{} + // If there is any regular container that has started, then the pod has + // been initialized before. + // This is needed to handle the case where the pod has been initialized but + // the restartable init containers are restarting. 
+ if kubecontainer.HasAnyRegularContainerStarted(spec, containerStatuses) { + return v1.PodCondition{ + Type: v1.PodInitialized, + Status: v1.ConditionTrue, + } + } + + unreadyMessages := make([]string, 0, len(unknownContainers)+len(incompleteContainers)) if len(unknownContainers) > 0 { unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with unknown status: %s", unknownContainers)) } - if len(unreadyContainers) > 0 { - unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with incomplete status: %s", unreadyContainers)) + if len(incompleteContainers) > 0 { + unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with incomplete status: %s", incompleteContainers)) } unreadyMessage := strings.Join(unreadyMessages, ", ") if unreadyMessage != "" { diff --git a/pkg/kubelet/status/generate_test.go b/pkg/kubelet/status/generate_test.go index 777df128869..d185db2cc23 100644 --- a/pkg/kubelet/status/generate_test.go +++ b/pkg/kubelet/status/generate_test.go @@ -23,10 +23,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubetypes "k8s.io/kubernetes/pkg/kubelet/types" + "k8s.io/utils/pointer" ) func TestGenerateContainersReadyCondition(t *testing.T) { @@ -324,12 +325,32 @@ func TestGeneratePodInitializedCondition(t *testing.T) { InitContainers: []v1.Container{ {Name: "1234"}, }, + Containers: []v1.Container{ + {Name: "regular"}, + }, } twoInitContainer := &v1.PodSpec{ InitContainers: []v1.Container{ {Name: "1234"}, {Name: "5678"}, }, + Containers: []v1.Container{ + {Name: "regular"}, + }, + } + oneRestartableInitContainer := &v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: "1234", + RestartPolicy: func() *v1.ContainerRestartPolicy { + p := v1.ContainerRestartPolicyAlways + return &p + }(), + }, + }, + Containers: []v1.Container{ + {Name: "regular"}, + }, } tests := []struct { spec *v1.PodSpec @@ -410,6 +431,43 @@ func TestGeneratePodInitializedCondition(t *testing.T) { Reason: PodCompleted, }, }, + { + spec: oneRestartableInitContainer, + containerStatuses: []v1.ContainerStatus{ + getNotStartedStatus("1234"), + }, + podPhase: v1.PodPending, + expected: v1.PodCondition{ + Status: v1.ConditionFalse, + Reason: ContainersNotInitialized, + }, + }, + { + spec: oneRestartableInitContainer, + containerStatuses: []v1.ContainerStatus{ + getStartedStatus("1234"), + }, + podPhase: v1.PodRunning, + expected: v1.PodCondition{ + Status: v1.ConditionTrue, + }, + }, + { + spec: oneRestartableInitContainer, + containerStatuses: []v1.ContainerStatus{ + getNotStartedStatus("1234"), + { + Name: "regular", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + podPhase: v1.PodRunning, + expected: v1.PodCondition{ + Status: v1.ConditionTrue, + }, + }, } for _, test := range tests { @@ -515,3 +573,17 @@ func getNotReadyStatus(cName string) v1.ContainerStatus { Ready: false, } } + +func getStartedStatus(cName string) v1.ContainerStatus { + return v1.ContainerStatus{ + Name: cName, + Started: pointer.Bool(true), + } +} + +func getNotStartedStatus(cName string) v1.ContainerStatus { + return v1.ContainerStatus{ + Name: cName, + Started: pointer.Bool(false), + } +} diff --git a/pkg/kubelet/status/status_manager.go b/pkg/kubelet/status/status_manager.go index 37aea7230d5..a37178a5577 100644 --- a/pkg/kubelet/status/status_manager.go +++ 
b/pkg/kubelet/status/status_manager.go
@@ -42,6 +42,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/status/state"
 	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
+	kubeutil "k8s.io/kubernetes/pkg/kubelet/util"
 	statusutil "k8s.io/kubernetes/pkg/util/pod"
 )
 
@@ -504,12 +505,25 @@ func hasPodInitialized(pod *v1.Pod) bool {
 	}
 	// if the last init container has ever completed with a zero exit code, the pod is initialized
 	if l := len(pod.Status.InitContainerStatuses); l > 0 {
-		container := pod.Status.InitContainerStatuses[l-1]
-		if state := container.LastTerminationState; state.Terminated != nil && state.Terminated.ExitCode == 0 {
-			return true
+		container, ok := kubeutil.GetContainerByIndex(pod.Spec.InitContainers, pod.Status.InitContainerStatuses, l-1)
+		if !ok {
+			klog.V(4).InfoS("Mismatch between pod spec and status, likely programmer error", "pod", klog.KObj(pod), "containerName", pod.Status.InitContainerStatuses[l-1].Name)
+			return false
 		}
-		if state := container.State; state.Terminated != nil && state.Terminated.ExitCode == 0 {
-			return true
+
+		containerStatus := pod.Status.InitContainerStatuses[l-1]
+		if kubetypes.IsRestartableInitContainer(&container) {
+			if containerStatus.State.Running != nil &&
+				containerStatus.Started != nil && *containerStatus.Started {
+				return true
+			}
+		} else { // regular init container
+			if state := containerStatus.LastTerminationState; state.Terminated != nil && state.Terminated.ExitCode == 0 {
+				return true
+			}
+			if state := containerStatus.State; state.Terminated != nil && state.Terminated.ExitCode == 0 {
+				return true
+			}
 		}
 	}
 	// otherwise the pod has no record of being initialized
@@ -535,26 +549,50 @@ func initializedContainers(containers []v1.ContainerStatus) []v1.ContainerStatus
 // checkContainerStateTransition ensures that no container is trying to transition
 // from a terminated to non-terminated state, which is illegal and indicates a
 // logical error in the kubelet.
-func checkContainerStateTransition(oldStatuses, newStatuses []v1.ContainerStatus, restartPolicy v1.RestartPolicy) error { +func checkContainerStateTransition(oldStatuses, newStatuses *v1.PodStatus, podSpec *v1.PodSpec) error { // If we should always restart, containers are allowed to leave the terminated state - if restartPolicy == v1.RestartPolicyAlways { + if podSpec.RestartPolicy == v1.RestartPolicyAlways { return nil } - for _, oldStatus := range oldStatuses { + for _, oldStatus := range oldStatuses.ContainerStatuses { // Skip any container that wasn't terminated if oldStatus.State.Terminated == nil { continue } // Skip any container that failed but is allowed to restart - if oldStatus.State.Terminated.ExitCode != 0 && restartPolicy == v1.RestartPolicyOnFailure { + if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure { continue } - for _, newStatus := range newStatuses { + for _, newStatus := range newStatuses.ContainerStatuses { if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil { return fmt.Errorf("terminated container %v attempted illegal transition to non-terminated state", newStatus.Name) } } } + + for i, oldStatus := range oldStatuses.InitContainerStatuses { + initContainer, ok := kubeutil.GetContainerByIndex(podSpec.InitContainers, oldStatuses.InitContainerStatuses, i) + if !ok { + return fmt.Errorf("found mismatch between pod spec and status, container: %v", oldStatus.Name) + } + // Skip any restartable init container as it always is allowed to restart + if kubetypes.IsRestartableInitContainer(&initContainer) { + continue + } + // Skip any container that wasn't terminated + if oldStatus.State.Terminated == nil { + continue + } + // Skip any container that failed but is allowed to restart + if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure { + continue + } + for _, newStatus := range newStatuses.InitContainerStatuses { + if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil { + return fmt.Errorf("terminated init container %v attempted illegal transition to non-terminated state", newStatus.Name) + } + } + } return nil } @@ -580,11 +618,7 @@ func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUp } // Check for illegal state transition in containers - if err := checkContainerStateTransition(oldStatus.ContainerStatuses, status.ContainerStatuses, pod.Spec.RestartPolicy); err != nil { - klog.ErrorS(err, "Status update on pod aborted", "pod", klog.KObj(pod)) - return - } - if err := checkContainerStateTransition(oldStatus.InitContainerStatuses, status.InitContainerStatuses, pod.Spec.RestartPolicy); err != nil { + if err := checkContainerStateTransition(&oldStatus, &status, &pod.Spec); err != nil { klog.ErrorS(err, "Status update on pod aborted", "pod", klog.KObj(pod)) return } diff --git a/pkg/kubelet/status/status_manager_test.go b/pkg/kubelet/status/status_manager_test.go index 437b45b26a5..23b9f17f76f 100644 --- a/pkg/kubelet/status/status_manager_test.go +++ b/pkg/kubelet/status/status_manager_test.go @@ -585,6 +585,16 @@ func TestStaticPod(t *testing.T) { func TestTerminatePod(t *testing.T) { syncer := newTestManager(&fake.Clientset{}) testPod := getTestPod() + testPod.Spec.InitContainers = []v1.Container{ + {Name: "init-test-1"}, + {Name: "init-test-2"}, + {Name: "init-test-3"}, + } + testPod.Spec.Containers = []v1.Container{ + {Name: "test-1"}, + {Name: "test-2"}, + {Name: "test-3"}, + } t.Logf("update the pod's status 
to Failed. TerminatePod should preserve this status update.") firstStatus := getRandomPodStatus() firstStatus.Phase = v1.PodFailed @@ -592,6 +602,9 @@ func TestTerminatePod(t *testing.T) { {Name: "init-test-1"}, {Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, {Name: "init-test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 3}}}, + // TODO: If the last init container had failed, the pod would not have been + // able to start any containers. Maybe, we have to separate this test case + // into two cases, one for init containers and one for containers. } firstStatus.ContainerStatuses = []v1.ContainerStatus{ {Name: "test-1"}, @@ -603,6 +616,16 @@ func TestTerminatePod(t *testing.T) { t.Logf("set the testPod to a pod with Phase running, to simulate a stale pod") testPod.Status = getRandomPodStatus() testPod.Status.Phase = v1.PodRunning + testPod.Status.InitContainerStatuses = []v1.ContainerStatus{ + {Name: "test-1"}, + {Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, + {Name: "init-test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, + } + testPod.Status.ContainerStatuses = []v1.ContainerStatus{ + {Name: "test-1", State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + {Name: "test-2", State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + {Name: "test-3", State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + } syncer.TerminatePod(testPod) @@ -643,6 +666,16 @@ func TestTerminatePod(t *testing.T) { func TestTerminatePodWaiting(t *testing.T) { syncer := newTestManager(&fake.Clientset{}) testPod := getTestPod() + testPod.Spec.InitContainers = []v1.Container{ + {Name: "init-test-1"}, + {Name: "init-test-2"}, + {Name: "init-test-3"}, + } + testPod.Spec.Containers = []v1.Container{ + {Name: "test-1"}, + {Name: "test-2"}, + {Name: "test-3"}, + } t.Logf("update the pod's status to Failed. TerminatePod should preserve this status update.") firstStatus := getRandomPodStatus() firstStatus.Phase = v1.PodFailed @@ -650,6 +683,10 @@ func TestTerminatePodWaiting(t *testing.T) { {Name: "init-test-1"}, {Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, {Name: "init-test-3", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{Reason: "InitTest"}}}, + // TODO: If the last init container had been in a waiting state, it would + // not have been able to start any containers. Maybe, we have to separate + // this test case into two cases, one for init containers and one for + // containers. 
} firstStatus.ContainerStatuses = []v1.ContainerStatus{ {Name: "test-1"}, @@ -661,6 +698,16 @@ func TestTerminatePodWaiting(t *testing.T) { t.Logf("set the testPod to a pod with Phase running, to simulate a stale pod") testPod.Status = getRandomPodStatus() testPod.Status.Phase = v1.PodRunning + testPod.Status.InitContainerStatuses = []v1.ContainerStatus{ + {Name: "test-1"}, + {Name: "init-test-2", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, + {Name: "init-test-3", State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{Reason: "InitTest", ExitCode: 0}}}, + } + testPod.Status.ContainerStatuses = []v1.ContainerStatus{ + {Name: "test-1", State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + {Name: "test-2", State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + {Name: "test-3", State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + } syncer.TerminatePod(testPod) @@ -809,7 +856,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) { name: "uninitialized pod defaults the first init container", pod: newPod(1, 1, func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever - pod.Status.Phase = v1.PodRunning + pod.Status.Phase = v1.PodPending pod.Status.InitContainerStatuses = []v1.ContainerStatus{ {Name: "init-0", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, } @@ -826,7 +873,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) { name: "uninitialized pod defaults only the first init container", pod: newPod(2, 1, func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever - pod.Status.Phase = v1.PodRunning + pod.Status.Phase = v1.PodPending pod.Status.InitContainerStatuses = []v1.ContainerStatus{ {Name: "init-0", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, {Name: "init-1", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, @@ -845,7 +892,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) { name: "uninitialized pod defaults gaps", pod: newPod(4, 1, func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever - pod.Status.Phase = v1.PodRunning + pod.Status.Phase = v1.PodPending pod.Status.InitContainerStatuses = []v1.ContainerStatus{ {Name: "init-0", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, {Name: "init-1", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, @@ -868,7 +915,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) { name: "failed last container is uninitialized", pod: newPod(3, 1, func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever - pod.Status.Phase = v1.PodRunning + pod.Status.Phase = v1.PodPending pod.Status.InitContainerStatuses = []v1.ContainerStatus{ {Name: "init-0", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, {Name: "init-1", State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}}, diff --git a/pkg/kubelet/types/pod_update.go b/pkg/kubelet/types/pod_update.go index 6c7e236fc9a..7f7fc5b799b 100644 --- a/pkg/kubelet/types/pod_update.go +++ b/pkg/kubelet/types/pod_update.go @@ -192,3 +192,13 @@ func IsCriticalPodBasedOnPriority(priority int32) bool { func IsNodeCriticalPod(pod *v1.Pod) bool { return IsCriticalPod(pod) && (pod.Spec.PriorityClassName == scheduling.SystemNodeCritical) } + +// IsRestartableInitContainer returns true if the initContainer has +// ContainerRestartPolicyAlways. 
+func IsRestartableInitContainer(initContainer *v1.Container) bool {
+	if initContainer.RestartPolicy == nil {
+		return false
+	}
+
+	return *initContainer.RestartPolicy == v1.ContainerRestartPolicyAlways
+}
diff --git a/pkg/kubelet/util/util.go b/pkg/kubelet/util/util.go
index c2969a51126..97933afe39b 100644
--- a/pkg/kubelet/util/util.go
+++ b/pkg/kubelet/util/util.go
@@ -19,6 +19,7 @@ package util
 
 import (
 	"fmt"
 
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -43,3 +44,16 @@ func GetNodenameForKernel(hostname string, hostDomainName string, setHostnameAsF
 	}
 	return kernelHostname, nil
 }
+
+// GetContainerByIndex validates and extracts the container at index "idx" from
+// "containers" with respect to "statuses". It returns false when the container
+// at "idx" is missing or its name does not match the status at the same index.
+func GetContainerByIndex(containers []v1.Container, statuses []v1.ContainerStatus, idx int) (v1.Container, bool) {
+	if idx < 0 || idx >= len(containers) || idx >= len(statuses) {
+		return v1.Container{}, false
+	}
+	if statuses[idx].Name != containers[idx].Name {
+		return v1.Container{}, false
+	}
+	return containers[idx], true
+}
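Reviewer note: the sketch below is not part of the patch. It is a self-contained illustration, assuming only k8s.io/api/core/v1, of how the two helpers added above compose in the way hasPodInitialized now uses them; the isLastInitContainerDone wrapper and the names in main are hypothetical, and the two helper bodies are copied from the hunks above.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// Copied from pkg/kubelet/types/pod_update.go in this patch.
func IsRestartableInitContainer(initContainer *v1.Container) bool {
	if initContainer.RestartPolicy == nil {
		return false
	}
	return *initContainer.RestartPolicy == v1.ContainerRestartPolicyAlways
}

// Copied from pkg/kubelet/util/util.go in this patch.
func GetContainerByIndex(containers []v1.Container, statuses []v1.ContainerStatus, idx int) (v1.Container, bool) {
	if idx < 0 || idx >= len(containers) || idx >= len(statuses) {
		return v1.Container{}, false
	}
	if statuses[idx].Name != containers[idx].Name {
		return v1.Container{}, false
	}
	return containers[idx], true
}

// isLastInitContainerDone is a hypothetical wrapper mirroring the logic that
// hasPodInitialized applies to the last init container: a restartable init
// container counts once it is running and started, a regular one once it has
// terminated with exit code zero.
func isLastInitContainerDone(initContainers []v1.Container, statuses []v1.ContainerStatus) bool {
	l := len(statuses)
	if l == 0 {
		return false
	}
	container, ok := GetContainerByIndex(initContainers, statuses, l-1)
	if !ok {
		// Mismatch between spec and status; treat as not initialized.
		return false
	}
	status := statuses[l-1]
	if IsRestartableInitContainer(&container) {
		return status.State.Running != nil && status.Started != nil && *status.Started
	}
	return status.State.Terminated != nil && status.State.Terminated.ExitCode == 0
}

func main() {
	always := v1.ContainerRestartPolicyAlways
	started := true
	initContainers := []v1.Container{{Name: "sidecar", RestartPolicy: &always}}
	statuses := []v1.ContainerStatus{{
		Name:    "sidecar",
		State:   v1.ContainerState{Running: &v1.ContainerStateRunning{}},
		Started: &started,
	}}
	fmt.Println(isLastInitContainerDone(initContainers, statuses)) // true
}

For a regular (non-restartable) init container, the same wrapper would instead require a zero-exit-code termination, mirroring the else branch added to hasPodInitialized.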