From 7ef2d674e247f8d11937118389a8d8194a13d062 Mon Sep 17 00:00:00 2001 From: Gunju Kim Date: Wed, 28 Jun 2023 00:21:09 +0900 Subject: [PATCH] Allow restartable init containers to have livenessProbe --- pkg/apis/core/validation/validation.go | 25 +++++--- pkg/apis/core/validation/validation_test.go | 31 +++++++++- .../kuberuntime/kuberuntime_container.go | 23 ++++++- .../kuberuntime/kuberuntime_manager_test.go | 60 +++++++++++++++++++ pkg/kubelet/prober/prober_manager_test.go | 2 + 5 files changed, 130 insertions(+), 11 deletions(-) diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index 6af9e0797c5..1ce2b15f602 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -2816,6 +2816,19 @@ func validatePodResourceClaimSource(claimSource core.ClaimSource, fldPath *field return allErrs } +func validateLivenessProbe(probe *core.Probe, fldPath *field.Path) field.ErrorList { + allErrs := field.ErrorList{} + + if probe == nil { + return allErrs + } + allErrs = append(allErrs, validateProbe(probe, fldPath)...) + if probe.SuccessThreshold != 1 { + allErrs = append(allErrs, field.Invalid(fldPath.Child("successThreshold"), probe.SuccessThreshold, "must be 1")) + } + return allErrs +} + func validateReadinessProbe(probe *core.Probe, fldPath *field.Path) field.ErrorList { allErrs := field.ErrorList{} @@ -3232,10 +3245,7 @@ func validateInitContainers(containers []core.Container, regularContainers []cor if ctr.Lifecycle != nil { allErrs = append(allErrs, field.Forbidden(idxPath.Child("lifecycle"), "may not be set for init containers")) } - // TODO: Allow restartable init containers to have a liveness probe. - if ctr.LivenessProbe != nil { - allErrs = append(allErrs, field.Forbidden(idxPath.Child("livenessProbe"), "may not be set for init containers")) - } + allErrs = append(allErrs, validateLivenessProbe(ctr.LivenessProbe, idxPath.Child("livenessProbe"))...) allErrs = append(allErrs, validateReadinessProbe(ctr.ReadinessProbe, idxPath.Child("readinessProbe"))...) allErrs = append(allErrs, validateStartupProbe(ctr.StartupProbe, idxPath.Child("startupProbe"))...) @@ -3245,7 +3255,7 @@ func validateInitContainers(containers []core.Container, regularContainers []cor allErrs = append(allErrs, field.Forbidden(idxPath.Child("lifecycle"), "may not be set for init containers")) } if ctr.LivenessProbe != nil { - allErrs = append(allErrs, field.Forbidden(idxPath.Child("livenessProbe"), "may not be set for init containers")) + allErrs = append(allErrs, field.Forbidden(idxPath.Child("livenessProbe"), "may not be set for init containers without restartPolicy=Always")) } if ctr.ReadinessProbe != nil { allErrs = append(allErrs, field.Forbidden(idxPath.Child("readinessProbe"), "may not be set for init containers without restartPolicy=Always")) @@ -3365,10 +3375,7 @@ func validateContainers(containers []core.Container, volumes map[string]core.Vol if ctr.Lifecycle != nil { allErrs = append(allErrs, validateLifecycle(ctr.Lifecycle, path.Child("lifecycle"))...) } - allErrs = append(allErrs, validateProbe(ctr.LivenessProbe, path.Child("livenessProbe"))...) - if ctr.LivenessProbe != nil && ctr.LivenessProbe.SuccessThreshold != 1 { - allErrs = append(allErrs, field.Invalid(path.Child("livenessProbe", "successThreshold"), ctr.LivenessProbe.SuccessThreshold, "must be 1")) - } + allErrs = append(allErrs, validateLivenessProbe(ctr.LivenessProbe, path.Child("livenessProbe"))...) allErrs = append(allErrs, validateReadinessProbe(ctr.ReadinessProbe, path.Child("readinessProbe"))...) allErrs = append(allErrs, validateStartupProbe(ctr.StartupProbe, path.Child("startupProbe"))...) diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index 17610b0045a..745260103a9 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -8169,6 +8169,14 @@ func TestValidateInitContainers(t *testing.T) { ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File", RestartPolicy: &containerRestartPolicyAlways, + LivenessProbe: &core.Probe{ + ProbeHandler: core.ProbeHandler{ + TCPSocket: &core.TCPSocketAction{ + Port: intstr.FromInt32(80), + }, + }, + SuccessThreshold: 1, + }, ReadinessProbe: &core.Probe{ ProbeHandler: core.ProbeHandler{ TCPSocket: &core.TCPSocketAction{ @@ -8178,7 +8186,9 @@ func TestValidateInitContainers(t *testing.T) { }, StartupProbe: &core.Probe{ ProbeHandler: core.ProbeHandler{ - TCPSocket: &core.TCPSocketAction{Port: intstr.FromInt(80)}, + TCPSocket: &core.TCPSocketAction{ + Port: intstr.FromInt(80), + }, }, SuccessThreshold: 1, }, @@ -8418,6 +8428,25 @@ func TestValidateInitContainers(t *testing.T) { }, }}, field.ErrorList{{Type: field.ErrorTypeInvalid, Field: "initContainers[0].readinessProbe.terminationGracePeriodSeconds", BadValue: utilpointer.Int64(10)}}, + }, { + "invalid liveness probe, successThreshold != 1", + line(), + []core.Container{{ + Name: "live-123", + Image: "image", + ImagePullPolicy: "IfNotPresent", + TerminationMessagePolicy: "File", + RestartPolicy: &containerRestartPolicyAlways, + LivenessProbe: &core.Probe{ + ProbeHandler: core.ProbeHandler{ + TCPSocket: &core.TCPSocketAction{ + Port: intstr.FromInt32(80), + }, + }, + SuccessThreshold: 2, + }, + }}, + field.ErrorList{{Type: field.ErrorTypeInvalid, Field: "initContainers[0].livenessProbe.successThreshold", BadValue: int32(2)}}, }, } for _, tc := range errorCases { diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 5628620d72b..eb25241cc11 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -877,7 +877,7 @@ func hasAnyRegularContainerCreated(pod *v1.Pod, podStatus *kubecontainer.PodStat // The actions include: // - Start the first init container that has not been started. // - Restart all restartable init containers that have started but are not running. -// - Kill the restartable init containers that have failed the startup probe. +// - Kill the restartable init containers that are not alive or started. func (m *kubeGenericRuntimeManager) computeInitContainerActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus, changes *podActions) bool { if len(pod.Spec.InitContainers) == 0 { return true @@ -960,6 +960,27 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(pod *v1.Pod, pod // this init container is initialized for the first time, start the next one changes.InitContainersToStart = append(changes.InitContainersToStart, i+1) } + + // A restartable init container does not have to take into account its + // liveness probe when it determines to start the next init container. + if container.LivenessProbe != nil { + liveness, found := m.livenessManager.Get(status.ID) + if !found { + // If the liveness probe has not been run, wait for it. + break + } + if liveness == proberesults.Failure { + // If the restartable init container failed the liveness probe, + // restart it. + changes.ContainersToKill[status.ID] = containerToKillInfo{ + name: container.Name, + container: container, + message: fmt.Sprintf("Init container %s failed liveness probe", container.Name), + reason: reasonLivenessProbe, + } + changes.InitContainersToStart = append(changes.InitContainersToStart, i) + } + } } else { // init container // nothing do to but wait for it to finish break diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index f9fe3a9c666..83df1ec0271 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -1518,6 +1518,62 @@ func TestComputePodActionsWithRestartableInitContainers(t *testing.T) { ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, + "livenessProbe has not been run; start the nothing": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + m.livenessManager.Remove(status.ContainerStatuses[1].ID) + status.ContainerStatuses = status.ContainerStatuses[:2] + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "livenessProbe in progress; start the next": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + m.livenessManager.Set(status.ContainerStatuses[1].ID, proberesults.Unknown, basePod) + status.ContainerStatuses = status.ContainerStatuses[:2] + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + resetStatusFn: func(status *kubecontainer.PodStatus) { + m.livenessManager.Remove(status.ContainerStatuses[1].ID) + }, + }, + "livenessProbe has completed; start the next": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + status.ContainerStatuses = status.ContainerStatuses[:2] + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, + }, + "kill and recreate the restartable init container if the liveness check has failed": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, + mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { + m.livenessManager.Set(status.ContainerStatuses[2].ID, proberesults.Failure, basePod) + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{2}), + ContainersToStart: []int{0, 1, 2}, + }, + resetStatusFn: func(status *kubecontainer.PodStatus) { + m.livenessManager.Remove(status.ContainerStatuses[2].ID) + }, + }, "startupProbe has not been run; do nothing": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { @@ -1740,7 +1796,9 @@ func TestComputePodActionsWithRestartableInitContainers(t *testing.T) { }, } { pod, status := makeBasePodAndStatusWithRestartableInitContainers() + m.livenessManager.Set(status.ContainerStatuses[1].ID, proberesults.Success, basePod) m.startupManager.Set(status.ContainerStatuses[1].ID, proberesults.Success, basePod) + m.livenessManager.Set(status.ContainerStatuses[2].ID, proberesults.Success, basePod) m.startupManager.Set(status.ContainerStatuses[2].ID, proberesults.Success, basePod) if test.mutatePodFn != nil { test.mutatePodFn(pod) @@ -1769,12 +1827,14 @@ func makeBasePodAndStatusWithRestartableInitContainers() (*v1.Pod, *kubecontaine Name: "restartable-init-2", Image: "bar-image", RestartPolicy: &containerRestartPolicyAlways, + LivenessProbe: &v1.Probe{}, StartupProbe: &v1.Probe{}, }, { Name: "restartable-init-3", Image: "bar-image", RestartPolicy: &containerRestartPolicyAlways, + LivenessProbe: &v1.Probe{}, StartupProbe: &v1.Probe{}, }, } diff --git a/pkg/kubelet/prober/prober_manager_test.go b/pkg/kubelet/prober/prober_manager_test.go index 9c8badbab52..16ef20a5371 100644 --- a/pkg/kubelet/prober/prober_manager_test.go +++ b/pkg/kubelet/prober/prober_manager_test.go @@ -154,6 +154,7 @@ func TestAddRemovePodsWithRestartableInitContainer(t *testing.T) { { desc: "pod with sidecar (sidecar containers feature enabled)", probePaths: []probeKey{ + {"restartable_init_container_pod", "restartable-init", liveness}, {"restartable_init_container_pod", "restartable-init", readiness}, {"restartable_init_container_pod", "restartable-init", startup}, }, @@ -182,6 +183,7 @@ func TestAddRemovePodsWithRestartableInitContainer(t *testing.T) { Name: "init", }, { Name: "restartable-init", + LivenessProbe: defaultProbe, ReadinessProbe: defaultProbe, StartupProbe: defaultProbe, RestartPolicy: containerRestartPolicy(tc.enableSidecarContainers),