From 4363a8453cef120fc1406c1d9464ae214c3aaba9 Mon Sep 17 00:00:00 2001 From: Gunju Kim Date: Mon, 5 Aug 2024 19:49:42 +0900 Subject: [PATCH] Restart the init container to not be stuck in created state The main sync loop should have created and started the container in one step. If the init container is in the 'created' state, it's likely that the container runtime failed to start it. To prevent the container from getting stuck in the 'created' state, restart it. --- .../kuberuntime/kuberuntime_container.go | 7 ++++++- .../kuberuntime/kuberuntime_manager_test.go | 20 +++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index b049540d0d3..a9ab3ef4a33 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -1084,7 +1084,12 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(pod *v1.Pod, pod switch status.State { case kubecontainer.ContainerStateCreated: - // nothing to do but wait for it to start + // The main sync loop should have created and started the container + // in one step. If the init container is in the 'created' state, + // it is likely that the container runtime failed to start it. To + // prevent the container from getting stuck in the 'created' state, + // restart it. + changes.InitContainersToStart = append(changes.InitContainersToStart, i) case kubecontainer.ContainerStateRunning: if !types.IsRestartableInitContainer(container) { diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 732a6f7f1c8..949b3421e06 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -1170,6 +1170,17 @@ func TestComputePodActions(t *testing.T) { ContainersToStart: []int{1}, }, }, + "Restart the container if the container is in created state": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[1].State = kubecontainer.ContainerStateCreated + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, + ContainersToStart: []int{1}, + }, + }, } { pod, status := makeBasePodAndStatus() if test.mutatePodFn != nil { @@ -1527,12 +1538,17 @@ func TestComputePodActionsWithRestartableInitContainers(t *testing.T) { ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, - "initialization in progress; do nothing": { + "an init container is stuck in the created state; restart it": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { status.ContainerStatuses[2].State = kubecontainer.ContainerStateCreated }, - actions: noAction, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, }, "restartable init container has started; start the next": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },