From cd83ff032ca5a83822e1b223da9f9cede9dbe964 Mon Sep 17 00:00:00 2001 From: Gunju Kim Date: Mon, 5 Aug 2024 19:49:42 +0900 Subject: [PATCH] Restart the init container to not be stuck in created state The main sync loop should have created and started the container in one step. If the init container is in the 'created' state, it's likely that the container runtime failed to start it. To prevent the container from getting stuck in the 'created' state, restart it. --- .../kuberuntime/kuberuntime_container.go | 7 ++++++- .../kuberuntime/kuberuntime_manager_test.go | 20 +++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 3b534f1536f..572df3a4ff0 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -1055,7 +1055,12 @@ func (m *kubeGenericRuntimeManager) computeInitContainerActions(pod *v1.Pod, pod switch status.State { case kubecontainer.ContainerStateCreated: - // nothing to do but wait for it to start + // The main sync loop should have created and started the container + // in one step. If the init container is in the 'created' state, + // it is likely that the container runtime failed to start it. To + // prevent the container from getting stuck in the 'created' state, + // restart it. + changes.InitContainersToStart = append(changes.InitContainersToStart, i) case kubecontainer.ContainerStateRunning: if !types.IsRestartableInitContainer(container) { diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index b994ba30c8c..3d5f8bcaaa6 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -1168,6 +1168,17 @@ func TestComputePodActions(t *testing.T) { ContainersToStart: []int{1}, }, }, + "Restart the container if the container is in created state": { + mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyNever }, + mutateStatusFn: func(status *kubecontainer.PodStatus) { + status.ContainerStatuses[1].State = kubecontainer.ContainerStateCreated + }, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + ContainersToKill: map[kubecontainer.ContainerID]containerToKillInfo{}, + ContainersToStart: []int{1}, + }, + }, } { pod, status := makeBasePodAndStatus() if test.mutatePodFn != nil { @@ -1525,12 +1536,17 @@ func TestComputePodActionsWithRestartableInitContainers(t *testing.T) { ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), }, }, - "initialization in progress; do nothing": { + "an init container is stuck in the created state; restart it": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways }, mutateStatusFn: func(pod *v1.Pod, status *kubecontainer.PodStatus) { status.ContainerStatuses[2].State = kubecontainer.ContainerStateCreated }, - actions: noAction, + actions: podActions{ + SandboxID: baseStatus.SandboxStatuses[0].Id, + InitContainersToStart: []int{2}, + ContainersToStart: []int{}, + ContainersToKill: getKillMapWithInitContainers(basePod, baseStatus, []int{}), + }, }, "restartable init container has started; start the next": { mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },