From b2b082f54f5f4177231d8e56283f41295b3cdd53 Mon Sep 17 00:00:00 2001
From: Quan Tian
Date: Tue, 30 Jun 2020 00:20:12 +0800
Subject: [PATCH] Don't create a new sandbox for pod with RestartPolicyOnFailure if all containers succeeded

The kubelet would attempt to create a new sandbox for a pod whose
RestartPolicy is OnFailure even after all of its containers succeeded.
This caused unnecessary CRI and CNI calls, confusing logs, and conflicts
between the routine that creates the new sandbox and the routine that
kills the Pod.

This patch checks the containers to start and stops creating the sandbox
if no container is supposed to start.
---
 .../kuberuntime/kuberuntime_manager.go        | 27 +++++--
 .../kuberuntime/kuberuntime_manager_test.go   | 56 +++++++++++++
 test/e2e/node/BUILD                           |  1 +
 test/e2e/node/pods.go                         | 80 +++++++++++++++++++
 4 files changed, 156 insertions(+), 8 deletions(-)

diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index eebba49bbbe..9b48b022ed7 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -499,19 +499,30 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
 			changes.CreateSandbox = false
 			return changes
 		}
+
+		// Get the containers to start, excluding the ones that succeeded if RestartPolicy is OnFailure.
+		var containersToStart []int
+		for idx, c := range pod.Spec.Containers {
+			if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
+				continue
+			}
+			containersToStart = append(containersToStart, idx)
+		}
+		// We should not create a sandbox for a Pod if initialization is done and there is no container to start.
+		if len(containersToStart) == 0 {
+			_, _, done := findNextInitContainerToRun(pod, podStatus)
+			if done {
+				changes.CreateSandbox = false
+				return changes
+			}
+		}
+
 		if len(pod.Spec.InitContainers) != 0 {
 			// Pod has init containers, return the first one.
 			changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
 			return changes
 		}
-		// Start all containers by default but exclude the ones that succeeded if
-		// RestartPolicy is OnFailure.
-		for idx, c := range pod.Spec.Containers {
-			if containerSucceeded(&c, podStatus) && pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure {
-				continue
-			}
-			changes.ContainersToStart = append(changes.ContainersToStart, idx)
-		}
+		changes.ContainersToStart = containersToStart
 		return changes
 	}
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
index e8e9512a73b..64095cad770 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
@@ -908,6 +908,29 @@ func TestComputePodActions(t *testing.T) {
 				ContainersToKill:  map[kubecontainer.ContainerID]containerToKillInfo{},
 			},
 		},
+		"Verify we do not create a pod sandbox if no ready sandbox for pod with RestartPolicy=OnFailure and all containers succeeded": {
+			mutatePodFn: func(pod *v1.Pod) {
+				pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure
+			},
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				// no ready sandbox
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+				status.SandboxStatuses[0].Metadata.Attempt = uint32(1)
+				// all containers succeeded
+				for i := range status.ContainerStatuses {
+					status.ContainerStatuses[i].State = kubecontainer.ContainerStateExited
+					status.ContainerStatuses[i].ExitCode = 0
+				}
+			},
+			actions: podActions{
+				SandboxID:         baseStatus.SandboxStatuses[0].Id,
+				Attempt:           uint32(2),
+				CreateSandbox:     false,
+				KillPod:           true,
+				ContainersToStart: []int{},
+				ContainersToKill:  map[kubecontainer.ContainerID]containerToKillInfo{},
+			},
+		},
 		"Verify we create a pod sandbox if no ready sandbox for pod with RestartPolicy=Never and no containers have ever been created": {
 			mutatePodFn: func(pod *v1.Pod) {
 				pod.Spec.RestartPolicy = v1.RestartPolicyNever
@@ -1137,6 +1160,22 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
 				ContainersToKill:         getKillMapWithInitContainers(basePod, baseStatus, []int{}),
 			},
 		},
+		"Pod sandbox not ready, init container failed, and RestartPolicy == OnFailure; create a new pod sandbox": {
+			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+				status.ContainerStatuses[2].ExitCode = 137
+			},
+			actions: podActions{
+				KillPod:                  true,
+				CreateSandbox:            true,
+				SandboxID:                baseStatus.SandboxStatuses[0].Id,
+				Attempt:                  uint32(1),
+				NextInitContainerToStart: &basePod.Spec.InitContainers[0],
+				ContainersToStart:        []int{},
+				ContainersToKill:         getKillMapWithInitContainers(basePod, baseStatus, []int{}),
+			},
+		},
 	} {
 		pod, status := makeBasePodAndStatusWithInitContainers()
 		if test.mutatePodFn != nil {
@@ -1262,6 +1301,23 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) {
 				EphemeralContainersToStart: []int{0},
 			},
 		},
+		"Create a new pod sandbox if the pod sandbox is dead, init container failed and RestartPolicy == OnFailure": {
+			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure },
+			mutateStatusFn: func(status *kubecontainer.PodStatus) {
+				status.SandboxStatuses[0].State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
+				status.ContainerStatuses = status.ContainerStatuses[3:]
+				status.ContainerStatuses[0].ExitCode = 137
+			},
+			actions: podActions{
+				KillPod:                  true,
+				CreateSandbox:            true,
+				SandboxID:                baseStatus.SandboxStatuses[0].Id,
+				Attempt:                  uint32(1),
+				NextInitContainerToStart: &basePod.Spec.InitContainers[0],
+				ContainersToStart:        []int{},
+				ContainersToKill:         getKillMapWithInitContainers(basePod, baseStatus, []int{}),
+			},
+		},
 		"Kill pod and do not restart ephemeral container if the pod sandbox is dead": {
 			mutatePodFn: func(pod *v1.Pod) { pod.Spec.RestartPolicy = v1.RestartPolicyAlways },
 			mutateStatusFn: func(status *kubecontainer.PodStatus) {
diff --git a/test/e2e/node/BUILD b/test/e2e/node/BUILD
index 68a9a95b4e8..b087ebbf712 100644
--- a/test/e2e/node/BUILD
+++ b/test/e2e/node/BUILD
@@ -26,6 +26,7 @@ go_library(
     visibility = ["//visibility:public"],
     deps = [
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
+        "//pkg/kubelet/events:go_default_library",
         "//pkg/kubelet/runtimeclass/testing:go_default_library",
         "//pkg/master/ports:go_default_library",
         "//pkg/util/slice:go_default_library",
diff --git a/test/e2e/node/pods.go b/test/e2e/node/pods.go
index f6a472d28cd..d308e82e003 100644
--- a/test/e2e/node/pods.go
+++ b/test/e2e/node/pods.go
@@ -31,10 +31,12 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -445,5 +447,83 @@ var _ = SIGDescribe("Pods Extended", func() {
 				framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
 			}
 		})
+
+	})
+
+	framework.KubeDescribe("Pod Container lifecycle", func() {
+		var podClient *framework.PodClient
+		ginkgo.BeforeEach(func() {
+			podClient = f.PodClient()
+		})
+
+		ginkgo.It("should not create extra sandbox if all containers are done", func() {
+			ginkgo.By("creating the pod that should always exit 0")
+
+			name := "pod-always-succeed" + string(uuid.NewUUID())
+			image := imageutils.GetE2EImage(imageutils.BusyBox)
+			pod := &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: name,
+				},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyOnFailure,
+					InitContainers: []v1.Container{
+						{
+							Name:  "foo",
+							Image: image,
+							Command: []string{
+								"/bin/true",
+							},
+						},
+					},
+					Containers: []v1.Container{
+						{
+							Name:  "bar",
+							Image: image,
+							Command: []string{
+								"/bin/true",
+							},
+						},
+					},
+				},
+			}
+
+			ginkgo.By("submitting the pod to kubernetes")
+			createdPod := podClient.Create(pod)
+			defer func() {
+				ginkgo.By("deleting the pod")
+				podClient.Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
+			}()
+
+			framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name))
+
+			var eventList *v1.EventList
+			var err error
+			ginkgo.By("Getting events about the pod")
+			framework.ExpectNoError(wait.Poll(time.Second*2, time.Second*60, func() (bool, error) {
+				selector := fields.Set{
+					"involvedObject.kind":      "Pod",
+					"involvedObject.uid":       string(createdPod.UID),
+					"involvedObject.namespace": f.Namespace.Name,
+					"source":                   "kubelet",
+				}.AsSelector().String()
+				options := metav1.ListOptions{FieldSelector: selector}
+				eventList, err = f.ClientSet.CoreV1().Events(f.Namespace.Name).List(context.TODO(), options)
+				if err != nil {
+					return false, err
+				}
+				if len(eventList.Items) > 0 {
+					return true, nil
+				}
+				return false, nil
+			}))
+
+			ginkgo.By("Checking events about the pod")
+			for _, event := range eventList.Items {
+				if event.Reason == events.SandboxChanged {
+					framework.Fail("Unexpected SandboxChanged event")
event") + } + } + }) }) })