Add node serial e2e tests that simulate the kubelet restart

This adds node e2e tests to make sure a completed init container is not
restarted when the kubelet restarts.
Gunju Kim 2023-09-08 20:33:44 +09:00
parent 77e12aeca9
commit 45a243e102
2 changed files with 353 additions and 1 deletion

@@ -351,12 +351,13 @@ func parseOutput(ctx context.Context, f *framework.Framework, pod *v1.Pod) containerOutputList
 	sc := bufio.NewScanner(&buf)
 	var res containerOutputList
 	for sc.Scan() {
+		log := sc.Text()
 		fields := strings.Fields(sc.Text())
 		if len(fields) < 3 {
 			framework.ExpectNoError(fmt.Errorf("%v should have at least length 3", fields))
 		}
 		timestamp, err := time.Parse(time.RFC3339, fields[0])
-		framework.ExpectNoError(err)
+		framework.ExpectNoError(err, "Failed to parse the timestamp, log: %q", log)
 		res = append(res, containerOutput{
 			timestamp:     timestamp,
 			containerName: fields[1],

@@ -19,6 +19,7 @@ package e2enode
 import (
 	"context"
 	"fmt"
+	"strings"
 	"time"

 	"github.com/onsi/ginkgo/v2"
@@ -1047,6 +1048,356 @@ var _ = SIGDescribe(framework.WithSerial(), "Containers Lifecycle", func() {
		framework.ExpectNoError(init2Restarted.IsBefore(init3Restarted))
		framework.ExpectNoError(init3Restarted.IsBefore(regular1Restarted))
	})
ginkgo.When("a Pod is initialized and running", func() {
var client *e2epod.PodClient
var err error
var pod *v1.Pod
init1 := "init-1"
init2 := "init-2"
init3 := "init-3"
regular1 := "regular-1"
ginkgo.BeforeEach(func(ctx context.Context) {
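			// The pod runs three short init containers to completion and then
			// keeps a regular container running, so every init container has
			// terminated successfully before the kubelet is restarted.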
pod = &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "initialized-pod",
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyAlways,
InitContainers: []v1.Container{
{
Name: init1,
Image: busyboxImage,
Command: ExecCommand(init1, execCommand{
Delay: 1,
ExitCode: 0,
}),
},
{
Name: init2,
Image: busyboxImage,
Command: ExecCommand(init2, execCommand{
Delay: 1,
ExitCode: 0,
}),
},
{
Name: init3,
Image: busyboxImage,
Command: ExecCommand(init3, execCommand{
Delay: 1,
ExitCode: 0,
}),
},
},
Containers: []v1.Container{
{
Name: regular1,
Image: busyboxImage,
Command: ExecCommand(regular1, execCommand{
Delay: 300,
ExitCode: 0,
}),
},
},
},
}
preparePod(pod)
client = e2epod.NewPodClient(f)
pod = client.Create(ctx, pod)
ginkgo.By("Waiting for the pod to be initialized and run")
err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
framework.ExpectNoError(err)
})
ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
			startKubelet := stopKubelet()
			// wait until the kubelet health check fails
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))

			ginkgo.By("restarting the kubelet")
			startKubelet()
			// wait until the kubelet health check succeeds
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be started"))
ginkgo.By("ensuring that no completed init container is restarted")
			gomega.Consistently(ctx, func() bool {
				pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
				framework.ExpectNoError(err)
				for _, status := range pod.Status.InitContainerStatuses {
					if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
						continue
					}
					if status.RestartCount > 0 {
						return false
					}
				}
				return true
			}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))

			ginkgo.By("Parsing results")
			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)
			results := parseOutput(ctx, f, pod)

			ginkgo.By("Analyzing results")
			framework.ExpectNoError(results.StartsBefore(init1, init2))
			framework.ExpectNoError(results.ExitsBefore(init1, init2))
			framework.ExpectNoError(results.StartsBefore(init2, init3))
			framework.ExpectNoError(results.ExitsBefore(init2, init3))
			gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
			gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
			gomega.Expect(pod.Status.InitContainerStatuses[2].RestartCount).To(gomega.Equal(int32(0)))
		})
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
ginkgo.By("removing the completed init container statuses from the container runtime")
			rs, _, err := getCRIClient()
			framework.ExpectNoError(err)
			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)
			for _, c := range pod.Status.InitContainerStatuses {
				if c.State.Terminated == nil || c.State.Terminated.ExitCode != 0 {
					continue
				}
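				// c.ContainerID has the form "<runtime>://<container-id>";
				// the CRI RemoveContainer call takes the bare ID.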
				tokens := strings.Split(c.ContainerID, "://")
				gomega.Expect(tokens).To(gomega.HaveLen(2))
				containerID := tokens[1]
				err := rs.RemoveContainer(ctx, containerID)
				framework.ExpectNoError(err)
			}
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
for _, status := range pod.Status.InitContainerStatuses {
if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
continue
}
if status.RestartCount > 0 {
return false
}
}
return true
}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
ginkgo.By("Analyzing results")
// Cannot analyze the results with the container logs as the
// container statuses have been removed from container runtime.
gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
gomega.Expect(pod.Status.InitContainerStatuses[2].RestartCount).To(gomega.Equal(int32(0)))
gomega.Expect(pod.Status.ContainerStatuses[0].State.Running).ToNot(gomega.BeNil())
})
})
ginkgo.When("a Pod is initializing the long-running init container", func() {
var client *e2epod.PodClient
var err error
var pod *v1.Pod
init1 := "init-1"
init2 := "init-2"
longRunningInit3 := "long-running-init-3"
regular1 := "regular-1"
ginkgo.BeforeEach(func(ctx context.Context) {
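			// Same shape as above, except the third init container runs for
			// 300 seconds, so the kubelet restart happens while the pod is
			// still initializing and only init-1 and init-2 have completed.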
pod = &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "initializing-long-running-init-container",
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyAlways,
InitContainers: []v1.Container{
{
Name: init1,
Image: busyboxImage,
Command: ExecCommand(init1, execCommand{
Delay: 1,
ExitCode: 0,
}),
},
{
Name: init2,
Image: busyboxImage,
Command: ExecCommand(init2, execCommand{
Delay: 1,
ExitCode: 0,
}),
},
{
Name: longRunningInit3,
Image: busyboxImage,
Command: ExecCommand(longRunningInit3, execCommand{
Delay: 300,
ExitCode: 0,
}),
},
},
Containers: []v1.Container{
{
Name: regular1,
Image: busyboxImage,
Command: ExecCommand(regular1, execCommand{
Delay: 300,
ExitCode: 0,
}),
},
},
},
}
preparePod(pod)
client = e2epod.NewPodClient(f)
pod = client.Create(ctx, pod)
ginkgo.By("Waiting for the pod to be initializing the long-running init container")
			err := e2epod.WaitForPodCondition(ctx, f.ClientSet, pod.Namespace, pod.Name, "long-running init container initializing", 1*time.Minute, func(pod *v1.Pod) (bool, error) {
				for _, c := range pod.Status.InitContainerStatuses {
					if c.Name != longRunningInit3 {
						continue
					}
					if c.State.Running != nil && c.Started != nil && *c.Started {
						return true, nil
					}
				}
				return false, nil
			})
			framework.ExpectNoError(err)
		})
ginkgo.It("should not restart any completed init container after the kubelet restart", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
for _, status := range pod.Status.InitContainerStatuses {
if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
continue
}
if status.RestartCount > 0 {
return false
}
}
return true
}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
ginkgo.By("Parsing results")
pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
results := parseOutput(ctx, f, pod)
ginkgo.By("Analyzing results")
			framework.ExpectNoError(results.StartsBefore(init1, init2))
			framework.ExpectNoError(results.ExitsBefore(init1, init2))
			gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
			gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
		})
ginkgo.It("should not restart any completed init container, even after the completed init container statuses have been removed and the kubelet restarted", func(ctx context.Context) {
ginkgo.By("stopping the kubelet")
startKubelet := stopKubelet()
// wait until the kubelet health check will fail
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("kubelet should be stopped"))
ginkgo.By("removing the completed init container statuses from the container runtime")
			rs, _, err := getCRIClient()
			framework.ExpectNoError(err)
			pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
			framework.ExpectNoError(err)
			for _, c := range pod.Status.InitContainerStatuses {
				if c.State.Terminated == nil || c.State.Terminated.ExitCode != 0 {
					continue
				}
				tokens := strings.Split(c.ContainerID, "://")
				gomega.Expect(tokens).To(gomega.HaveLen(2))
				containerID := tokens[1]
				err := rs.RemoveContainer(ctx, containerID)
				framework.ExpectNoError(err)
			}
ginkgo.By("restarting the kubelet")
startKubelet()
// wait until the kubelet health check will succeed
gomega.Eventually(ctx, func() bool {
return kubeletHealthCheck(kubeletHealthCheckURL)
}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("kubelet should be restarted"))
ginkgo.By("ensuring that no completed init container is restarted")
gomega.Consistently(ctx, func() bool {
pod, err = client.Get(ctx, pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
for _, status := range pod.Status.InitContainerStatuses {
if status.State.Terminated == nil || status.State.Terminated.ExitCode != 0 {
continue
}
if status.RestartCount > 0 {
return false
}
}
return true
}, 1*time.Minute, f.Timeouts.Poll).Should(gomega.BeTrueBecause("no completed init container should be restarted"))
ginkgo.By("Analyzing results")
// Cannot analyze the results with the container logs as the
// container statuses have been removed from container runtime.
gomega.Expect(pod.Status.InitContainerStatuses[0].RestartCount).To(gomega.Equal(int32(0)))
gomega.Expect(pod.Status.InitContainerStatuses[1].RestartCount).To(gomega.Equal(int32(0)))
})
})
})
var _ = SIGDescribe(nodefeature.SidecarContainers, "Containers Lifecycle", func() {