From 230124f3d40c0106cd74c7e605b57da2a1396ac2 Mon Sep 17 00:00:00 2001
From: Ryan Phillips
Date: Mon, 23 May 2022 17:12:26 -0500
Subject: [PATCH] kubelet: add e2e test to verify probe readiness

---
 pkg/kubelet/kubelet.go                  |   5 +-
 pkg/kubelet/prober/prober_manager.go    |   6 +-
 test/e2e/common/node/container_probe.go | 168 ++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 7 deletions(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index e5fe6c5e078..c8640789ba0 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1769,14 +1769,11 @@ func (kl *Kubelet) syncTerminatingPod(ctx context.Context, pod *v1.Pod, podStatu
 	kl.statusManager.SetPodStatus(pod, apiPodStatus)
 
 	if gracePeriod != nil {
-		if *gracePeriod <= 1 {
-			// If we plan to terminate quickly, stop probes immediately, otherwise we will wait until the pod is completely done
-			kl.probeManager.RemovePod(pod)
-		}
 		klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", *gracePeriod)
 	} else {
 		klog.V(4).InfoS("Pod terminating with grace period", "pod", klog.KObj(pod), "podUID", pod.UID, "gracePeriod", nil)
 	}
+	kl.probeManager.StopLivenessAndStartup(pod)
 
 	p := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
diff --git a/pkg/kubelet/prober/prober_manager.go b/pkg/kubelet/prober/prober_manager.go
index 5c4699bb84d..bb073688f89 100644
--- a/pkg/kubelet/prober/prober_manager.go
+++ b/pkg/kubelet/prober/prober_manager.go
@@ -163,7 +163,7 @@ func (m *manager) AddPod(pod *v1.Pod) {
 		if c.StartupProbe != nil {
 			key.probeType = startup
 			if _, ok := m.workers[key]; ok {
-				klog.V(4).ErrorS(nil, "Startup probe already exists for container",
+				klog.V(8).ErrorS(nil, "Startup probe already exists for container",
 					"pod", klog.KObj(pod), "containerName", c.Name)
 				return
 			}
@@ -175,7 +175,7 @@ func (m *manager) AddPod(pod *v1.Pod) {
 		if c.ReadinessProbe != nil {
 			key.probeType = readiness
 			if _, ok := m.workers[key]; ok {
-				klog.V(4).ErrorS(nil, "Readiness probe already exists for container",
+				klog.V(8).ErrorS(nil, "Readiness probe already exists for container",
 					"pod", klog.KObj(pod), "containerName", c.Name)
 				return
 			}
@@ -187,7 +187,7 @@ func (m *manager) AddPod(pod *v1.Pod) {
 		if c.LivenessProbe != nil {
 			key.probeType = liveness
 			if _, ok := m.workers[key]; ok {
-				klog.V(4).ErrorS(nil, "Liveness probe already exists for container",
+				klog.V(8).ErrorS(nil, "Liveness probe already exists for container",
 					"pod", klog.KObj(pod), "containerName", c.Name)
 				return
 			}
diff --git a/test/e2e/common/node/container_probe.go b/test/e2e/common/node/container_probe.go
index 45ea0b74bc4..c5d981d9003 100644
--- a/test/e2e/common/node/container_probe.go
+++ b/test/e2e/common/node/container_probe.go
@@ -18,9 +18,11 @@ package node
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"net"
 	"net/url"
+	"strings"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
@@ -28,6 +30,7 @@
 	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/uuid"
+	"k8s.io/apimachinery/pkg/util/wait"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/kubelet/events"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -551,6 +554,171 @@ var _ = SIGDescribe("Probing container", func() {
 		pod := gRPCServerPodSpec(nil, livenessProbe, "etcd")
 		RunLivenessTest(f, pod, 1, defaultObservationTimeout)
 	})
+
+	ginkgo.It("should mark readiness on pods to false while pod is in progress of terminating when a pod has a readiness probe", func() {
+		podName := "probe-test-" + string(uuid.NewUUID())
+		podClient := f.PodClient()
+		terminationGracePeriod := int64(30)
+		script := `
+_term() {
+	rm -f /tmp/ready
+	sleep 30
+	exit 0
+}
+trap _term SIGTERM
+
+touch /tmp/ready
+
+while true; do
+	echo \"hello\"
+	sleep 10
+done
+`
+
+		// Create Pod
+		podClient.Create(&v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: podName,
+			},
+			Spec: v1.PodSpec{
+				Containers: []v1.Container{
+					{
+						Image:   imageutils.GetE2EImage(imageutils.Agnhost),
+						Name:    podName,
+						Command: []string{"/bin/bash"},
+						Args:    []string{"-c", script},
+						ReadinessProbe: &v1.Probe{
+							ProbeHandler: v1.ProbeHandler{
+								Exec: &v1.ExecAction{
+									Command: []string{"cat", "/tmp/ready"},
+								},
+							},
+							FailureThreshold:    1,
+							InitialDelaySeconds: 5,
+							PeriodSeconds:       2,
+						},
+					},
+				},
+				TerminationGracePeriodSeconds: &terminationGracePeriod,
+			},
+		})
+
+		// verify pods are running and ready
+		err := e2epod.WaitForPodsRunningReady(f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart, map[string]string{})
+		framework.ExpectNoError(err)
+
+		// Shutdown pod. Readiness should change to false
+		podClient.Delete(context.Background(), podName, metav1.DeleteOptions{})
+		err = wait.PollImmediate(framework.Poll, f.Timeouts.PodDelete, func() (bool, error) {
+			pod, err := podClient.Get(context.Background(), podName, metav1.GetOptions{})
+			if err != nil {
+				return false, err
+			}
+			// verify the pod ready status has reported not ready
+			return podutil.IsPodReady(pod) == false, nil
+		})
+		framework.ExpectNoError(err)
+	})
+
+	ginkgo.It("should mark readiness on pods to false and disable liveness probes while pod is in progress of terminating", func() {
+		podName := "probe-test-" + string(uuid.NewUUID())
+		podClient := f.PodClient()
+		terminationGracePeriod := int64(30)
+		script := `
+_term() {
+	rm -f /tmp/ready
+	rm -f /tmp/liveness
+	sleep 20
+	exit 0
+}
+trap _term SIGTERM
+
+touch /tmp/ready
+touch /tmp/liveness
+
+while true; do
+	echo \"hello\"
+	sleep 10
+done
+`
+
+		// Create Pod
+		podClient.Create(&v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: podName,
+			},
+			Spec: v1.PodSpec{
+				Containers: []v1.Container{
+					{
+						Image:   imageutils.GetE2EImage(imageutils.Agnhost),
+						Name:    podName,
+						Command: []string{"/bin/bash"},
+						Args:    []string{"-c", script},
+						ReadinessProbe: &v1.Probe{
+							ProbeHandler: v1.ProbeHandler{
+								Exec: &v1.ExecAction{
+									Command: []string{"cat", "/tmp/ready"},
+								},
+							},
+							FailureThreshold: 1,
+							// delay startup to make sure the script has
+							// time to create the ready+liveness files
+							InitialDelaySeconds: 5,
+							PeriodSeconds:       2,
+						},
+						LivenessProbe: &v1.Probe{
+							ProbeHandler: v1.ProbeHandler{
+								Exec: &v1.ExecAction{
+									Command: []string{"cat", "/tmp/liveness"},
+								},
+							},
+							FailureThreshold: 1,
+							// delay startup to make sure the script has
+							// time to create the ready+liveness files
+							InitialDelaySeconds: 5,
+							PeriodSeconds:       1,
+						},
+					},
+				},
+				TerminationGracePeriodSeconds: &terminationGracePeriod,
+			},
+		})
+
+		// verify pods are running and ready
+		err := e2epod.WaitForPodsRunningReady(f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart, map[string]string{})
+		framework.ExpectNoError(err)
+
+		// Shutdown pod. Readiness should change to false
+		podClient.Delete(context.Background(), podName, metav1.DeleteOptions{})
+
+		// Wait for pod to go unready
+		err = wait.PollImmediate(framework.Poll, f.Timeouts.PodDelete, func() (bool, error) {
+			pod, err := podClient.Get(context.Background(), podName, metav1.GetOptions{})
+			if err != nil {
+				return false, err
+			}
+			// verify the pod ready status has reported not ready
+			return podutil.IsPodReady(pod) == false, nil
+		})
+		framework.ExpectNoError(err)
+
+		// Verify there are zero liveness failures since they are turned off
+		// during pod termination
+		gomega.Consistently(func() (bool, error) {
+			items, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(context.Background(), metav1.ListOptions{})
+			framework.ExpectNoError(err)
+			for _, event := range items.Items {
+				// Search only for the pod we are interested in
+				if event.InvolvedObject.Name != podName {
+					continue
+				}
+				if strings.Contains(event.Message, "failed liveness probe") {
+					return true, errors.New("should not see liveness probe failures")
+				}
+			}
+			return false, nil
+		}, 1*time.Minute, framework.Poll).ShouldNot(gomega.BeTrue(), "should not see liveness probe failures")
+	})
 })
 
 // GetContainerStartedTime returns the time when the given container started and error if any