From ddd0d8a3dafcced1ccbf13a4080c5236cf320798 Mon Sep 17 00:00:00 2001 From: David Porter Date: Tue, 2 Nov 2021 21:03:19 -0700 Subject: [PATCH] test: fixes for graceful node shutdown test * Bump the pod status and node status update timeouts to avoid flakes * Add a small delay after the dbus restart to ensure dbus has enough time to start up before the shutdown signal is sent * Change the check for a pod being terminated by graceful shutdown. Previously, the pod phase was checked to see if it was `Failed` and the pod reason string matched. This logic needs to change after the 1.22 graceful node shutdown change introduced in PR #102344, which changed the behavior to no longer put the pods into a failed phase. Instead, the test now checks that containers are not ready, and that the pod status message and reason are set appropriately. Signed-off-by: David Porter --- test/e2e_node/node_shutdown_linux_test.go | 41 ++++++++++++++++++----- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/test/e2e_node/node_shutdown_linux_test.go b/test/e2e_node/node_shutdown_linux_test.go index 3a6a70c4283..558b0cb782a 100644 --- a/test/e2e_node/node_shutdown_linux_test.go +++ b/test/e2e_node/node_shutdown_linux_test.go @@ -46,10 +46,9 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut ginkgo.Context("when gracefully shutting down", func() { const ( - nodeShutdownReason = "Terminated" pollInterval = 1 * time.Second - podStatusUpdateTimeout = 5 * time.Second - nodeStatusUpdateTimeout = 10 * time.Second + podStatusUpdateTimeout = 30 * time.Second + nodeStatusUpdateTimeout = 30 * time.Second nodeShutdownGracePeriod = 20 * time.Second nodeShutdownGracePeriodCriticalPods = 10 * time.Second ) @@ -117,13 +116,13 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut for _, pod := range list.Items { if kubelettypes.IsCriticalPod(&pod) { - if pod.Status.Phase != v1.PodRunning { - framework.Logf("Expecting critcal pod to be running, but it's 
not currently. Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q", pod.Name, pod.Status.Phase, pod.Status.Reason) + if isPodShutdown(&pod) { + framework.Logf("Expecting critcal pod to be running, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status) return fmt.Errorf("critical pod should not be shutdown, phase: %s", pod.Status.Phase) } } else { - if pod.Status.Phase != v1.PodFailed || pod.Status.Reason != nodeShutdownReason { - framework.Logf("Expecting non-critcal pod to be shutdown, but it's not currently. Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q", pod.Name, pod.Status.Phase, pod.Status.Reason) + if !isPodShutdown(&pod) { + framework.Logf("Expecting non-critcal pod to be shutdown, but it's not currently. Pod: %q, Pod Status %+v", pod.Name, pod.Status) return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase) } } @@ -143,8 +142,8 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut framework.ExpectEqual(len(list.Items), len(pods), "the number of pods is not as expected") for _, pod := range list.Items { - if pod.Status.Phase != v1.PodFailed || pod.Status.Reason != nodeShutdownReason { - framework.Logf("Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q", pod.Name, pod.Status.Phase, pod.Status.Reason) + if !isPodShutdown(&pod) { + framework.Logf("Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status %+v", pod.Name, pod.Status) return fmt.Errorf("pod should be shutdown, phase: %s", pod.Status.Phase) } } @@ -192,6 +191,9 @@ var _ = SIGDescribe("GracefulNodeShutdown [Serial] [NodeFeature:GracefulNodeShut err = restartDbus() framework.ExpectNoError(err) + // Wait a few seconds to ensure dbus is restarted... 
+			time.Sleep(5 * time.Second)
+
 			ginkgo.By("Emitting Shutdown signal")
 			err = emitSignalPrepareForShutdown(true)
 			framework.ExpectNoError(err)
@@ -307,3 +309,24 @@ func restoreDbusConfig() error {
 	}
 	return systemctlDaemonReload()
 }
+
+const (
+	// https://github.com/kubernetes/kubernetes/blob/1dd781ddcad454cc381806fbc6bd5eba8fa368d7/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go#L43-L44
+	podShutdownReason  = "Terminated"
+	podShutdownMessage = "Pod was terminated in response to imminent node shutdown."
+)
+
+// isPodShutdown reports whether pod is non-nil, its status reason and message
+// equal podShutdownReason and podShutdownMessage, and it carries a
+// ContainersReady condition whose status is False.
+func isPodShutdown(pod *v1.Pod) bool {
+	if pod == nil || pod.Status.Reason != podShutdownReason || pod.Status.Message != podShutdownMessage {
+		return false
+	}
+	for _, c := range pod.Status.Conditions {
+		if c.Type == v1.ContainersReady && c.Status == v1.ConditionFalse {
+			return true
+		}
+	}
+	return false
+}