From 9ce994af9fcb94a378d3b2b08545794ef2adb3dc Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Wed, 20 Mar 2024 17:34:47 +0200 Subject: [PATCH] e2e_node: remove `Dbus` test case The test case restarts dbus and systemd, which is considered a dangerous practice and caused a slowdown of the test cases for CRI-O Serial jobs. --- test/e2e_node/node_shutdown_linux_test.go | 39 ---------------- test/e2e_node/restart_test.go | 55 ----------------------- test/e2e_node/util.go | 10 ----- 3 files changed, 104 deletions(-) diff --git a/test/e2e_node/node_shutdown_linux_test.go b/test/e2e_node/node_shutdown_linux_test.go index 812eea262bb..3af1b2a9a7a 100644 --- a/test/e2e_node/node_shutdown_linux_test.go +++ b/test/e2e_node/node_shutdown_linux_test.go @@ -24,7 +24,6 @@ import ( "fmt" "os" "os/exec" - "path/filepath" "regexp" "strconv" "time" @@ -652,44 +651,6 @@ func getNodeReadyStatus(ctx context.Context, f *framework.Framework) bool { return isNodeReady(&nodeList.Items[0]) } -func systemctlDaemonReload() error { - cmd := "systemctl daemon-reload" - _, err := runCommand("sh", "-c", cmd) - return err -} - -var ( - dbusConfPath = "/etc/systemd/system/dbus.service.d/k8s-graceful-node-shutdown-e2e.conf" - dbusConf = ` -[Unit] -RefuseManualStart=no -RefuseManualStop=no -[Service] -KillMode=control-group -ExecStop= -` -) - -func overlayDbusConfig() error { - err := os.MkdirAll(filepath.Dir(dbusConfPath), 0755) - if err != nil { - return err - } - err = os.WriteFile(dbusConfPath, []byte(dbusConf), 0644) - if err != nil { - return err - } - return systemctlDaemonReload() -} - -func restoreDbusConfig() error { - err := os.Remove(dbusConfPath) - if err != nil { - return err - } - return systemctlDaemonReload() -} - const ( // https://github.com/kubernetes/kubernetes/blob/1dd781ddcad454cc381806fbc6bd5eba8fa368d7/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go#L43-L44 podShutdownReason = "Terminated" diff --git a/test/e2e_node/restart_test.go b/test/e2e_node/restart_test.go index 
aa8194f44e9..1e7ac1e44dd 100644 --- a/test/e2e_node/restart_test.go +++ b/test/e2e_node/restart_test.go @@ -147,61 +147,6 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra }) }) }) - - ginkgo.Context("Dbus", func() { - ginkgo.It("should continue to run pods after a restart", func(ctx context.Context) { - // Allow dbus to be restarted on ubuntu - err := overlayDbusConfig() - framework.ExpectNoError(err) - defer func() { - err := restoreDbusConfig() - framework.ExpectNoError(err) - }() - - preRestartPodCount := 2 - ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", preRestartPodCount)) - restartAlwaysPods := newTestPods(preRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test") - createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval) - ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods) - - allPods := waitForPodsCondition(ctx, f, preRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded) - if len(allPods) < preRestartPodCount { - framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), preRestartPodCount) - } - - ginkgo.By("restarting dbus and systemd", func() { - stdout, err := exec.Command("sudo", "systemctl", "reset-failed", "dbus").CombinedOutput() - framework.ExpectNoError(err, "Failed to reset dbus start-limit with systemctl: %v, %s", err, string(stdout)) - - stdout, err = exec.Command("sudo", "systemctl", "restart", "dbus").CombinedOutput() - framework.ExpectNoError(err, "Failed to restart dbus with systemctl: %v, %s", err, string(stdout)) - - stdout, err = exec.Command("sudo", "systemctl", "daemon-reexec").CombinedOutput() - framework.ExpectNoError(err, "Failed to restart systemd with systemctl: %v, %s", err, string(stdout)) - }) - - ginkgo.By("verifying restartAlways pods stay running", func() { - for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) { - 
postRestartRunningPods := waitForPodsCondition(ctx, f, preRestartPodCount, recoverTimeout, testutils.PodRunningReadyOrSucceeded) - if len(postRestartRunningPods) < preRestartPodCount { - framework.Failf("fewer pods are running after systemd restart, got %d but expected %d", len(postRestartRunningPods), preRestartPodCount) - } - } - }) - - ginkgo.By("verifying new pods can be started after a dbus restart") - postRestartPodCount := 2 - postRestartPods := newTestPods(postRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test") - createBatchPodWithRateControl(ctx, f, postRestartPods, podCreationInterval) - ginkgo.DeferCleanup(deletePodsSync, f, postRestartPods) - - allPods = waitForPodsCondition(ctx, f, preRestartPodCount+postRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded) - if len(allPods) < preRestartPodCount+postRestartPodCount { - framework.Failf("Failed to run pods after restarting dbus, got %d but expected %d", len(allPods), preRestartPodCount+postRestartPodCount) - } - }) - }) - ginkgo.Context("Kubelet", func() { ginkgo.It("should correctly account for terminated pods after restart", func(ctx context.Context) { node := getLocalNode(ctx, f) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index 27fdfba32f0..b54e2b364aa 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -316,16 +316,6 @@ func logKubeletLatencyMetrics(ctx context.Context, metricNames ...string) { } } -// runCommand runs the cmd and returns the combined stdout and stderr, or an -// error if the command failed. -func runCommand(cmd ...string) (string, error) { - output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput() - if err != nil { - return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output) - } - return string(output), nil -} - // getCRIClient connects CRI and returns CRI runtime service clients and image service client. 
func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService, error) { // connection timeout for CRI service connection