e2e_node: remove Dbus test case

The test case restarts dbus and systemd, which is considered dangerous
practice and slowed down the test runs of the CRI-O Serial jobs.
Ed Bartosh 2024-03-20 17:34:47 +02:00
parent a309fadbac
commit 9ce994af9f
3 changed files with 0 additions and 104 deletions


@@ -24,7 +24,6 @@ import (
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"time"
@@ -652,44 +651,6 @@ func getNodeReadyStatus(ctx context.Context, f *framework.Framework) bool {
	return isNodeReady(&nodeList.Items[0])
}

func systemctlDaemonReload() error {
	cmd := "systemctl daemon-reload"
	_, err := runCommand("sh", "-c", cmd)
	return err
}

var (
	dbusConfPath = "/etc/systemd/system/dbus.service.d/k8s-graceful-node-shutdown-e2e.conf"
	dbusConf     = `
[Unit]
RefuseManualStart=no
RefuseManualStop=no
[Service]
KillMode=control-group
ExecStop=
`
)

func overlayDbusConfig() error {
	err := os.MkdirAll(filepath.Dir(dbusConfPath), 0755)
	if err != nil {
		return err
	}
	err = os.WriteFile(dbusConfPath, []byte(dbusConf), 0644)
	if err != nil {
		return err
	}
	return systemctlDaemonReload()
}

func restoreDbusConfig() error {
	err := os.Remove(dbusConfPath)
	if err != nil {
		return err
	}
	return systemctlDaemonReload()
}

const (
	// https://github.com/kubernetes/kubernetes/blob/1dd781ddcad454cc381806fbc6bd5eba8fa368d7/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go#L43-L44
	podShutdownReason = "Terminated"
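A note on the mechanism these removed helpers relied on: systemd reads drop-in files from /etc/systemd/system/<unit>.service.d/ as overrides on top of the packaged unit, and `systemctl daemon-reload` is what makes the manager pick a new drop-in up; the override above exists because, as the test comment in the next hunk puts it, the stock unit on Ubuntu otherwise refuses manual restarts of dbus. A minimal sketch of verifying that such a drop-in took effect, assuming a systemd host; checkOverride and its use of `systemctl show` are illustrative, not part of the removed code:

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// checkOverride asks systemd for one property of a unit and reports whether
// it matches the value a drop-in is expected to set. Hypothetical helper:
// the removed test never verified its override, it only wrote the drop-in
// and ran `systemctl daemon-reload`.
func checkOverride(unit, property, want string) (bool, error) {
	// `systemctl show UNIT -p PROPERTY` prints "PROPERTY=VALUE".
	out, err := exec.Command("systemctl", "show", unit, "-p", property).CombinedOutput()
	if err != nil {
		return false, fmt.Errorf("systemctl show %s failed: %w (%s)", unit, err, out)
	}
	got := strings.TrimPrefix(strings.TrimSpace(string(out)), property+"=")
	return got == want, nil
}

func main() {
	ok, err := checkOverride("dbus.service", "KillMode", "control-group")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("KillMode override active:", ok)
}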


@@ -147,61 +147,6 @@ var _ = SIGDescribe("Restart", framework.WithSerial(), framework.WithSlow(), fra
			})
		})
	})

	ginkgo.Context("Dbus", func() {
		ginkgo.It("should continue to run pods after a restart", func(ctx context.Context) {
			// Allow dbus to be restarted on ubuntu
			err := overlayDbusConfig()
			framework.ExpectNoError(err)
			defer func() {
				err := restoreDbusConfig()
				framework.ExpectNoError(err)
			}()

			preRestartPodCount := 2
			ginkgo.By(fmt.Sprintf("creating %d RestartAlways pods on node", preRestartPodCount))
			restartAlwaysPods := newTestPods(preRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test")
			createBatchPodWithRateControl(ctx, f, restartAlwaysPods, podCreationInterval)
			ginkgo.DeferCleanup(deletePodsSync, f, restartAlwaysPods)

			allPods := waitForPodsCondition(ctx, f, preRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
			if len(allPods) < preRestartPodCount {
				framework.Failf("Failed to run sufficient restartAlways pods, got %d but expected %d", len(allPods), preRestartPodCount)
			}

			ginkgo.By("restarting dbus and systemd", func() {
				stdout, err := exec.Command("sudo", "systemctl", "reset-failed", "dbus").CombinedOutput()
				framework.ExpectNoError(err, "Failed to reset dbus start-limit with systemctl: %v, %s", err, string(stdout))

				stdout, err = exec.Command("sudo", "systemctl", "restart", "dbus").CombinedOutput()
				framework.ExpectNoError(err, "Failed to restart dbus with systemctl: %v, %s", err, string(stdout))

				stdout, err = exec.Command("sudo", "systemctl", "daemon-reexec").CombinedOutput()
				framework.ExpectNoError(err, "Failed to restart systemd with systemctl: %v, %s", err, string(stdout))
			})

			ginkgo.By("verifying restartAlways pods stay running", func() {
				for start := time.Now(); time.Since(start) < startTimeout && ctx.Err() == nil; time.Sleep(10 * time.Second) {
					postRestartRunningPods := waitForPodsCondition(ctx, f, preRestartPodCount, recoverTimeout, testutils.PodRunningReadyOrSucceeded)
					if len(postRestartRunningPods) < preRestartPodCount {
						framework.Failf("fewer pods are running after systemd restart, got %d but expected %d", len(postRestartRunningPods), preRestartPodCount)
					}
				}
			})

			ginkgo.By("verifying new pods can be started after a dbus restart")
			postRestartPodCount := 2
			postRestartPods := newTestPods(postRestartPodCount, false, imageutils.GetPauseImageName(), "restart-dbus-test")
			createBatchPodWithRateControl(ctx, f, postRestartPods, podCreationInterval)
			ginkgo.DeferCleanup(deletePodsSync, f, postRestartPods)

			allPods = waitForPodsCondition(ctx, f, preRestartPodCount+postRestartPodCount, startTimeout, testutils.PodRunningReadyOrSucceeded)
			if len(allPods) < preRestartPodCount+postRestartPodCount {
				framework.Failf("Failed to run pods after restarting dbus, got %d but expected %d", len(allPods), preRestartPodCount+postRestartPodCount)
			}
		})
	})

	ginkgo.Context("Kubelet", func() {
		ginkgo.It("should correctly account for terminated pods after restart", func(ctx context.Context) {
			node := getLocalNode(ctx, f)
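For the record, the three systemctl invocations the removed test chained each do something distinct: `reset-failed` clears the unit's failed state and start-rate-limit counter so the restart is not refused (hence the "reset dbus start-limit" error message above), `restart dbus` bounces the system bus, and `daemon-reexec` makes the systemd manager serialize its state and re-execute itself. A condensed, standalone sketch of that sequence, assuming root on a systemd host; the function name is illustrative, and running it on a live node carries exactly the risk this commit removes:

package main

import (
	"fmt"
	"os/exec"
)

// restartDbusAndSystemd condenses the sequence the removed test ran.
// Illustrative only: a failure midway can leave the node degraded, which
// is part of why the test case was considered dangerous practice.
func restartDbusAndSystemd() error {
	steps := [][]string{
		{"systemctl", "reset-failed", "dbus"}, // clear failed state and start-rate-limit counter
		{"systemctl", "restart", "dbus"},      // bounce the system bus
		{"systemctl", "daemon-reexec"},        // re-execute the systemd manager
	}
	for _, step := range steps {
		if out, err := exec.Command(step[0], step[1:]...).CombinedOutput(); err != nil {
			return fmt.Errorf("%v failed: %w (%s)", step, err, out)
		}
	}
	return nil
}

func main() {
	if err := restartDbusAndSystemd(); err != nil {
		fmt.Println(err)
	}
}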


@@ -316,16 +316,6 @@ func logKubeletLatencyMetrics(ctx context.Context, metricNames ...string) {
	}
}

// runCommand runs the cmd and returns the combined stdout and stderr, or an
// error if the command failed.
func runCommand(cmd ...string) (string, error) {
	output, err := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to run %q: %s (%s)", strings.Join(cmd, " "), err, output)
	}
	return string(output), nil
}

// getCRIClient connects CRI and returns CRI runtime service clients and image service client.
func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService, error) {
	// connection timeout for CRI service connection