e2e_node: add a test to verify the kubelet starts with the systemd cgroup driver and the cpumanager "none" policy

This was originally planned as a correctness check for https://issues.k8s.io/125923, but the bug was difficult to reproduce, so it is now a regression test against it.

Signed-off-by: Francesco Romani <fromani@redhat.com>
Signed-off-by: Peter Hunt <pehunt@redhat.com>
commit cc87438f2f
parent 77d03e42cd
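At its core, the new test drives the kubelet into one specific configuration and checks that it comes up and stays healthy. As a minimal sketch, the three KubeletConfiguration fields the test flips are shown below; the field names come straight from the diff, while the surrounding program and the import path are illustrative assumptions, not part of this commit.

package main

import (
	"fmt"

	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
)

func main() {
	// The configuration under test: default CPU manager policy, systemd
	// cgroup driver, and a hard failure if the host is still on cgroup v1.
	cfg := kubeletconfig.KubeletConfiguration{
		CPUManagerPolicy: "none",
		CgroupDriver:     "systemd",
		FailCgroupV1:     true,
	}
	fmt.Printf("cpuManagerPolicy=%s cgroupDriver=%s failCgroupV1=%v\n",
		cfg.CPUManagerPolicy, cfg.CgroupDriver, cfg.FailCgroupV1)
}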
@@ -76,6 +76,82 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
			framework.ExpectNoError(runTest(ctx, f))
		})
	})
	f.Describe("Validate CGroup management", func() {
		// Regression test for https://issues.k8s.io/125923
		// In this issue there's a race involving systemd which seems to manifest most likely, or perhaps only
		// (the data gathered so far is inconclusive), on the very first boot of the machine, so restarting the
		// kubelet does not seem sufficient. OTOH, the exact reproducer seems to require a dedicated lane running
		// only this test, or rebooting the machine before running this test. Both are practically unrealistic in CI.
		// The closest approximation is this test in its current form, using a kubelet restart. This at least
		// acts as non-regression testing, so it still brings value.
		ginkgo.It("should correctly start with cpumanager none policy in use with systemd", func(ctx context.Context) {
			if !IsCgroup2UnifiedMode() {
				ginkgo.Skip("this test requires cgroups v2")
			}

			var err error
			var oldCfg *kubeletconfig.KubeletConfiguration
			// Get the current kubelet configuration
			oldCfg, err = getCurrentKubeletConfig(ctx)
			framework.ExpectNoError(err)

			ginkgo.DeferCleanup(func(ctx context.Context) {
				if oldCfg != nil {
					// Restore the original kubelet configuration.
					ginkgo.By("Stopping the kubelet")
					startKubelet := stopKubelet()

					// wait until the kubelet health check fails
					gomega.Eventually(ctx, func() bool {
						return kubeletHealthCheck(kubeletHealthCheckURL)
					}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to fail"))
					ginkgo.By("Stopped the kubelet")

					framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))

					ginkgo.By("Starting the kubelet")
					startKubelet()

					// wait until the kubelet health check succeeds
					gomega.Eventually(ctx, func(ctx context.Context) bool {
						return kubeletHealthCheck(kubeletHealthCheckURL)
					}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
					ginkgo.By("Started the kubelet")
				}
			})

			newCfg := oldCfg.DeepCopy()
			// Change the existing kubelet configuration
			newCfg.CPUManagerPolicy = "none"
			newCfg.CgroupDriver = "systemd"
			newCfg.FailCgroupV1 = true // extra safety; we want to avoid false negatives, hence the cgroup v2 skip check earlier

			// Update the kubelet configuration.
			ginkgo.By("Stopping the kubelet")
			startKubelet := stopKubelet()

			// wait until the kubelet health check fails
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to fail"))
			ginkgo.By("Stopped the kubelet")

			framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))

			ginkgo.By("Starting the kubelet")
			startKubelet()

			// wait until the kubelet health check succeeds
			gomega.Eventually(ctx, func() bool {
				return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
			}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
			ginkgo.By("Started the kubelet")

			gomega.Consistently(ctx, func(ctx context.Context) bool {
				return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
			}).WithTimeout(2 * time.Minute).WithPolling(2 * time.Second).Should(gomega.BeTrueBecause("node keeps reporting ready status"))
		})
	})
})

func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
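The stop/reconfigure/start choreography above relies on two framework helpers: stopKubelet(), which returns a closure that restarts the service, and kubeletHealthCheck(), polled through gomega.Eventually. A self-contained sketch of the same pattern follows; the systemctl unit name, the healthz URL, and the waitFor helper are assumptions standing in for the e2e_node framework's actual implementation.

package main

import (
	"net/http"
	"os/exec"
	"time"
)

// healthy reports whether the kubelet healthz endpoint answers 200 OK;
// http://127.0.0.1:10248/healthz is the kubelet's default, assumed here.
func healthy(url string) bool {
	resp, err := http.Get(url)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// stop halts the kubelet unit and hands back a closure that restarts it,
// mirroring the startKubelet := stopKubelet() idiom in the test.
func stop() (start func() error) {
	_ = exec.Command("systemctl", "stop", "kubelet").Run()
	return func() error {
		return exec.Command("systemctl", "start", "kubelet").Run()
	}
}

// waitFor polls cond every interval until it returns true or timeout
// elapses, a bare-bones stand-in for gomega.Eventually(...).WithTimeout(...).
func waitFor(cond func() bool, timeout, interval time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return false
}

func main() {
	const url = "http://127.0.0.1:10248/healthz"
	start := stop()
	// First wait for the health check to fail: the kubelet is really down.
	waitFor(func() bool { return !healthy(url) }, time.Minute, time.Second)
	// ... rewrite the kubelet config file here ...
	_ = start()
	// Then wait for the health check to succeed again after the restart.
	waitFor(func() bool { return healthy(url) }, 2*time.Minute, 5*time.Second)
}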
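The trailing context is cut off inside expectFileValToEqual, so its body is not shown here. Judging only from the signature, it presumably reads an integer from a file (likely a cgroup interface file, given the surrounding suite) and checks that it lies within delta of expectedValue. A hypothetical reading, not the actual implementation; the example path in main is made up:

package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
)

// Hypothetical sketch based solely on the signature above; the real body
// is truncated in this view of the diff.
func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
	data, err := os.ReadFile(filePath)
	if err != nil {
		return err
	}
	actual, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
	if err != nil {
		return err
	}
	if diff := actual - expectedValue; diff < -delta || diff > delta {
		return fmt.Errorf("%s: got %d, want %d within delta %d", filePath, actual, expectedValue, delta)
	}
	return nil
}

func main() {
	// Example only: assert a cgroup value is 100 with a tolerance of 2.
	if err := expectFileValToEqual("/sys/fs/cgroup/kubepods.slice/cpu.weight", 100, 2); err != nil {
		fmt.Println(err)
	}
}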