e2e_node: add a test to verify the kubelet starts with the systemd cgroup driver and the cpumanager "none" policy.

This was originally planned to be a correctness check for https://issues.k8s.io/125923, but the bug proved difficult to reproduce, so it now serves as a regression test against it.

Signed-off-by: Francesco Romani <fromani@redhat.com>
Signed-off-by: Peter Hunt <pehunt@redhat.com>
Parent: 77d03e42cd
Commit: cc87438f2f
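For reference, the kubelet settings the new test flips at runtime (systemd cgroup driver, "none" CPU manager policy, fail-on-cgroup-v1) correspond to a configuration like the sketch below. This is a minimal illustrative program using the public kubelet.config.k8s.io/v1beta1 types, not code from this commit, and it assumes a kubelet release that already exposes the failCgroupV1 field.

package main

import (
	"encoding/json"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletv1beta1 "k8s.io/kubelet/config/v1beta1"
)

func main() {
	failV1 := true
	cfg := kubeletv1beta1.KubeletConfiguration{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "kubelet.config.k8s.io/v1beta1",
			Kind:       "KubeletConfiguration",
		},
		// The combination exercised by the test: systemd cgroup driver
		// together with the default ("none") CPU manager policy.
		CgroupDriver:     "systemd",
		CPUManagerPolicy: "none",
		// Refuse to start on cgroup v1 hosts, mirroring the extra-safety
		// flag the test sets (the test also skips unless cgroups v2 is used).
		FailCgroupV1: &failV1,
	}

	// Print the configuration so it can be inspected or adapted into a
	// kubelet config file.
	out, err := json.MarshalIndent(cfg, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}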
@@ -76,6 +76,82 @@ var _ = SIGDescribe("Node Container Manager", framework.WithSerial(), func() {
			framework.ExpectNoError(runTest(ctx, f))
		})
	})

	f.Describe("Validate CGroup management", func() {
		// Regression test for https://issues.k8s.io/125923
		// In this issue there's a race involving systemd which seems to manifest most likely, or perhaps only
		// (the data gathered so far is inconclusive), on the very first boot of the machine, so restarting the kubelet
		// does not seem sufficient. OTOH, the exact reproducer seems to require a dedicated lane with only this test,
		// or rebooting the machine before running this test. Both are practically unrealistic in CI.
		// The closest approximation is this test in its current form, using a kubelet restart. This at least
		// acts as non-regression testing, so it still brings value.
		ginkgo.It("should correctly start with cpumanager none policy in use with systemd", func(ctx context.Context) {
			if !IsCgroup2UnifiedMode() {
				ginkgo.Skip("this test requires cgroups v2")
			}

			var err error
			var oldCfg *kubeletconfig.KubeletConfiguration
			// Get the current kubelet configuration
			oldCfg, err = getCurrentKubeletConfig(ctx)
			framework.ExpectNoError(err)

			ginkgo.DeferCleanup(func(ctx context.Context) {
				if oldCfg != nil {
					// Restore the previous kubelet configuration.
					ginkgo.By("Stopping the kubelet")
					startKubelet := stopKubelet()

					// wait until the kubelet health check fails
					gomega.Eventually(ctx, func() bool {
						return kubeletHealthCheck(kubeletHealthCheckURL)
					}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
					ginkgo.By("Stopped the kubelet")

					framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))

					ginkgo.By("Starting the kubelet")
					startKubelet()

					// wait until the kubelet health check succeeds
					gomega.Eventually(ctx, func(ctx context.Context) bool {
						return kubeletHealthCheck(kubeletHealthCheckURL)
					}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
					ginkgo.By("Started the kubelet")
				}
			})

			newCfg := oldCfg.DeepCopy()
			// Change the existing kubelet configuration
			newCfg.CPUManagerPolicy = "none"
			newCfg.CgroupDriver = "systemd"
			newCfg.FailCgroupV1 = true // extra safety; we want to avoid false negatives, though, hence the cgroups v2 skip check earlier

			// Update the Kubelet configuration.
			ginkgo.By("Stopping the kubelet")
			startKubelet := stopKubelet()

			// wait until the kubelet health check fails
			gomega.Eventually(ctx, func() bool {
				return kubeletHealthCheck(kubeletHealthCheckURL)
			}).WithTimeout(time.Minute).WithPolling(time.Second).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))
			ginkgo.By("Stopped the kubelet")

			framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))

			ginkgo.By("Starting the kubelet")
			startKubelet()

			// wait until the kubelet health check succeeds
			gomega.Eventually(ctx, func() bool {
				return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
			}).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))
			ginkgo.By("Started the kubelet")

			gomega.Consistently(ctx, func(ctx context.Context) bool {
				return getNodeReadyStatus(ctx, f) && kubeletHealthCheck(kubeletHealthCheckURL)
			}).WithTimeout(2 * time.Minute).WithPolling(2 * time.Second).Should(gomega.BeTrueBecause("node keeps reporting ready status"))
		})
	})
})

func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
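The test skips unless the host runs in cgroup v2 unified mode, via the framework's IsCgroup2UnifiedMode helper. A simplified, Linux-only stand-in for that kind of check is sketched below: it statfs-es /sys/fs/cgroup and compares the filesystem magic number, the usual detection technique. The function name here is illustrative and is not the framework's implementation.

package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// isCgroup2UnifiedMode reports whether /sys/fs/cgroup is a cgroup2 mount,
// i.e. whether the host runs in cgroup v2 unified mode.
func isCgroup2UnifiedMode() (bool, error) {
	var st unix.Statfs_t
	if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
		return false, err
	}
	return st.Type == unix.CGROUP2_SUPER_MAGIC, nil
}

func main() {
	unified, err := isCgroup2UnifiedMode()
	if err != nil {
		fmt.Println("cannot detect cgroup mode:", err)
		return
	}
	fmt.Println("cgroup v2 unified mode:", unified)
}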
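The restart sequence in the diff leans on the e2e_node helpers stopKubelet, kubeletHealthCheck, and kubeletHealthCheckURL. Outside the framework, a rough stand-in for the health probe and the Eventually-style polling could look like the sketch below, assuming the kubelet's default healthz endpoint on 127.0.0.1:10248; the names and URL here are illustrative and do not reflect the framework's actual implementation.

package main

import (
	"context"
	"fmt"
	"net/http"
	"time"
)

// kubeletHealthCheckURL assumes the kubelet's default healthz address and port.
const kubeletHealthCheckURL = "http://127.0.0.1:10248/healthz"

// kubeletHealthCheck reports whether the kubelet's healthz endpoint returns 200.
func kubeletHealthCheck(ctx context.Context, url string) bool {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return false
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

func main() {
	// Poll until the kubelet reports healthy or the deadline expires, mirroring
	// Eventually(...).WithTimeout(2 * time.Minute).WithPolling(5 * time.Second).
	deadline := time.Now().Add(2 * time.Minute)
	for time.Now().Before(deadline) {
		if kubeletHealthCheck(context.Background(), kubeletHealthCheckURL) {
			fmt.Println("kubelet is healthy")
			return
		}
		time.Sleep(5 * time.Second)
	}
	fmt.Println("kubelet did not become healthy in time")
}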