diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go
index a23edd8631d..b8128659f43 100644
--- a/test/e2e_node/cpu_manager_test.go
+++ b/test/e2e_node/cpu_manager_test.go
@@ -34,13 +34,14 @@ import (
 	cpumanagerstate "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
 	"k8s.io/kubernetes/pkg/kubelet/types"
+
+	"github.com/onsi/ginkgo"
+	"github.com/onsi/gomega"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
-
-	"github.com/onsi/ginkgo"
-	"github.com/onsi/gomega"
+	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
 )
 
 // Helper for makeCPUManagerPod().
@@ -60,12 +61,12 @@ func makeCPUManagerPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod {
 			Image: busyboxImage,
 			Resources: v1.ResourceRequirements{
 				Requests: v1.ResourceList{
-					v1.ResourceName(v1.ResourceCPU):    resource.MustParse(ctnAttr.cpuRequest),
-					v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
+					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuRequest),
+					v1.ResourceMemory: resource.MustParse("100Mi"),
 				},
 				Limits: v1.ResourceList{
-					v1.ResourceName(v1.ResourceCPU):    resource.MustParse(ctnAttr.cpuLimit),
-					v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
+					v1.ResourceCPU:    resource.MustParse(ctnAttr.cpuLimit),
+					v1.ResourceMemory: resource.MustParse("100Mi"),
 				},
 			},
 			Command: []string{"sh", "-c", cpusetCmd},
@@ -109,7 +110,7 @@ func getLocalNodeCPUDetails(f *framework.Framework) (cpuCapVal int64, cpuAllocVa
 	// RoundUp reserved CPUs to get only integer cores.
 	cpuRes.RoundUp(0)
 
-	return cpuCap.Value(), (cpuCap.Value() - cpuRes.Value()), cpuRes.Value()
+	return cpuCap.Value(), cpuCap.Value() - cpuRes.Value(), cpuRes.Value()
 }
 
 func waitForContainerRemoval(containerName, podName, podNS string) {
@@ -190,71 +191,75 @@ func deleteStateFile() {
 	framework.ExpectNoError(err, "error deleting state file")
 }
 
-func setOldKubeletConfig(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration) {
+func setOldKubeletConfig(oldCfg *kubeletconfig.KubeletConfiguration) {
+	ginkgo.By("Stopping the kubelet")
+	startKubelet := stopKubelet()
+
+	// wait until the kubelet health check will fail
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, time.Minute, time.Second).Should(gomega.BeFalse())
+
 	// Delete the CPU Manager state file so that the old Kubelet configuration
-	// can take effect.i
+	// can take effect
 	deleteStateFile()
 
 	if oldCfg != nil {
-		framework.ExpectNoError(setKubeletConfiguration(f, oldCfg))
+		framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg))
 	}
+
+	ginkgo.By("Starting the kubelet")
+	startKubelet()
+
+	// wait until the kubelet health check will succeed
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
+}
+
+type cpuManagerKubeletArguments struct {
+	policyName              string
+	cleanStateFile          bool
+	enableCPUManager        bool
+	enableCPUManagerOptions bool
+	reservedSystemCPUs      cpuset.CPUSet
+	options                 map[string]string
 }
 
 func disableCPUManagerInKubelet(f *framework.Framework) (oldCfg *kubeletconfig.KubeletConfiguration) {
 	// Disable CPU Manager in Kubelet.
-	oldCfg, err := getCurrentKubeletConfig()
-	framework.ExpectNoError(err)
-	newCfg := oldCfg.DeepCopy()
-	if newCfg.FeatureGates == nil {
-		newCfg.FeatureGates = make(map[string]bool)
-	}
-	newCfg.FeatureGates["CPUManager"] = false
-
-	// Update the Kubelet configuration.
-	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
-
-	// Wait for the Kubelet to be ready.
-	gomega.Eventually(func() bool {
-		nodes, err := e2enode.TotalReady(f.ClientSet)
-		framework.ExpectNoError(err)
-		return nodes == 1
-	}, time.Minute, time.Second).Should(gomega.BeTrue())
+	oldCfg = configureCPUManagerInKubelet(f, &cpuManagerKubeletArguments{
+		enableCPUManager: false,
+	})
 
 	return oldCfg
 }
 
-func configureCPUManagerInKubelet(f *framework.Framework, policyName string, cleanStateFile bool, reservedSystemCPUs cpuset.CPUSet, enableOptions bool, options map[string]string) (oldCfg *kubeletconfig.KubeletConfiguration) {
+func configureCPUManagerInKubelet(f *framework.Framework, kubeletArguments *cpuManagerKubeletArguments) (oldCfg *kubeletconfig.KubeletConfiguration) {
 	// Enable CPU Manager in Kubelet with static policy.
 	oldCfg, err := getCurrentKubeletConfig()
 	framework.ExpectNoError(err)
+
 	newCfg := oldCfg.DeepCopy()
 	if newCfg.FeatureGates == nil {
 		newCfg.FeatureGates = make(map[string]bool)
 	}
-	newCfg.FeatureGates["CPUManager"] = true
-	newCfg.FeatureGates["CPUManagerPolicyOptions"] = enableOptions
-	newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = enableOptions
-	newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = enableOptions
-	// After graduation of the CPU Manager feature to Beta, the CPU Manager
-	// "none" policy is ON by default. But when we set the CPU Manager policy to
-	// "static" in this test and the Kubelet is restarted so that "static"
-	// policy can take effect, there will always be a conflict with the state
-	// checkpointed in the disk (i.e., the policy checkpointed in the disk will
-	// be "none" whereas we are trying to restart Kubelet with "static"
-	// policy). Therefore, we delete the state file so that we can proceed
-	// with the tests.
-	// Only delete the state file at the begin of the tests.
-	if cleanStateFile {
-		deleteStateFile()
+	newCfg.FeatureGates["CPUManager"] = kubeletArguments.enableCPUManager
+
+	newCfg.FeatureGates["CPUManagerPolicyOptions"] = kubeletArguments.enableCPUManagerOptions
+	newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = kubeletArguments.enableCPUManagerOptions
+	newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions
+
+	newCfg.CPUManagerPolicy = kubeletArguments.policyName
+	newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
+
+	if kubeletArguments.options != nil {
+		newCfg.CPUManagerPolicyOptions = kubeletArguments.options
+	}
 
-	newCfg.CPUManagerPolicy = policyName
-	newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
-	newCfg.CPUManagerPolicyOptions = options
-
-	if reservedSystemCPUs.Size() > 0 {
-		cpus := reservedSystemCPUs.String()
+	if kubeletArguments.reservedSystemCPUs.Size() > 0 {
+		cpus := kubeletArguments.reservedSystemCPUs.String()
 		framework.Logf("configureCPUManagerInKubelet: using reservedSystemCPUs=%q", cpus)
 		newCfg.ReservedSystemCPUs = cpus
 	} else {
@@ -269,8 +274,37 @@ func configureCPUManagerInKubelet(f *framework.Framework, policyName string, cle
 			newCfg.KubeReserved["cpu"] = "200m"
 		}
 	}
-	// Update the Kubelet configuration.
-	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
+
+	ginkgo.By("Stopping the kubelet")
+	startKubelet := stopKubelet()
+
+	// wait until the kubelet health check will fail
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, time.Minute, time.Second).Should(gomega.BeFalse())
+
+	// After graduation of the CPU Manager feature to Beta, the CPU Manager
+	// "none" policy is ON by default. But when we set the CPU Manager policy to
+	// "static" in this test and the Kubelet is restarted so that "static"
+	// policy can take effect, there will always be a conflict with the state
+	// checkpointed in the disk (i.e., the policy checkpointed in the disk will
+	// be "none" whereas we are trying to restart Kubelet with "static"
+	// policy). Therefore, we delete the state file so that we can proceed
+	// with the tests.
+	// Only delete the state file at the begin of the tests.
+	if kubeletArguments.cleanStateFile {
+		deleteStateFile()
+	}
+
+	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
+
+	ginkgo.By("Starting the kubelet")
+	startKubelet()
+
+	// wait until the kubelet health check will succeed
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
 
 	// Wait for the Kubelet to be ready.
 	gomega.Eventually(func() bool {
@@ -590,7 +624,12 @@ func runCPUManagerTests(f *framework.Framework) {
 		}
 
 		// Enable CPU Manager in the kubelet.
-		oldCfg = configureCPUManagerInKubelet(f, string(cpumanager.PolicyStatic), true, cpuset.CPUSet{}, false, nil)
+		oldCfg = configureCPUManagerInKubelet(f, &cpuManagerKubeletArguments{
+			policyName:         string(cpumanager.PolicyStatic),
+			enableCPUManager:   true,
+			reservedSystemCPUs: cpuset.CPUSet{},
+			cleanStateFile:     true,
+		})
 
 		ginkgo.By("running a non-Gu pod")
 		runNonGuPodTest(f, cpuCap)
@@ -657,7 +696,11 @@ func runCPUManagerTests(f *framework.Framework) {
 		waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
 
 		ginkgo.By("enable cpu manager in kubelet without delete state file")
-		configureCPUManagerInKubelet(f, string(cpumanager.PolicyStatic), false, cpuset.CPUSet{}, false, nil)
+		configureCPUManagerInKubelet(f, &cpuManagerKubeletArguments{
+			policyName:         string(cpumanager.PolicyStatic),
+			enableCPUManager:   true,
+			reservedSystemCPUs: cpuset.CPUSet{},
+		})
 
 		ginkgo.By("wait for the deleted pod to be cleaned up from the state file")
 		waitForStateFileCleanedUp()
@@ -681,13 +724,20 @@ func runCPUManagerTests(f *framework.Framework) {
 
 		framework.Logf("SMT level %d", smtLevel)
 
-		cleanStateFile := true
 		// TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably
 		// check what we do have in the node.
 		cpuPolicyOptions := map[string]string{
 			cpumanager.FullPCPUsOnlyOption: "true",
 		}
-		oldCfg = configureCPUManagerInKubelet(f, string(cpumanager.PolicyStatic), cleanStateFile, cpuset.NewCPUSet(0), true, cpuPolicyOptions)
+		oldCfg = configureCPUManagerInKubelet(f,
+			&cpuManagerKubeletArguments{
+				policyName:              string(cpumanager.PolicyStatic),
+				enableCPUManager:        true,
+				cleanStateFile:          true,
+				reservedSystemCPUs:      cpuset.NewCPUSet(0),
+				enableCPUManagerOptions: true,
+				options:                 cpuPolicyOptions,
+			})
 
 		// the order between negative and positive doesn't really matter
 		runSMTAlignmentNegativeTests(f)
@@ -695,7 +745,7 @@ func runCPUManagerTests(f *framework.Framework) {
 	})
 
 	ginkgo.AfterEach(func() {
-		setOldKubeletConfig(f, oldCfg)
+		setOldKubeletConfig(oldCfg)
 	})
 }
diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go
index a62359252fe..98431d3f8e6 100644
--- a/test/e2e_node/memory_manager_test.go
+++ b/test/e2e_node/memory_manager_test.go
@@ -1,3 +1,6 @@
+//go:build linux
+// +build linux
+
 /*
 Copyright 2017 The Kubernetes Authors.
 
@@ -40,6 +43,7 @@ import (
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
 	"k8s.io/utils/pointer"
 
 	"github.com/onsi/ginkgo"
@@ -228,11 +232,24 @@ func getUpdatedKubeletConfig(oldCfg *kubeletconfig.KubeletConfiguration, params
 }
 
 func updateKubeletConfig(f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) {
-	// remove the state file
+	ginkgo.By("Stopping the kubelet")
+	startKubelet := stopKubelet()
+
+	// wait until the kubelet health check will fail
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, time.Minute, time.Second).Should(gomega.BeFalse())
+
+	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg))
 	deleteMemoryManagerStateFile()
 
-	// Update the Kubelet configuration
-	framework.ExpectNoError(setKubeletConfiguration(f, cfg))
+	ginkgo.By("Starting the kubelet")
+	startKubelet()
+
+	// wait until the kubelet health check will succeed
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
 
 	// Wait for the Kubelet to be ready.
 	gomega.Eventually(func() bool {
@@ -264,7 +281,7 @@ func getAllNUMANodes() []int {
 }
 
 // Serial because the test updates kubelet configuration.
-var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { +var _ = SIGDescribe("Memory Manager [Disruptive] [Serial] [Feature:MemoryManager]", func() { // TODO: add more complex tests that will include interaction between CPUManager, MemoryManager and TopologyManager var ( allNUMANodes []int @@ -305,6 +322,32 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { framework.ExpectEqual(numaNodeIDs, currentNUMANodeIDs.ToSlice()) } + waitingForHugepages := func(hugepagesCount int) { + gomega.Eventually(func() error { + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResourceName2Mi)] + if !ok { + return fmt.Errorf("the node does not have the resource %s", hugepagesResourceName2Mi) + } + + size, succeed := capacity.AsInt64() + if !succeed { + return fmt.Errorf("failed to convert quantity to int64") + } + + // 512 Mb, the expected size in bytes + expectedSize := int64(hugepagesCount * hugepagesSize2M * 1024) + if size != expectedSize { + return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize) + } + return nil + }, time.Minute, framework.Poll).Should(gomega.BeNil()) + } + ginkgo.BeforeEach(func() { if isMultiNUMASupported == nil { isMultiNUMASupported = pointer.BoolPtr(isMultiNUMA()) @@ -322,64 +365,23 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { // dynamically update the kubelet configuration ginkgo.JustBeforeEach(func() { var err error + hugepagesCount := 8 // allocate hugepages if *is2MiHugepagesSupported { - hugepagesCount := 256 ginkgo.By("Configuring hugepages") gomega.Eventually(func() error { - if err := configureHugePages(hugepagesSize2M, hugepagesCount); err != nil { - return err - } - return nil + return configureHugePages(hugepagesSize2M, hugepagesCount) }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) - - ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") - - // stop the kubelet and wait until the server will restart it automatically - stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, time.Second).Should(gomega.BeFalse()) - - restartKubelet(false) - - // wait until the kubelet health check will pass - gomega.Eventually(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, 2*time.Minute, 10*time.Second).Should(gomega.BeTrue()) - - ginkgo.By("Waiting for hugepages resource to become available on the local node") - gomega.Eventually(func() error { - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{}) - if err != nil { - return err - } - - capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResourceName2Mi)] - if !ok { - return fmt.Errorf("the node does not have the resource %s", hugepagesResourceName2Mi) - } - - size, succeed := capacity.AsInt64() - if !succeed { - return fmt.Errorf("failed to convert quantity to int64") - } - - // 512 Mb, the expected size in bytes - expectedSize := int64(hugepagesCount * hugepagesSize2M * 1024) - if size != expectedSize { - return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize) - } - return nil - }, time.Minute, framework.Poll).Should(gomega.BeNil()) } // get the old kubelet config - 
oldCfg, err = getCurrentKubeletConfig() - framework.ExpectNoError(err) + if oldCfg == nil { + gomega.Eventually(func() error { + oldCfg, err = e2enodekubelet.GetCurrentKubeletConfigFromFile() + return err + }, 5*time.Minute, 15*time.Second).Should(gomega.BeNil()) + } // update the kubelet config with new parameters newCfg := getUpdatedKubeletConfig(oldCfg, kubeParams) @@ -387,8 +389,11 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { // request hugepages resources under the container if *is2MiHugepagesSupported { + ginkgo.By("Waiting for hugepages resource to become available on the local node") + waitingForHugepages(hugepagesCount) + for i := 0; i < len(ctnParams); i++ { - ctnParams[i].hugepages2Mi = "128Mi" + ctnParams[i].hugepages2Mi = "8Mi" } } @@ -404,20 +409,21 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { } // release hugepages - gomega.Eventually(func() error { - return configureHugePages(hugepagesSize2M, 0) - }, 90*time.Second, 15*time.Second).ShouldNot(gomega.HaveOccurred(), "failed to release hugepages") + if *is2MiHugepagesSupported { + ginkgo.By("Releasing allocated hugepages") + gomega.Eventually(func() error { + return configureHugePages(hugepagesSize2M, 0) + }, 90*time.Second, 15*time.Second).ShouldNot(gomega.HaveOccurred(), "failed to release hugepages") + } - // update the kubelet config with old values - updateKubeletConfig(f, oldCfg) + // update the kubelet config with old parameters + if oldCfg != nil { + updateKubeletConfig(f, oldCfg) + } - // wait until the kubelet health check will pass and will continue to pass for specified period of time - gomega.Eventually(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, 10*time.Second).Should(gomega.BeTrue()) - gomega.Consistently(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, 10*time.Second).Should(gomega.BeTrue()) + if *is2MiHugepagesSupported { + waitingForHugepages(0) + } }) ginkgo.Context("with static policy", func() { diff --git a/test/e2e_node/node_container_manager_test.go b/test/e2e_node/node_container_manager_test.go index b2b47a32ba3..a44e0ac8492 100644 --- a/test/e2e_node/node_container_manager_test.go +++ b/test/e2e_node/node_container_manager_test.go @@ -34,8 +34,10 @@ import ( kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/stats/pidlimit" + "k8s.io/kubernetes/test/e2e/framework" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig" "github.com/onsi/ginkgo" "github.com/onsi/gomega" @@ -183,7 +185,24 @@ func runTest(f *framework.Framework) error { defer destroyTemporaryCgroupsForReservation(cgroupManager) defer func() { if oldCfg != nil { - framework.ExpectNoError(setKubeletConfiguration(f, oldCfg)) + // Update the Kubelet configuration. 
+ ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) } }() if err := createTemporaryCgroupsForReservation(cgroupManager); err != nil { @@ -193,7 +212,25 @@ func runTest(f *framework.Framework) error { // Change existing kubelet configuration setDesiredConfiguration(newCfg) // Set the new kubelet configuration. - err = setKubeletConfiguration(f, newCfg) + // Update the Kubelet configuration. + ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) + if err != nil { return err } diff --git a/test/e2e_node/node_perf_test.go b/test/e2e_node/node_perf_test.go index 50be44d111c..e5c064da122 100644 --- a/test/e2e_node/node_perf_test.go +++ b/test/e2e_node/node_perf_test.go @@ -24,10 +24,12 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig" "k8s.io/kubernetes/test/e2e_node/perf/workloads" "github.com/onsi/ginkgo" @@ -46,7 +48,24 @@ func makeNodePerfPod(w workloads.NodePerfWorkload) *v1.Pod { func setKubeletConfig(f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) { if cfg != nil { - framework.ExpectNoError(setKubeletConfiguration(f, cfg)) + // Update the Kubelet configuration. + ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) } // Wait for the Kubelet to be ready. 
diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go
index 1b27dda6fab..21bce165d55 100644
--- a/test/e2e_node/podresources_test.go
+++ b/test/e2e_node/podresources_test.go
@@ -43,6 +43,7 @@ import (
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
+	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
 
 	"github.com/onsi/ginkgo"
 	"github.com/onsi/gomega"
@@ -494,7 +495,7 @@ func podresourcesListTests(f *framework.Framework, cli kubeletpodresourcesv1.Pod
 
 }
 
-func podresourcesGetAllocatableResourcesTests(f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, onlineCPUs, reservedSystemCPUs cpuset.CPUSet) {
+func podresourcesGetAllocatableResourcesTests(cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, onlineCPUs, reservedSystemCPUs cpuset.CPUSet) {
 	ginkgo.By("checking the devices known to the kubelet")
 	resp, err := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{})
 	framework.ExpectNoErrorWithOffset(1, err)
@@ -553,7 +554,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			oldCfg := configurePodResourcesInKubelet(f, true, reservedSystemCPUs)
 			defer func() {
 				// restore kubelet config
-				setOldKubeletConfig(f, oldCfg)
+				setOldKubeletConfig(oldCfg)
 
 				// Delete state file to allow repeated runs
 				deleteStateFile()
@@ -577,7 +578,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			ginkgo.By("checking List()")
 			podresourcesListTests(f, cli, sd)
 			ginkgo.By("checking GetAllocatableResources()")
-			podresourcesGetAllocatableResourcesTests(f, cli, sd, onlineCPUs, reservedSystemCPUs)
+			podresourcesGetAllocatableResourcesTests(cli, sd, onlineCPUs, reservedSystemCPUs)
 
 		})
 
@@ -589,7 +590,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			oldCfg := enablePodResourcesFeatureGateInKubelet(f)
 			defer func() {
 				// restore kubelet config
-				setOldKubeletConfig(f, oldCfg)
+				setOldKubeletConfig(oldCfg)
 
 				// Delete state file to allow repeated runs
 				deleteStateFile()
@@ -612,7 +613,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 
 			// intentionally passing empty cpuset instead of onlineCPUs because with none policy
 			// we should get no allocatable cpus - no exclusively allocatable CPUs, depends on policy static
-			podresourcesGetAllocatableResourcesTests(f, cli, sd, cpuset.CPUSet{}, cpuset.CPUSet{})
+			podresourcesGetAllocatableResourcesTests(cli, sd, cpuset.CPUSet{}, cpuset.CPUSet{})
 		})
 	})
 
@@ -635,7 +636,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			oldCfg := configurePodResourcesInKubelet(f, true, reservedSystemCPUs)
 			defer func() {
 				// restore kubelet config
-				setOldKubeletConfig(f, oldCfg)
+				setOldKubeletConfig(oldCfg)
 
 				// Delete state file to allow repeated runs
 				deleteStateFile()
@@ -649,7 +650,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			defer conn.Close()
 
 			podresourcesListTests(f, cli, nil)
-			podresourcesGetAllocatableResourcesTests(f, cli, nil, onlineCPUs, reservedSystemCPUs)
+			podresourcesGetAllocatableResourcesTests(cli, nil, onlineCPUs, reservedSystemCPUs)
 		})
 
 		ginkgo.It("should return the expected responses with cpumanager none policy", func() {
@@ -660,7 +661,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			oldCfg := enablePodResourcesFeatureGateInKubelet(f)
 			defer func() {
 				// restore kubelet config
-				setOldKubeletConfig(f, oldCfg)
+				setOldKubeletConfig(oldCfg)
 
 				// Delete state file to allow repeated runs
 				deleteStateFile()
@@ -675,7 +676,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 
 			// intentionally passing empty cpuset instead of onlineCPUs because with none policy
 			// we should get no allocatable cpus - no exclusively allocatable CPUs, depends on policy static
-			podresourcesGetAllocatableResourcesTests(f, cli, nil, cpuset.CPUSet{}, cpuset.CPUSet{})
+			podresourcesGetAllocatableResourcesTests(cli, nil, cpuset.CPUSet{}, cpuset.CPUSet{})
 		})
 
 		ginkgo.It("should return the expected error with the feature gate disabled", func() {
@@ -717,7 +718,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
 			oldCfg := configurePodResourcesInKubelet(f, true, reservedSystemCPUs)
 			defer func() {
 				// restore kubelet config
-				setOldKubeletConfig(f, oldCfg)
+				setOldKubeletConfig(oldCfg)
 
 				// Delete state file to allow repeated runs
 				deleteStateFile()
@@ -829,7 +830,23 @@ func configurePodResourcesInKubelet(f *framework.Framework, cleanStateFile bool,
 		}
 	}
 	// Update the Kubelet configuration.
-	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
+	ginkgo.By("Stopping the kubelet")
+	startKubelet := stopKubelet()
+
+	// wait until the kubelet health check will fail
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, time.Minute, time.Second).Should(gomega.BeFalse())
+
+	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
+
+	ginkgo.By("Starting the kubelet")
+	startKubelet()
+
+	// wait until the kubelet health check will succeed
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
 
 	// Wait for the Kubelet to be ready.
 	gomega.Eventually(func() bool {
@@ -849,8 +866,23 @@ func enablePodResourcesFeatureGateInKubelet(f *framework.Framework) (oldCfg *kub
 		newCfg.FeatureGates = make(map[string]bool)
 	}
 
-	// Update the Kubelet configuration.
-	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
+	ginkgo.By("Stopping the kubelet")
+	startKubelet := stopKubelet()
+
+	// wait until the kubelet health check will fail
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, time.Minute, time.Second).Should(gomega.BeFalse())
+
+	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
+
+	ginkgo.By("Starting the kubelet")
+	startKubelet()
+
+	// wait until the kubelet health check will succeed
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
 
 	// Wait for the Kubelet to be ready.
 	gomega.Eventually(func() bool {
diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go
index ab207325e3d..44cb28fc707 100644
--- a/test/e2e_node/topology_manager_test.go
+++ b/test/e2e_node/topology_manager_test.go
@@ -27,23 +27,23 @@ import (
 	"sync"
 	"time"
 
-	testutils "k8s.io/kubernetes/test/utils"
-
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 	"k8s.io/kubernetes/pkg/kubelet/types"
+
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
+	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
+	testutils "k8s.io/kubernetes/test/utils"
 
 	"github.com/onsi/ginkgo"
 	"github.com/onsi/gomega"
@@ -237,8 +237,18 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco
 	// Dump the config -- debug
 	framework.Logf("New kubelet config is %s", *newCfg)
 
-	// Update the Kubelet configuration.
-	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
+	ginkgo.By("Stopping the kubelet")
+	startKubelet := stopKubelet()
+
+	// wait until the kubelet health check will fail
+	gomega.Eventually(func() bool {
+		return kubeletHealthCheck(kubeletHealthCheckURL)
+	}, time.Minute, time.Second).Should(gomega.BeFalse())
+
+	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
+
+	ginkgo.By("Starting the kubelet")
+	startKubelet()
 
 	// Wait for the Kubelet to be ready.
 	gomega.Eventually(func() bool {
@@ -947,7 +957,7 @@ func runTopologyManagerTests(f *framework.Framework) {
 
 	ginkgo.AfterEach(func() {
 		// restore kubelet config
-		setOldKubeletConfig(f, oldCfg)
+		setOldKubeletConfig(oldCfg)
 	})
 }
diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go
index a25fc772a9a..dfb3b967804 100644
--- a/test/e2e_node/util.go
+++ b/test/e2e_node/util.go
@@ -50,10 +50,12 @@ import (
 	kubeletconfigcodec "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/codec"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/util"
+
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
 	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
+	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
 	imageutils "k8s.io/kubernetes/test/utils/image"
 
 	"github.com/onsi/ginkgo"
@@ -157,12 +159,7 @@ func getCurrentKubeletConfig() (*kubeletconfig.KubeletConfiguration, error) {
 func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(initialConfig *kubeletconfig.KubeletConfiguration)) {
 	var oldCfg *kubeletconfig.KubeletConfiguration
 	ginkgo.BeforeEach(func() {
-		configEnabled, err := isKubeletConfigEnabled(f)
-		framework.ExpectNoError(err)
-		framework.ExpectEqual(configEnabled, true, "The Dynamic Kubelet Configuration feature is not enabled.\n"+
-			"Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n"+
-			"For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
-		oldCfg, err = getCurrentKubeletConfig()
+		oldCfg, err := getCurrentKubeletConfig()
 		framework.ExpectNoError(err)
 		newCfg := oldCfg.DeepCopy()
 		updateFunction(newCfg)
@@ -170,12 +167,45 @@ func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ini
 			return
 		}
 
-		framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
+		// Update the Kubelet configuration.
+		ginkgo.By("Stopping the kubelet")
+		startKubelet := stopKubelet()
+
+		// wait until the kubelet health check will fail
+		gomega.Eventually(func() bool {
+			return kubeletHealthCheck(kubeletHealthCheckURL)
+		}, time.Minute, time.Second).Should(gomega.BeFalse())
+
+		framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg))
+
+		ginkgo.By("Starting the kubelet")
+		startKubelet()
+
+		// wait until the kubelet health check will succeed
+		gomega.Eventually(func() bool {
+			return kubeletHealthCheck(kubeletHealthCheckURL)
+		}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
 	})
 	ginkgo.AfterEach(func() {
 		if oldCfg != nil {
-			err := setKubeletConfiguration(f, oldCfg)
-			framework.ExpectNoError(err)
+			// Update the Kubelet configuration.
+ ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) } }) } @@ -193,64 +223,6 @@ func isKubeletConfigEnabled(f *framework.Framework) (bool, error) { return v, nil } -// Creates or updates the configmap for KubeletConfiguration, waits for the Kubelet to restart -// with the new configuration. Returns an error if the configuration after waiting for restartGap -// doesn't match what you attempted to set, or if the dynamic configuration feature is disabled. -// You should only call this from serial tests. -func setKubeletConfiguration(f *framework.Framework, kubeCfg *kubeletconfig.KubeletConfiguration) error { - const ( - restartGap = 40 * time.Second - pollInterval = 5 * time.Second - ) - - // make sure Dynamic Kubelet Configuration feature is enabled on the Kubelet we are about to reconfigure - if configEnabled, err := isKubeletConfigEnabled(f); err != nil { - return err - } else if !configEnabled { - return fmt.Errorf("The Dynamic Kubelet Configuration feature is not enabled.\n" + - "Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n" + - "For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.") - } - - // create the ConfigMap with the new configuration - cm, err := createConfigMap(f, kubeCfg) - if err != nil { - return err - } - - // create the reference and set Node.Spec.ConfigSource - src := &v1.NodeConfigSource{ - ConfigMap: &v1.ConfigMapNodeConfigSource{ - Namespace: "kube-system", - Name: cm.Name, - KubeletConfigKey: "kubelet", - }, - } - - // set the source, retry a few times in case we are competing with other writers - gomega.Eventually(func() error { - if err := setNodeConfigSource(f, src); err != nil { - return err - } - return nil - }, time.Minute, time.Second).Should(gomega.BeNil()) - - // poll for new config, for a maximum wait of restartGap - gomega.Eventually(func() error { - newKubeCfg, err := getCurrentKubeletConfig() - if err != nil { - return fmt.Errorf("failed trying to get current Kubelet config, will retry, error: %v", err) - } - if !apiequality.Semantic.DeepEqual(*kubeCfg, *newKubeCfg) { - return fmt.Errorf("still waiting for new configuration to take effect, will continue to watch /configz") - } - klog.Infof("new configuration has taken effect") - return nil - }, restartGap, pollInterval).Should(gomega.BeNil()) - - return nil -} - // sets the current node's configSource, this should only be called from Serial tests func setNodeConfigSource(f *framework.Framework, source *v1.NodeConfigSource) error { // since this is a serial test, we just get the node, change the source, and then update it @@ -275,16 +247,6 @@ func setNodeConfigSource(f *framework.Framework, source *v1.NodeConfigSource) er return nil } -// creates a configmap containing kubeCfg in kube-system namespace -func createConfigMap(f *framework.Framework, internalKC *kubeletconfig.KubeletConfiguration) (*v1.ConfigMap, error) { - cmap := newKubeletConfigMap("testcfg", internalKC) - cmap, err := 
f.ClientSet.CoreV1().ConfigMaps("kube-system").Create(context.TODO(), cmap, metav1.CreateOptions{}) - if err != nil { - return nil, err - } - return cmap, nil -} - // constructs a ConfigMap, populating one of its keys with the KubeletConfiguration. Always uses GenerateName to generate a suffix. func newKubeletConfigMap(name string, internalKC *kubeletconfig.KubeletConfiguration) *v1.ConfigMap { data, err := kubeletconfigcodec.EncodeKubeletConfig(internalKC, kubeletconfigv1beta1.SchemeGroupVersion) @@ -447,8 +409,9 @@ func stopKubelet() func() { framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout)) return func() { - stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput() - framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout)) + // we should restart service, otherwise the transient service start will fail + stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput() + framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout) } }
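
Note: the stop-kubelet / write-config-file / start-kubelet / wait-for-health sequence is repeated inline in several of the hunks above. The sketch below is only an illustration of that recurring flow collected into one place; the helper name updateKubeletConfigAndRestart is hypothetical and is not introduced by this patch, while stopKubelet, kubeletHealthCheck, kubeletHealthCheckURL, and e2enodekubelet.WriteKubeletConfigFile are the existing e2e_node helpers the patch already calls, so the sketch assumes it lives in the same package.

// Hypothetical helper (not part of this patch): apply a kubelet configuration by
// writing the config file and restarting the kubelet, mirroring the inline sequence
// the tests above now use instead of the removed dynamic kubelet configuration path.
func updateKubeletConfigAndRestart(cfg *kubeletconfig.KubeletConfiguration) {
	ginkgo.By("Stopping the kubelet")
	startKubelet := stopKubelet()

	// wait until the kubelet health check fails, i.e. the kubelet is really down
	gomega.Eventually(func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, time.Minute, time.Second).Should(gomega.BeFalse())

	// write the config file the kubelet reads on startup
	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg))

	ginkgo.By("Starting the kubelet")
	startKubelet()

	// wait until the kubelet health check succeeds again
	gomega.Eventually(func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())
}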