diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go index a23edd8631d..5ef5aa58a98 100644 --- a/test/e2e_node/cpu_manager_test.go +++ b/test/e2e_node/cpu_manager_test.go @@ -34,13 +34,12 @@ import ( cpumanagerstate "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" "k8s.io/kubernetes/pkg/kubelet/types" - "k8s.io/kubernetes/test/e2e/framework" - e2enode "k8s.io/kubernetes/test/e2e/framework/node" - e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" "github.com/onsi/ginkgo" "github.com/onsi/gomega" + "k8s.io/kubernetes/test/e2e/framework" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" ) // Helper for makeCPUManagerPod(). @@ -60,12 +59,12 @@ func makeCPUManagerPod(podName string, ctnAttributes []ctnAttribute) *v1.Pod { Image: busyboxImage, Resources: v1.ResourceRequirements{ Requests: v1.ResourceList{ - v1.ResourceName(v1.ResourceCPU): resource.MustParse(ctnAttr.cpuRequest), - v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"), + v1.ResourceCPU: resource.MustParse(ctnAttr.cpuRequest), + v1.ResourceMemory: resource.MustParse("100Mi"), }, Limits: v1.ResourceList{ - v1.ResourceName(v1.ResourceCPU): resource.MustParse(ctnAttr.cpuLimit), - v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"), + v1.ResourceCPU: resource.MustParse(ctnAttr.cpuLimit), + v1.ResourceMemory: resource.MustParse("100Mi"), }, }, Command: []string{"sh", "-c", cpusetCmd}, @@ -109,7 +108,7 @@ func getLocalNodeCPUDetails(f *framework.Framework) (cpuCapVal int64, cpuAllocVa // RoundUp reserved CPUs to get only integer cores. cpuRes.RoundUp(0) - return cpuCap.Value(), (cpuCap.Value() - cpuRes.Value()), cpuRes.Value() + return cpuCap.Value(), cpuCap.Value() - cpuRes.Value(), cpuRes.Value() } func waitForContainerRemoval(containerName, podName, podNS string) { @@ -185,76 +184,35 @@ func getCoreSiblingList(cpuRes int64) string { return string(out) } -func deleteStateFile() { - err := exec.Command("/bin/sh", "-c", "rm -f /var/lib/kubelet/cpu_manager_state").Run() - framework.ExpectNoError(err, "error deleting state file") +type cpuManagerKubeletArguments struct { + policyName string + enableCPUManager bool + enableCPUManagerOptions bool + reservedSystemCPUs cpuset.CPUSet + options map[string]string } -func setOldKubeletConfig(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration) { - // Delete the CPU Manager state file so that the old Kubelet configuration - // can take effect.i - deleteStateFile() - - if oldCfg != nil { - framework.ExpectNoError(setKubeletConfiguration(f, oldCfg)) - } -} - -func disableCPUManagerInKubelet(f *framework.Framework) (oldCfg *kubeletconfig.KubeletConfiguration) { - // Disable CPU Manager in Kubelet. - oldCfg, err := getCurrentKubeletConfig() - framework.ExpectNoError(err) +func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, kubeletArguments *cpuManagerKubeletArguments) *kubeletconfig.KubeletConfiguration { newCfg := oldCfg.DeepCopy() if newCfg.FeatureGates == nil { newCfg.FeatureGates = make(map[string]bool) } - newCfg.FeatureGates["CPUManager"] = false - // Update the Kubelet configuration. - framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) + newCfg.FeatureGates["CPUManager"] = kubeletArguments.enableCPUManager - // Wait for the Kubelet to be ready. 
- gomega.Eventually(func() bool { - nodes, err := e2enode.TotalReady(f.ClientSet) - framework.ExpectNoError(err) - return nodes == 1 - }, time.Minute, time.Second).Should(gomega.BeTrue()) + newCfg.FeatureGates["CPUManagerPolicyOptions"] = kubeletArguments.enableCPUManagerOptions + newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = kubeletArguments.enableCPUManagerOptions + newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = kubeletArguments.enableCPUManagerOptions - return oldCfg -} - -func configureCPUManagerInKubelet(f *framework.Framework, policyName string, cleanStateFile bool, reservedSystemCPUs cpuset.CPUSet, enableOptions bool, options map[string]string) (oldCfg *kubeletconfig.KubeletConfiguration) { - // Enable CPU Manager in Kubelet with static policy. - oldCfg, err := getCurrentKubeletConfig() - framework.ExpectNoError(err) - newCfg := oldCfg.DeepCopy() - if newCfg.FeatureGates == nil { - newCfg.FeatureGates = make(map[string]bool) - } - newCfg.FeatureGates["CPUManager"] = true - newCfg.FeatureGates["CPUManagerPolicyOptions"] = enableOptions - newCfg.FeatureGates["CPUManagerPolicyBetaOptions"] = enableOptions - newCfg.FeatureGates["CPUManagerPolicyAlphaOptions"] = enableOptions - - // After graduation of the CPU Manager feature to Beta, the CPU Manager - // "none" policy is ON by default. But when we set the CPU Manager policy to - // "static" in this test and the Kubelet is restarted so that "static" - // policy can take effect, there will always be a conflict with the state - // checkpointed in the disk (i.e., the policy checkpointed in the disk will - // be "none" whereas we are trying to restart Kubelet with "static" - // policy). Therefore, we delete the state file so that we can proceed - // with the tests. - // Only delete the state file at the begin of the tests. - if cleanStateFile { - deleteStateFile() - } - - newCfg.CPUManagerPolicy = policyName + newCfg.CPUManagerPolicy = kubeletArguments.policyName newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} - newCfg.CPUManagerPolicyOptions = options - if reservedSystemCPUs.Size() > 0 { - cpus := reservedSystemCPUs.String() + if kubeletArguments.options != nil { + newCfg.CPUManagerPolicyOptions = kubeletArguments.options + } + + if kubeletArguments.reservedSystemCPUs.Size() > 0 { + cpus := kubeletArguments.reservedSystemCPUs.String() framework.Logf("configureCPUManagerInKubelet: using reservedSystemCPUs=%q", cpus) newCfg.ReservedSystemCPUs = cpus } else { @@ -269,15 +227,6 @@ func configureCPUManagerInKubelet(f *framework.Framework, policyName string, cle newCfg.KubeReserved["cpu"] = "200m" } } - // Update the Kubelet configuration. - framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) - - // Wait for the Kubelet to be ready. - gomega.Eventually(func() bool { - nodes, err := e2enode.TotalReady(f.ClientSet) - framework.ExpectNoError(err) - return nodes == 1 - }, time.Minute, time.Second).Should(gomega.BeTrue()) return oldCfg } @@ -581,6 +530,12 @@ func runCPUManagerTests(f *framework.Framework) { var ctnAttrs []ctnAttribute var pod *v1.Pod + ginkgo.BeforeEach(func() { + var err error + oldCfg, err = getCurrentKubeletConfig() + framework.ExpectNoError(err) + }) + ginkgo.It("should assign CPUs as expected based on the Pod spec", func() { cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f) @@ -590,7 +545,12 @@ func runCPUManagerTests(f *framework.Framework) { } // Enable CPU Manager in the kubelet. 
- oldCfg = configureCPUManagerInKubelet(f, string(cpumanager.PolicyStatic), true, cpuset.CPUSet{}, false, nil) + newCfg := configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + enableCPUManager: true, + reservedSystemCPUs: cpuset.CPUSet{}, + }) + updateKubeletConfig(f, newCfg, true) ginkgo.By("running a non-Gu pod") runNonGuPodTest(f, cpuCap) @@ -650,14 +610,24 @@ func runCPUManagerTests(f *framework.Framework) { pod.Spec.Containers[0].Name, pod.Name) ginkgo.By("disable cpu manager in kubelet") - disableCPUManagerInKubelet(f) + newCfg = configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + enableCPUManager: false, + reservedSystemCPUs: cpuset.CPUSet{}, + }) + updateKubeletConfig(f, newCfg, false) ginkgo.By("by deleting the pod and waiting for container removal") deletePods(f, []string{pod.Name}) waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) ginkgo.By("enable cpu manager in kubelet without delete state file") - configureCPUManagerInKubelet(f, string(cpumanager.PolicyStatic), false, cpuset.CPUSet{}, false, nil) + newCfg = configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + enableCPUManager: true, + reservedSystemCPUs: cpuset.CPUSet{}, + }) + updateKubeletConfig(f, newCfg, false) ginkgo.By("wait for the deleted pod to be cleaned up from the state file") waitForStateFileCleanedUp() @@ -681,13 +651,21 @@ func runCPUManagerTests(f *framework.Framework) { framework.Logf("SMT level %d", smtLevel) - cleanStateFile := true // TODO: we assume the first available CPUID is 0, which is pretty fair, but we should probably // check what we do have in the node. 
cpuPolicyOptions := map[string]string{ cpumanager.FullPCPUsOnlyOption: "true", } - oldCfg = configureCPUManagerInKubelet(f, string(cpumanager.PolicyStatic), cleanStateFile, cpuset.NewCPUSet(0), true, cpuPolicyOptions) + newCfg := configureCPUManagerInKubelet(oldCfg, + &cpuManagerKubeletArguments{ + policyName: string(cpumanager.PolicyStatic), + enableCPUManager: true, + reservedSystemCPUs: cpuset.NewCPUSet(0), + enableCPUManagerOptions: true, + options: cpuPolicyOptions, + }, + ) + updateKubeletConfig(f, newCfg, true) // the order between negative and positive doesn't really matter runSMTAlignmentNegativeTests(f) @@ -695,7 +673,7 @@ func runCPUManagerTests(f *framework.Framework) { }) ginkgo.AfterEach(func() { - setOldKubeletConfig(f, oldCfg) + updateKubeletConfig(f, oldCfg, true) }) } diff --git a/test/e2e_node/device_manager_test.go b/test/e2e_node/device_manager_test.go index a990e56849a..020b4c99814 100644 --- a/test/e2e_node/device_manager_test.go +++ b/test/e2e_node/device_manager_test.go @@ -61,15 +61,6 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device") } - oldCfg := enablePodResourcesFeatureGateInKubelet(f) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) - - // Delete state file to allow repeated runs - deleteStateFile() - }() - configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) sd := setupSRIOVConfigOrFail(f, configMap) @@ -167,15 +158,6 @@ var _ = SIGDescribe("Device Manager [Serial] [Feature:DeviceManager][NodeFeatur e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device") } - oldCfg := enablePodResourcesFeatureGateInKubelet(f) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) - - // Delete state file to allow repeated runs - deleteStateFile() - }() - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) framework.ExpectNoError(err) diff --git a/test/e2e_node/device_plugin_test.go b/test/e2e_node/device_plugin_test.go index b4be666213f..c96180034fc 100644 --- a/test/e2e_node/device_plugin_test.go +++ b/test/e2e_node/device_plugin_test.go @@ -110,7 +110,6 @@ func readDaemonSetV1OrDie(objBytes []byte) *appsv1.DaemonSet { func testDevicePlugin(f *framework.Framework, pluginSockDir string) { pluginSockDir = filepath.Join(pluginSockDir) + "/" ginkgo.Context("DevicePlugin", func() { - ginkgo.By("Enabling support for Kubelet Plugins Watcher") ginkgo.It("[Flaky] Verifies the Kubelet device plugin functionality.", func() { ginkgo.By("Wait for node is ready to start with") e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute) diff --git a/test/e2e_node/dynamic_kubelet_config_test.go b/test/e2e_node/dynamic_kubelet_config_test.go index 311e9b64e90..02d10060b3d 100644 --- a/test/e2e_node/dynamic_kubelet_config_test.go +++ b/test/e2e_node/dynamic_kubelet_config_test.go @@ -70,7 +70,7 @@ type nodeConfigTestCase struct { } // This test is marked [Disruptive] because the Kubelet restarts several times during this test. 
-var _ = SIGDescribe("[Feature:DynamicKubeletConfig][NodeFeature:DynamicKubeletConfig][Serial][Disruptive]", func() { +var _ = SIGDescribe("[Feature:DynamicKubeletConfig][NodeFeature:DynamicKubeletConfig][Deprecated][Disruptive]", func() { f := framework.NewDefaultFramework("dynamic-kubelet-configuration-test") var beforeNode *v1.Node var beforeConfigMap *v1.ConfigMap diff --git a/test/e2e_node/kubeletconfig/kubeletconfig.go b/test/e2e_node/kubeletconfig/kubeletconfig.go new file mode 100644 index 00000000000..211200dab7b --- /dev/null +++ b/test/e2e_node/kubeletconfig/kubeletconfig.go @@ -0,0 +1,95 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubeletconfig + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + + kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + kubeletconfigscheme "k8s.io/kubernetes/pkg/kubelet/apis/config/scheme" + kubeletconfigcodec "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/codec" + + "sigs.k8s.io/yaml" +) + +func getKubeletConfigFilePath() (string, error) { + cwd, err := os.Getwd() + if err != nil { + return "", fmt.Errorf("failed to get current working directory: %w", err) + } + + // DO NOT name this file "kubelet" - you will overwrite the kubelet binary and be very confused :) + return filepath.Join(cwd, "kubelet-config"), nil +} + +// GetCurrentKubeletConfigFromFile returns the current kubelet configuration under the filesystem. 
+// This method should only run together with e2e node tests, meaning the test executor and the cluster node are the
+// same machine.
+func GetCurrentKubeletConfigFromFile() (*kubeletconfig.KubeletConfiguration, error) {
+	kubeletConfigFilePath, err := getKubeletConfigFilePath()
+	if err != nil {
+		return nil, err
+	}
+
+	data, err := ioutil.ReadFile(kubeletConfigFilePath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get the kubelet config from the file %q: %w", kubeletConfigFilePath, err)
+	}
+
+	var kubeletConfigV1Beta1 kubeletconfigv1beta1.KubeletConfiguration
+	if err := yaml.Unmarshal(data, &kubeletConfigV1Beta1); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal the kubelet config: %w", err)
+	}
+
+	scheme, _, err := kubeletconfigscheme.NewSchemeAndCodecs()
+	if err != nil {
+		return nil, err
+	}
+
+	kubeletConfig := kubeletconfig.KubeletConfiguration{}
+	err = scheme.Convert(&kubeletConfigV1Beta1, &kubeletConfig, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	return &kubeletConfig, nil
+}
+
+// WriteKubeletConfigFile updates the kubelet configuration under the filesystem.
+// This method should only run together with e2e node tests, meaning the test executor and the cluster node are the
+// same machine.
+func WriteKubeletConfigFile(kubeletConfig *kubeletconfig.KubeletConfiguration) error {
+	data, err := kubeletconfigcodec.EncodeKubeletConfig(kubeletConfig, kubeletconfigv1beta1.SchemeGroupVersion)
+	if err != nil {
+		return err
+	}
+
+	kubeletConfigFilePath, err := getKubeletConfigFilePath()
+	if err != nil {
+		return err
+	}
+
+	if err := ioutil.WriteFile(kubeletConfigFilePath, data, 0644); err != nil {
+		return fmt.Errorf("failed to write the kubelet file to %q: %w", kubeletConfigFilePath, err)
+	}
+
+	return nil
+}
diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go
index a62359252fe..9c3e4c1005d 100644
--- a/test/e2e_node/memory_manager_test.go
+++ b/test/e2e_node/memory_manager_test.go
@@ -1,3 +1,6 @@
+//go:build linux
+// +build linux
+
 /*
 Copyright 2017 The Kubernetes Authors.
 
@@ -38,7 +41,6 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
 	"k8s.io/kubernetes/pkg/kubelet/util"
 	"k8s.io/kubernetes/test/e2e/framework"
-	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 
 	"k8s.io/utils/pointer"
@@ -47,11 +49,10 @@ import (
 )
 
 const (
-	evictionHardMemory     = "memory.available"
-	memoryManagerStateFile = "/var/lib/kubelet/memory_manager_state"
-	resourceMemory         = "memory"
-	staticPolicy           = "Static"
-	nonePolicy             = "None"
+	evictionHardMemory = "memory.available"
+	resourceMemory     = "memory"
+	staticPolicy       = "Static"
+	nonePolicy         = "None"
 )
 
 // Helper for makeMemoryManagerPod().
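// NOTE (editor): the new test/e2e_node/kubeletconfig package added above lets the e2e node
// suite read and write the kubelet's on-disk config file directly, instead of going through
// dynamic kubelet configuration. A minimal, illustrative usage sketch (not part of the patch;
// the mutated field is arbitrary):
//
//	cfg, err := e2enodekubelet.GetCurrentKubeletConfigFromFile()
//	if err != nil {
//		return err
//	}
//	cfg.CPUManagerPolicy = "static" // mutate any field, then persist the result
//	if err := e2enodekubelet.WriteKubeletConfigFile(cfg); err != nil {
//		return err
//	}
//
// The kubelet still has to be restarted for the rewritten file to take effect; that is what
// updateKubeletConfig in test/e2e_node/util.go (later in this diff) takes care of.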
@@ -134,11 +135,6 @@ func makeMemoryManagerPod(podName string, initCtnAttributes, ctnAttributes []mem return pod } -func deleteMemoryManagerStateFile() { - err := exec.Command("/bin/sh", "-c", fmt.Sprintf("rm -f %s", memoryManagerStateFile)).Run() - framework.ExpectNoError(err, "failed to delete the state file") -} - func getMemoryManagerState() (*state.MemoryManagerCheckpoint, error) { if _, err := os.Stat(memoryManagerStateFile); os.IsNotExist(err) { return nil, fmt.Errorf("the memory manager state file %s does not exist", memoryManagerStateFile) @@ -183,63 +179,44 @@ type kubeletParams struct { evictionHard map[string]string } -func getUpdatedKubeletConfig(oldCfg *kubeletconfig.KubeletConfiguration, params *kubeletParams) *kubeletconfig.KubeletConfiguration { - newCfg := oldCfg.DeepCopy() - - if newCfg.FeatureGates == nil { - newCfg.FeatureGates = map[string]bool{} +func updateKubeletConfigWithMemoryManagerParams(initialCfg *kubeletconfig.KubeletConfiguration, params *kubeletParams) { + if initialCfg.FeatureGates == nil { + initialCfg.FeatureGates = map[string]bool{} } - newCfg.MemoryManagerPolicy = params.memoryManagerPolicy + initialCfg.MemoryManagerPolicy = params.memoryManagerPolicy // update system-reserved - if newCfg.SystemReserved == nil { - newCfg.SystemReserved = map[string]string{} + if initialCfg.SystemReserved == nil { + initialCfg.SystemReserved = map[string]string{} } for resourceName, value := range params.systemReserved { - newCfg.SystemReserved[resourceName] = value + initialCfg.SystemReserved[resourceName] = value } // update kube-reserved - if newCfg.KubeReserved == nil { - newCfg.KubeReserved = map[string]string{} + if initialCfg.KubeReserved == nil { + initialCfg.KubeReserved = map[string]string{} } for resourceName, value := range params.kubeReserved { - newCfg.KubeReserved[resourceName] = value + initialCfg.KubeReserved[resourceName] = value } // update hard eviction threshold - if newCfg.EvictionHard == nil { - newCfg.EvictionHard = map[string]string{} + if initialCfg.EvictionHard == nil { + initialCfg.EvictionHard = map[string]string{} } for resourceName, value := range params.evictionHard { - newCfg.EvictionHard[resourceName] = value + initialCfg.EvictionHard[resourceName] = value } // update reserved memory - if newCfg.ReservedMemory == nil { - newCfg.ReservedMemory = []kubeletconfig.MemoryReservation{} + if initialCfg.ReservedMemory == nil { + initialCfg.ReservedMemory = []kubeletconfig.MemoryReservation{} } for _, memoryReservation := range params.systemReservedMemory { - newCfg.ReservedMemory = append(newCfg.ReservedMemory, memoryReservation) + initialCfg.ReservedMemory = append(initialCfg.ReservedMemory, memoryReservation) } - - return newCfg -} - -func updateKubeletConfig(f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) { - // remove the state file - deleteMemoryManagerStateFile() - - // Update the Kubelet configuration - framework.ExpectNoError(setKubeletConfiguration(f, cfg)) - - // Wait for the Kubelet to be ready. - gomega.Eventually(func() bool { - nodes, err := e2enode.TotalReady(f.ClientSet) - framework.ExpectNoError(err) - return nodes == 1 - }, time.Minute, time.Second).Should(gomega.BeTrue()) } func getAllNUMANodes() []int { @@ -264,15 +241,13 @@ func getAllNUMANodes() []int { } // Serial because the test updates kubelet configuration. 
-var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { +var _ = SIGDescribe("Memory Manager [Disruptive] [Serial] [Feature:MemoryManager]", func() { // TODO: add more complex tests that will include interaction between CPUManager, MemoryManager and TopologyManager var ( allNUMANodes []int ctnParams, initCtnParams []memoryManagerCtnAttributes is2MiHugepagesSupported *bool isMultiNUMASupported *bool - kubeParams *kubeletParams - oldCfg *kubeletconfig.KubeletConfiguration testPod *v1.Pod ) @@ -305,6 +280,32 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { framework.ExpectEqual(numaNodeIDs, currentNUMANodeIDs.ToSlice()) } + waitingForHugepages := func(hugepagesCount int) { + gomega.Eventually(func() error { + node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResourceName2Mi)] + if !ok { + return fmt.Errorf("the node does not have the resource %s", hugepagesResourceName2Mi) + } + + size, succeed := capacity.AsInt64() + if !succeed { + return fmt.Errorf("failed to convert quantity to int64") + } + + // 512 Mb, the expected size in bytes + expectedSize := int64(hugepagesCount * hugepagesSize2M * 1024) + if size != expectedSize { + return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize) + } + return nil + }, time.Minute, framework.Poll).Should(gomega.BeNil()) + } + ginkgo.BeforeEach(func() { if isMultiNUMASupported == nil { isMultiNUMASupported = pointer.BoolPtr(isMultiNUMA()) @@ -321,74 +322,22 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { // dynamically update the kubelet configuration ginkgo.JustBeforeEach(func() { - var err error + hugepagesCount := 8 // allocate hugepages if *is2MiHugepagesSupported { - hugepagesCount := 256 ginkgo.By("Configuring hugepages") gomega.Eventually(func() error { - if err := configureHugePages(hugepagesSize2M, hugepagesCount); err != nil { - return err - } - return nil + return configureHugePages(hugepagesSize2M, hugepagesCount) }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) - ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") - - // stop the kubelet and wait until the server will restart it automatically - stopKubelet() - - // wait until the kubelet health check will fail - gomega.Eventually(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, time.Second).Should(gomega.BeFalse()) - - restartKubelet(false) - - // wait until the kubelet health check will pass - gomega.Eventually(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, 2*time.Minute, 10*time.Second).Should(gomega.BeTrue()) + restartKubelet(true) ginkgo.By("Waiting for hugepages resource to become available on the local node") - gomega.Eventually(func() error { - node, err := f.ClientSet.CoreV1().Nodes().Get(context.TODO(), framework.TestContext.NodeName, metav1.GetOptions{}) - if err != nil { - return err - } + waitingForHugepages(hugepagesCount) - capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResourceName2Mi)] - if !ok { - return fmt.Errorf("the node does not have the resource %s", hugepagesResourceName2Mi) - } - - size, succeed := capacity.AsInt64() - if !succeed { - return fmt.Errorf("failed to convert quantity to int64") - } - - // 512 Mb, the expected size in bytes - expectedSize := int64(hugepagesCount * 
hugepagesSize2M * 1024) - if size != expectedSize { - return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize) - } - return nil - }, time.Minute, framework.Poll).Should(gomega.BeNil()) - } - - // get the old kubelet config - oldCfg, err = getCurrentKubeletConfig() - framework.ExpectNoError(err) - - // update the kubelet config with new parameters - newCfg := getUpdatedKubeletConfig(oldCfg, kubeParams) - updateKubeletConfig(f, newCfg) - - // request hugepages resources under the container - if *is2MiHugepagesSupported { for i := 0; i < len(ctnParams); i++ { - ctnParams[i].hugepages2Mi = "128Mi" + ctnParams[i].hugepages2Mi = "8Mi" } } @@ -404,28 +353,23 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { } // release hugepages - gomega.Eventually(func() error { - return configureHugePages(hugepagesSize2M, 0) - }, 90*time.Second, 15*time.Second).ShouldNot(gomega.HaveOccurred(), "failed to release hugepages") + if *is2MiHugepagesSupported { + ginkgo.By("Releasing allocated hugepages") + gomega.Eventually(func() error { + return configureHugePages(hugepagesSize2M, 0) + }, 90*time.Second, 15*time.Second).ShouldNot(gomega.HaveOccurred(), "failed to release hugepages") - // update the kubelet config with old values - updateKubeletConfig(f, oldCfg) + restartKubelet(true) - // wait until the kubelet health check will pass and will continue to pass for specified period of time - gomega.Eventually(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, 10*time.Second).Should(gomega.BeTrue()) - gomega.Consistently(func() bool { - return kubeletHealthCheck(kubeletHealthCheckURL) - }, time.Minute, 10*time.Second).Should(gomega.BeTrue()) + waitingForHugepages(0) + } }) ginkgo.Context("with static policy", func() { - ginkgo.BeforeEach(func() { - // override kubelet configuration parameters - tmpParams := *defaultKubeParams - tmpParams.memoryManagerPolicy = staticPolicy - kubeParams = &tmpParams + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + kubeParams := *defaultKubeParams + kubeParams.memoryManagerPolicy = staticPolicy + updateKubeletConfigWithMemoryManagerParams(initialConfig, &kubeParams) }) ginkgo.JustAfterEach(func() { @@ -700,70 +644,75 @@ var _ = SIGDescribe("Memory Manager [Serial] [Feature:MemoryManager]", func() { }) ginkgo.Context("with none policy", func() { - ginkgo.BeforeEach(func() { - tmpParams := *defaultKubeParams - tmpParams.memoryManagerPolicy = nonePolicy - kubeParams = &tmpParams - - // override pod parameters - ctnParams = []memoryManagerCtnAttributes{ - { - ctnName: "memory-manager-none", - cpus: "100m", - memory: "128Mi", - }, - } + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + kubeParams := *defaultKubeParams + kubeParams.memoryManagerPolicy = nonePolicy + updateKubeletConfigWithMemoryManagerParams(initialConfig, &kubeParams) }) - // TODO: move the test to pod resource API test suite, see - https://github.com/kubernetes/kubernetes/issues/101945 - ginkgo.It("should not report any memory data during request to pod resources GetAllocatableResources", func() { - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) + // empty context to configure same container parameters for all tests + ginkgo.Context("", func() { + ginkgo.BeforeEach(func() { + // override pod parameters + ctnParams = []memoryManagerCtnAttributes{ + { + ctnName: 
"memory-manager-none", + cpus: "100m", + memory: "128Mi", + }, + } + }) - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() + // TODO: move the test to pod resource API test suite, see - https://github.com/kubernetes/kubernetes/issues/101945 + ginkgo.It("should not report any memory data during request to pod resources GetAllocatableResources", func() { + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) - resp, err := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) - framework.ExpectNoError(err) + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() - gomega.Expect(resp.Memory).To(gomega.BeEmpty()) - }) + resp, err := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) + framework.ExpectNoError(err) - // TODO: move the test to pod resource API test suite, see - https://github.com/kubernetes/kubernetes/issues/101945 - ginkgo.It("should not report any memory data during request to pod resources List", func() { - testPod = f.PodClient().CreateSync(testPod) + gomega.Expect(resp.Memory).To(gomega.BeEmpty()) + }) - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) + // TODO: move the test to pod resource API test suite, see - https://github.com/kubernetes/kubernetes/issues/101945 + ginkgo.It("should not report any memory data during request to pod resources List", func() { + testPod = f.PodClient().CreateSync(testPod) - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) - resp, err := cli.List(context.TODO(), &kubeletpodresourcesv1.ListPodResourcesRequest{}) - framework.ExpectNoError(err) + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() - for _, podResource := range resp.PodResources { - if podResource.Name != testPod.Name { - continue + resp, err := cli.List(context.TODO(), &kubeletpodresourcesv1.ListPodResourcesRequest{}) + framework.ExpectNoError(err) + + for _, podResource := range resp.PodResources { + if podResource.Name != testPod.Name { + continue + } + + for _, containerResource := range podResource.Containers { + gomega.Expect(containerResource.Memory).To(gomega.BeEmpty()) + } + } + }) + + ginkgo.It("should succeed to start the pod", func() { + testPod = f.PodClient().CreateSync(testPod) + + // it no taste to verify NUMA pinning when the node has only one NUMA node + if !*isMultiNUMASupported { + return } - for _, containerResource := range podResource.Containers { - gomega.Expect(containerResource.Memory).To(gomega.BeEmpty()) - } - } - }) - - ginkgo.It("should succeed to start the pod", func() { - testPod = f.PodClient().CreateSync(testPod) - - // it no taste to verify NUMA pinning when the node has only one NUMA node - if !*isMultiNUMASupported { - return - } - - verifyMemoryPinning(testPod, allNUMANodes) + verifyMemoryPinning(testPod, allNUMANodes) + }) }) }) }) diff --git 
a/test/e2e_node/node_container_manager_test.go b/test/e2e_node/node_container_manager_test.go index b2b47a32ba3..a44e0ac8492 100644 --- a/test/e2e_node/node_container_manager_test.go +++ b/test/e2e_node/node_container_manager_test.go @@ -34,8 +34,10 @@ import ( kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/pkg/kubelet/stats/pidlimit" + "k8s.io/kubernetes/test/e2e/framework" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig" "github.com/onsi/ginkgo" "github.com/onsi/gomega" @@ -183,7 +185,24 @@ func runTest(f *framework.Framework) error { defer destroyTemporaryCgroupsForReservation(cgroupManager) defer func() { if oldCfg != nil { - framework.ExpectNoError(setKubeletConfiguration(f, oldCfg)) + // Update the Kubelet configuration. + ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(oldCfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) } }() if err := createTemporaryCgroupsForReservation(cgroupManager); err != nil { @@ -193,7 +212,25 @@ func runTest(f *framework.Framework) error { // Change existing kubelet configuration setDesiredConfiguration(newCfg) // Set the new kubelet configuration. - err = setKubeletConfiguration(f, newCfg) + // Update the Kubelet configuration. + ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(newCfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) + if err != nil { return err } diff --git a/test/e2e_node/node_perf_test.go b/test/e2e_node/node_perf_test.go index 50be44d111c..e5c064da122 100644 --- a/test/e2e_node/node_perf_test.go +++ b/test/e2e_node/node_perf_test.go @@ -24,10 +24,12 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig" "k8s.io/kubernetes/test/e2e_node/perf/workloads" "github.com/onsi/ginkgo" @@ -46,7 +48,24 @@ func makeNodePerfPod(w workloads.NodePerfWorkload) *v1.Pod { func setKubeletConfig(f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) { if cfg != nil { - framework.ExpectNoError(setKubeletConfiguration(f, cfg)) + // Update the Kubelet configuration. 
+ ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) } // Wait for the Kubelet to be ready. diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go index 1b27dda6fab..ab2e7a6704b 100644 --- a/test/e2e_node/podresources_test.go +++ b/test/e2e_node/podresources_test.go @@ -28,7 +28,6 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilfeature "k8s.io/apiserver/pkg/util/feature" kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1" kubefeatures "k8s.io/kubernetes/pkg/features" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" @@ -38,14 +37,13 @@ import ( "k8s.io/kubernetes/pkg/kubelet/util" testutils "k8s.io/kubernetes/test/utils" + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" - - "github.com/onsi/ginkgo" - "github.com/onsi/gomega" ) type podDesc struct { @@ -494,7 +492,7 @@ func podresourcesListTests(f *framework.Framework, cli kubeletpodresourcesv1.Pod } -func podresourcesGetAllocatableResourcesTests(f *framework.Framework, cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, onlineCPUs, reservedSystemCPUs cpuset.CPUSet) { +func podresourcesGetAllocatableResourcesTests(cli kubeletpodresourcesv1.PodResourcesListerClient, sd *sriovData, onlineCPUs, reservedSystemCPUs cpuset.CPUSet) { ginkgo.By("checking the devices known to the kubelet") resp, err := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) framework.ExpectNoErrorWithOffset(1, err) @@ -535,240 +533,255 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P reservedSystemCPUs := cpuset.MustParse("1") - ginkgo.Context("With SRIOV devices in the system", func() { - ginkgo.It("should return the expected responses with cpumanager static policy enabled", func() { - // this is a very rough check. We just want to rule out system that does NOT have enough resources - _, cpuAlloc, _ := getLocalNodeCPUDetails(f) - - if cpuAlloc < minCoreCount { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount) - } - + ginkgo.Context("with SRIOV devices in the system", func() { + ginkgo.BeforeEach(func() { requireSRIOVDevices() - - onlineCPUs, err := getOnlineCPUs() - framework.ExpectNoError(err) - - // Make sure all the feature gates and the right settings are in place. 
- oldCfg := configurePodResourcesInKubelet(f, true, reservedSystemCPUs) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) - - // Delete state file to allow repeated runs - deleteStateFile() - }() - - configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) - sd := setupSRIOVConfigOrFail(f, configMap) - defer teardownSRIOVConfigOrFail(f, sd) - - waitForSRIOVResources(f, sd) - - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) - - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() - - waitForSRIOVResources(f, sd) - - ginkgo.By("checking List()") - podresourcesListTests(f, cli, sd) - ginkgo.By("checking GetAllocatableResources()") - podresourcesGetAllocatableResourcesTests(f, cli, sd, onlineCPUs, reservedSystemCPUs) - }) - ginkgo.It("should return the expected responses with cpumanager none policy", func() { - // current default is "none" policy - no need to restart the kubelet + ginkgo.Context("with CPU manager Static policy", func() { + ginkgo.BeforeEach(func() { + // this is a very rough check. We just want to rule out system that does NOT have enough resources + _, cpuAlloc, _ := getLocalNodeCPUDetails(f) - requireSRIOVDevices() + if cpuAlloc < minCoreCount { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount) + } + }) - oldCfg := enablePodResourcesFeatureGateInKubelet(f) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) + // empty context to apply kubelet config changes + ginkgo.Context("", func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + // Set the CPU Manager policy to static. + initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic) - // Delete state file to allow repeated runs - deleteStateFile() - }() + // Set the CPU Manager reconcile period to 1 second. 
+ initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} - configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) - sd := setupSRIOVConfigOrFail(f, configMap) - defer teardownSRIOVConfigOrFail(f, sd) + cpus := reservedSystemCPUs.String() + framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus) + initialConfig.ReservedSystemCPUs = cpus + }) - waitForSRIOVResources(f, sd) + ginkgo.It("should return the expected responses", func() { + onlineCPUs, err := getOnlineCPUs() + framework.ExpectNoError(err) - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) + configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) + sd := setupSRIOVConfigOrFail(f, configMap) + defer teardownSRIOVConfigOrFail(f, sd) - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() + waitForSRIOVResources(f, sd) - waitForSRIOVResources(f, sd) + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) - // intentionally passing empty cpuset instead of onlineCPUs because with none policy - // we should get no allocatable cpus - no exclusively allocatable CPUs, depends on policy static - podresourcesGetAllocatableResourcesTests(f, cli, sd, cpuset.CPUSet{}, cpuset.CPUSet{}) + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() + + waitForSRIOVResources(f, sd) + + ginkgo.By("checking List()") + podresourcesListTests(f, cli, sd) + ginkgo.By("checking GetAllocatableResources()") + podresourcesGetAllocatableResourcesTests(cli, sd, onlineCPUs, reservedSystemCPUs) + }) + }) }) - }) + ginkgo.Context("with CPU manager None policy", func() { + ginkgo.It("should return the expected responses", func() { + // current default is "none" policy - no need to restart the kubelet - ginkgo.Context("Without SRIOV devices in the system", func() { - ginkgo.It("should return the expected responses with cpumanager static policy enabled", func() { - // this is a very rough check. We just want to rule out system that does NOT have enough resources - _, cpuAlloc, _ := getLocalNodeCPUDetails(f) + requireSRIOVDevices() - if cpuAlloc < minCoreCount { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount) - } + configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) + sd := setupSRIOVConfigOrFail(f, configMap) + defer teardownSRIOVConfigOrFail(f, sd) - requireLackOfSRIOVDevices() + waitForSRIOVResources(f, sd) - onlineCPUs, err := getOnlineCPUs() - framework.ExpectNoError(err) + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) - // Make sure all the feature gates and the right settings are in place. 
- oldCfg := configurePodResourcesInKubelet(f, true, reservedSystemCPUs) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() - // Delete state file to allow repeated runs - deleteStateFile() - }() + waitForSRIOVResources(f, sd) - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) - - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() - - podresourcesListTests(f, cli, nil) - podresourcesGetAllocatableResourcesTests(f, cli, nil, onlineCPUs, reservedSystemCPUs) - }) - - ginkgo.It("should return the expected responses with cpumanager none policy", func() { - // current default is "none" policy - no need to restart the kubelet - - requireLackOfSRIOVDevices() - - oldCfg := enablePodResourcesFeatureGateInKubelet(f) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) - - // Delete state file to allow repeated runs - deleteStateFile() - }() - - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) - - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() - - // intentionally passing empty cpuset instead of onlineCPUs because with none policy - // we should get no allocatable cpus - no exclusively allocatable CPUs, depends on policy static - podresourcesGetAllocatableResourcesTests(f, cli, nil, cpuset.CPUSet{}, cpuset.CPUSet{}) - }) - - ginkgo.It("should return the expected error with the feature gate disabled", func() { - e2eskipper.SkipIfFeatureGateEnabled(kubefeatures.KubeletPodResourcesGetAllocatable) - - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) - - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() - - ginkgo.By("checking GetAllocatableResources fail if the feature gate is not enabled") - allocatableRes, err := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) - framework.Logf("GetAllocatableResources result: %v, err: %v", allocatableRes, err) - framework.ExpectError(err, "With feature gate disabled, the call must fail") + // intentionally passing empty cpuset instead of onlineCPUs because with none policy + // we should get no allocatable cpus - no exclusively allocatable CPUs, depends on policy static + podresourcesGetAllocatableResourcesTests(cli, sd, cpuset.CPUSet{}, cpuset.CPUSet{}) + }) }) }) - ginkgo.Context("Use KubeVirt device plugin, which reports resources w/o hardware topology", func() { - ginkgo.It("should return proper podresources the same as before the restart of kubelet", func() { + ginkgo.Context("without SRIOV devices in the system", func() { + ginkgo.BeforeEach(func() { + requireLackOfSRIOVDevices() + }) - if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResources) { - e2eskipper.Skipf("this test is meant to run with the POD Resources Extensions feature gate enabled") - } + ginkgo.Context("with CPU manager Static policy", func() { + ginkgo.BeforeEach(func() { + // this is a very rough check. 
We just want to rule out system that does NOT have enough resources + _, cpuAlloc, _ := getLocalNodeCPUDetails(f) + if cpuAlloc < minCoreCount { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount) + } + }) + + // empty context to apply kubelet config changes + ginkgo.Context("", func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + // Set the CPU Manager policy to static. + initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic) + + // Set the CPU Manager reconcile period to 1 second. + initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} + + cpus := reservedSystemCPUs.String() + framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus) + initialConfig.ReservedSystemCPUs = cpus + }) + + ginkgo.It("should return the expected responses", func() { + onlineCPUs, err := getOnlineCPUs() + framework.ExpectNoError(err) + + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) + + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() + + podresourcesListTests(f, cli, nil) + podresourcesGetAllocatableResourcesTests(cli, nil, onlineCPUs, reservedSystemCPUs) + }) + }) + }) + + ginkgo.Context("with CPU manager None policy", func() { + ginkgo.It("should return the expected responses", func() { + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) + + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() + + // intentionally passing empty cpuset instead of onlineCPUs because with none policy + // we should get no allocatable cpus - no exclusively allocatable CPUs, depends on policy static + podresourcesGetAllocatableResourcesTests(cli, nil, cpuset.CPUSet{}, cpuset.CPUSet{}) + }) + }) + + ginkgo.Context("with disabled KubeletPodResourcesGetAllocatable feature gate", func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + if initialConfig.FeatureGates == nil { + initialConfig.FeatureGates = make(map[string]bool) + } + initialConfig.FeatureGates[string(kubefeatures.KubeletPodResourcesGetAllocatable)] = false + }) + + ginkgo.It("should return the expected error with the feature gate disabled", func() { + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) + + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() + + ginkgo.By("checking GetAllocatableResources fail if the feature gate is not enabled") + allocatableRes, err := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) + framework.Logf("GetAllocatableResources result: %v, err: %v", allocatableRes, err) + framework.ExpectError(err, "With feature gate disabled, the call must fail") + }) + }) + }) + + ginkgo.Context("with KubeVirt device plugin, which reports resources w/o hardware topology", func() { + ginkgo.BeforeEach(func() { _, err := os.Stat("/dev/kvm") if errors.Is(err, os.ErrNotExist) { e2eskipper.Skipf("KubeVirt device plugin could work only in kvm based environment") } - - _, cpuAlloc, _ := 
getLocalNodeCPUDetails(f) - - if cpuAlloc < minCoreCount { - e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount) - } - - // Make sure all the feature gates and the right settings are in place. - oldCfg := configurePodResourcesInKubelet(f, true, reservedSystemCPUs) - defer func() { - // restore kubelet config - setOldKubeletConfig(f, oldCfg) - - // Delete state file to allow repeated runs - deleteStateFile() - }() - - dpPod := setupKubeVirtDevicePluginOrFail(f) - defer teardownKubeVirtDevicePluginOrFail(f, dpPod) - - waitForKubeVirtResources(f, dpPod) - - endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) - framework.ExpectNoError(err) - - cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) - framework.ExpectNoError(err) - defer conn.Close() - - ginkgo.By("checking List and resources kubevirt resource should be without topology") - - allocatableResponse, _ := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) - for _, dev := range allocatableResponse.GetDevices() { - if dev.ResourceName != KubeVirtResourceName { - continue - } - - framework.ExpectEqual(dev.Topology == nil, true, "Topology is expected to be empty for kubevirt resources") - } - - // Run pod which requires KubeVirtResourceName - desc := podDesc{ - podName: "pod-01", - cntName: "cnt-01", - resourceName: KubeVirtResourceName, - resourceAmount: 1, - cpuRequest: 1000, - } - - tpd := newTestPodData() - tpd.createPodsForTest(f, []podDesc{ - desc, - }) - - expectPodResources(1, cli, []podDesc{desc}) - - ginkgo.By("Restarting Kubelet") - restartKubelet(true) - framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout) - expectPodResources(1, cli, []podDesc{desc}) - tpd.deletePodsForTest(f) }) + ginkgo.Context("with CPU manager Static policy", func() { + ginkgo.BeforeEach(func() { + // this is a very rough check. We just want to rule out system that does NOT have enough resources + _, cpuAlloc, _ := getLocalNodeCPUDetails(f) + + if cpuAlloc < minCoreCount { + e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount) + } + }) + + // empty context to apply kubelet config changes + ginkgo.Context("", func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + // Set the CPU Manager policy to static. + initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic) + + // Set the CPU Manager reconcile period to 1 second. 
+ initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} + + cpus := reservedSystemCPUs.String() + framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus) + initialConfig.ReservedSystemCPUs = cpus + }) + + ginkgo.It("should return proper podresources the same as before the restart of kubelet", func() { + dpPod := setupKubeVirtDevicePluginOrFail(f) + defer teardownKubeVirtDevicePluginOrFail(f, dpPod) + + waitForKubeVirtResources(f, dpPod) + + endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket) + framework.ExpectNoError(err) + + cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize) + framework.ExpectNoError(err) + defer conn.Close() + + ginkgo.By("checking List and resources kubevirt resource should be without topology") + + allocatableResponse, _ := cli.GetAllocatableResources(context.TODO(), &kubeletpodresourcesv1.AllocatableResourcesRequest{}) + for _, dev := range allocatableResponse.GetDevices() { + if dev.ResourceName != KubeVirtResourceName { + continue + } + + framework.ExpectEqual(dev.Topology == nil, true, "Topology is expected to be empty for kubevirt resources") + } + + // Run pod which requires KubeVirtResourceName + desc := podDesc{ + podName: "pod-01", + cntName: "cnt-01", + resourceName: KubeVirtResourceName, + resourceAmount: 1, + cpuRequest: 1000, + } + + tpd := newTestPodData() + tpd.createPodsForTest(f, []podDesc{ + desc, + }) + + expectPodResources(1, cli, []podDesc{desc}) + + ginkgo.By("Restarting Kubelet") + restartKubelet(true) + framework.WaitForAllNodesSchedulable(f.ClientSet, framework.TestContext.NodeSchedulableTimeout) + expectPodResources(1, cli, []podDesc{desc}) + tpd.deletePodsForTest(f) + }) + }) + }) }) }) @@ -786,82 +799,6 @@ func getOnlineCPUs() (cpuset.CPUSet, error) { return cpuset.Parse(strings.TrimSpace(string(onlineCPUList))) } -func configurePodResourcesInKubelet(f *framework.Framework, cleanStateFile bool, reservedSystemCPUs cpuset.CPUSet) (oldCfg *kubeletconfig.KubeletConfiguration) { - // we also need CPUManager with static policy to be able to do meaningful testing - oldCfg, err := getCurrentKubeletConfig() - framework.ExpectNoError(err) - newCfg := oldCfg.DeepCopy() - newCfg.FeatureGates[string(kubefeatures.KubeletPodResourcesGetAllocatable)] = true - - // After graduation of the CPU Manager feature to Beta, the CPU Manager - // "none" policy is ON by default. But when we set the CPU Manager policy to - // "static" in this test and the Kubelet is restarted so that "static" - // policy can take effect, there will always be a conflict with the state - // checkpointed in the disk (i.e., the policy checkpointed in the disk will - // be "none" whereas we are trying to restart Kubelet with "static" - // policy). Therefore, we delete the state file so that we can proceed - // with the tests. - // Only delete the state file at the begin of the tests. - if cleanStateFile { - deleteStateFile() - } - - // Set the CPU Manager policy to static. - newCfg.CPUManagerPolicy = string(cpumanager.PolicyStatic) - - // Set the CPU Manager reconcile period to 1 second. 
- newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} - - if reservedSystemCPUs.Size() > 0 { - cpus := reservedSystemCPUs.String() - framework.Logf("configurePodResourcesInKubelet: using reservedSystemCPUs=%q", cpus) - newCfg.ReservedSystemCPUs = cpus - } else { - // The Kubelet panics if either kube-reserved or system-reserved is not set - // when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that - // kubelet doesn't panic. - if newCfg.KubeReserved == nil { - newCfg.KubeReserved = map[string]string{} - } - - if _, ok := newCfg.KubeReserved["cpu"]; !ok { - newCfg.KubeReserved["cpu"] = "200m" - } - } - // Update the Kubelet configuration. - framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) - - // Wait for the Kubelet to be ready. - gomega.Eventually(func() bool { - nodes, err := e2enode.TotalReady(f.ClientSet) - framework.ExpectNoError(err) - return nodes == 1 - }, time.Minute, time.Second).Should(gomega.BeTrue()) - - return oldCfg -} - -func enablePodResourcesFeatureGateInKubelet(f *framework.Framework) (oldCfg *kubeletconfig.KubeletConfiguration) { - oldCfg, err := getCurrentKubeletConfig() - framework.ExpectNoError(err) - newCfg := oldCfg.DeepCopy() - if newCfg.FeatureGates == nil { - newCfg.FeatureGates = make(map[string]bool) - } - - // Update the Kubelet configuration. - framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) - - // Wait for the Kubelet to be ready. - gomega.Eventually(func() bool { - nodes, err := e2enode.TotalReady(f.ClientSet) - framework.ExpectNoError(err) - return nodes == 1 - }, time.Minute, time.Second).Should(gomega.BeTrue()) - - return oldCfg -} - func setupKubeVirtDevicePluginOrFail(f *framework.Framework) *v1.Pod { e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index ab207325e3d..9329593bce6 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -27,23 +27,22 @@ import ( "sync" "time" - testutils "k8s.io/kubernetes/test/utils" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/types" + "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" + testutils "k8s.io/kubernetes/test/utils" "github.com/onsi/ginkgo" "github.com/onsi/gomega" @@ -194,7 +193,7 @@ func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (in return findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes) } -func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy, scope string, configMap *v1.ConfigMap, numaNodes int) string { +func configureTopologyManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, policy, scope string, configMap *v1.ConfigMap, numaNodes int) (*kubeletconfig.KubeletConfiguration, string) { // Configure Topology Manager in Kubelet with policy. 
 	newCfg := oldCfg.DeepCopy()
 	if newCfg.FeatureGates == nil {
@@ -204,8 +203,6 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco
 	newCfg.FeatureGates["CPUManager"] = true
 	newCfg.FeatureGates["TopologyManager"] = true
 
-	deleteStateFile()
-
 	// Set the Topology Manager policy
 	newCfg.TopologyManagerPolicy = policy
 
@@ -237,17 +234,7 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco
 	// Dump the config -- debug
 	framework.Logf("New kubelet config is %s", *newCfg)
 
-	// Update the Kubelet configuration.
-	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
-
-	// Wait for the Kubelet to be ready.
-	gomega.Eventually(func() bool {
-		nodes, err := e2enode.TotalReady(f.ClientSet)
-		framework.ExpectNoError(err)
-		return nodes == 1
-	}, time.Minute, time.Second).Should(gomega.BeTrue())
-
-	return newCfg.ReservedSystemCPUs
+	return newCfg, newCfg.ReservedSystemCPUs
 }
 
 // getSRIOVDevicePluginPod returns the Device Plugin pod for sriov resources in e2e tests.
@@ -900,7 +887,8 @@ func runTopologyManagerTests(f *framework.Framework) {
 			ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
 			framework.Logf("Configuring topology Manager policy to %s", policy)
 
-			configureTopologyManagerInKubelet(f, oldCfg, policy, scope, nil, 0)
+			newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
+			updateKubeletConfig(f, newCfg, true)
 			// Run the tests
 			runTopologyManagerPolicySuiteTests(f)
 		}
@@ -923,7 +911,8 @@ func runTopologyManagerTests(f *framework.Framework) {
 			ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
 			framework.Logf("Configuring topology Manager policy to %s", policy)
 
-			reservedSystemCPUs := configureTopologyManagerInKubelet(f, oldCfg, policy, scope, configMap, numaNodes)
+			newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, configMap, numaNodes)
+			updateKubeletConfig(f, newCfg, true)
 
 			runTopologyManagerNodeAlignmentSuiteTests(f, sd, reservedSystemCPUs, policy, numaNodes, coreCount)
 		}
@@ -940,14 +929,15 @@ func runTopologyManagerTests(f *framework.Framework) {
 		policy := topologymanager.PolicySingleNumaNode
 		scope := podScopeTopology
 
-		reservedSystemCPUs := configureTopologyManagerInKubelet(f, oldCfg, policy, scope, configMap, numaNodes)
+		newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, configMap, numaNodes)
+		updateKubeletConfig(f, newCfg, true)
 
 		runTMScopeResourceAlignmentTestSuite(f, configMap, reservedSystemCPUs, policy, numaNodes, coreCount)
 	})
 
 	ginkgo.AfterEach(func() {
 		// restore kubelet config
-		setOldKubeletConfig(f, oldCfg)
+		updateKubeletConfig(f, oldCfg, true)
 	})
 }
 
diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go
index dffeafe3d1d..3bc3e85777f 100644
--- a/test/e2e_node/util.go
+++ b/test/e2e_node/util.go
@@ -50,10 +50,12 @@ import (
 	kubeletconfigcodec "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/codec"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/util"
+
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
 	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
+	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
 	imageutils "k8s.io/kubernetes/test/utils/image"
 
 	"github.com/onsi/ginkgo"
@@ -73,6 +75,9 @@ const (
 	defaultPodResourcesMaxSize = 1024 * 1024 * 16 // 16 Mb
 	kubeletReadOnlyPort        = "10255"
 	kubeletHealthCheckURL      = "http://127.0.0.1:" + kubeletReadOnlyPort + "/healthz"
"http://127.0.0.1:" + kubeletReadOnlyPort + "/healthz" + // state files + cpuManagerStateFile = "/var/lib/kubelet/cpu_manager_state" + memoryManagerStateFile = "/var/lib/kubelet/memory_manager_state" ) func getNodeSummary() (*stats.Summary, error) { @@ -156,30 +161,67 @@ func getCurrentKubeletConfig() (*kubeletconfig.KubeletConfiguration, error) { // Returns true on success. func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(initialConfig *kubeletconfig.KubeletConfiguration)) { var oldCfg *kubeletconfig.KubeletConfiguration + ginkgo.BeforeEach(func() { - configEnabled, err := isKubeletConfigEnabled(f) - framework.ExpectNoError(err) - framework.ExpectEqual(configEnabled, true, "The Dynamic Kubelet Configuration feature is not enabled.\n"+ - "Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n"+ - "For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.") - oldCfg, err = getCurrentKubeletConfig() + oldCfg, err := getCurrentKubeletConfig() framework.ExpectNoError(err) + newCfg := oldCfg.DeepCopy() updateFunction(newCfg) if apiequality.Semantic.DeepEqual(*newCfg, *oldCfg) { return } - framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) + updateKubeletConfig(f, newCfg, true) }) + ginkgo.AfterEach(func() { if oldCfg != nil { - err := setKubeletConfiguration(f, oldCfg) - framework.ExpectNoError(err) + // Update the Kubelet configuration. + updateKubeletConfig(f, oldCfg, true) } }) } +func updateKubeletConfig(f *framework.Framework, kubeletConfig *kubeletconfig.KubeletConfiguration, deleteStateFiles bool) { + // Update the Kubelet configuration. + ginkgo.By("Stopping the kubelet") + startKubelet := stopKubelet() + + // wait until the kubelet health check will fail + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, time.Minute, time.Second).Should(gomega.BeFalse()) + + // Delete CPU and memory manager state files to be sure it will not prevent the kubelet restart + if deleteStateFiles { + deleteStateFile(cpuManagerStateFile) + deleteStateFile(memoryManagerStateFile) + } + + framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(kubeletConfig)) + + ginkgo.By("Starting the kubelet") + startKubelet() + + // wait until the kubelet health check will succeed + gomega.Eventually(func() bool { + return kubeletHealthCheck(kubeletHealthCheckURL) + }, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue()) + + // Wait for the Kubelet to be ready. + gomega.Eventually(func() bool { + nodes, err := e2enode.TotalReady(f.ClientSet) + framework.ExpectNoError(err) + return nodes == 1 + }, time.Minute, time.Second).Should(gomega.BeTrue()) +} + +func deleteStateFile(stateFileName string) { + err := exec.Command("/bin/sh", "-c", fmt.Sprintf("rm -f %s", stateFileName)).Run() + framework.ExpectNoError(err, "failed to delete the state file") +} + // Returns true if kubeletConfig is enabled, false otherwise or if we cannot determine if it is. func isKubeletConfigEnabled(f *framework.Framework) (bool, error) { cfgz, err := getCurrentKubeletConfig() @@ -193,64 +235,6 @@ func isKubeletConfigEnabled(f *framework.Framework) (bool, error) { return v, nil } -// Creates or updates the configmap for KubeletConfiguration, waits for the Kubelet to restart -// with the new configuration. Returns an error if the configuration after waiting for restartGap -// doesn't match what you attempted to set, or if the dynamic configuration feature is disabled. 
-// You should only call this from serial tests.
-func setKubeletConfiguration(f *framework.Framework, kubeCfg *kubeletconfig.KubeletConfiguration) error {
-	const (
-		restartGap   = 40 * time.Second
-		pollInterval = 5 * time.Second
-	)
-
-	// make sure Dynamic Kubelet Configuration feature is enabled on the Kubelet we are about to reconfigure
-	if configEnabled, err := isKubeletConfigEnabled(f); err != nil {
-		return err
-	} else if !configEnabled {
-		return fmt.Errorf("The Dynamic Kubelet Configuration feature is not enabled.\n" +
-			"Pass --feature-gates=DynamicKubeletConfig=true to the Kubelet to enable this feature.\n" +
-			"For `make test-e2e-node`, you can set `TEST_ARGS='--feature-gates=DynamicKubeletConfig=true'`.")
-	}
-
-	// create the ConfigMap with the new configuration
-	cm, err := createConfigMap(f, kubeCfg)
-	if err != nil {
-		return err
-	}
-
-	// create the reference and set Node.Spec.ConfigSource
-	src := &v1.NodeConfigSource{
-		ConfigMap: &v1.ConfigMapNodeConfigSource{
-			Namespace:        "kube-system",
-			Name:             cm.Name,
-			KubeletConfigKey: "kubelet",
-		},
-	}
-
-	// set the source, retry a few times in case we are competing with other writers
-	gomega.Eventually(func() error {
-		if err := setNodeConfigSource(f, src); err != nil {
-			return err
-		}
-		return nil
-	}, time.Minute, time.Second).Should(gomega.BeNil())
-
-	// poll for new config, for a maximum wait of restartGap
-	gomega.Eventually(func() error {
-		newKubeCfg, err := getCurrentKubeletConfig()
-		if err != nil {
-			return fmt.Errorf("failed trying to get current Kubelet config, will retry, error: %v", err)
-		}
-		if !apiequality.Semantic.DeepEqual(*kubeCfg, *newKubeCfg) {
-			return fmt.Errorf("still waiting for new configuration to take effect, will continue to watch /configz")
-		}
-		klog.Infof("new configuration has taken effect")
-		return nil
-	}, restartGap, pollInterval).Should(gomega.BeNil())
-
-	return nil
-}
-
 // sets the current node's configSource, this should only be called from Serial tests
 func setNodeConfigSource(f *framework.Framework, source *v1.NodeConfigSource) error {
 	// since this is a serial test, we just get the node, change the source, and then update it
@@ -275,16 +260,6 @@ func setNodeConfigSource(f *framework.Framework, source *v1.NodeConfigSource) er
 	return nil
 }
 
-// creates a configmap containing kubeCfg in kube-system namespace
-func createConfigMap(f *framework.Framework, internalKC *kubeletconfig.KubeletConfiguration) (*v1.ConfigMap, error) {
-	cmap := newKubeletConfigMap("testcfg", internalKC)
-	cmap, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Create(context.TODO(), cmap, metav1.CreateOptions{})
-	if err != nil {
-		return nil, err
-	}
-	return cmap, nil
-}
-
 // constructs a ConfigMap, populating one of its keys with the KubeletConfiguration. Always uses GenerateName to generate a suffix.
 func newKubeletConfigMap(name string, internalKC *kubeletconfig.KubeletConfiguration) *v1.ConfigMap {
 	data, err := kubeletconfigcodec.EncodeKubeletConfig(internalKC, kubeletconfigv1beta1.SchemeGroupVersion)
@@ -447,8 +422,9 @@ func stopKubelet() func() {
 	framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))
 
 	return func() {
-		stdout, err := exec.Command("sudo", "systemctl", "start", kubeletServiceName).CombinedOutput()
-		framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
+		// we should restart the service, otherwise the transient service start will fail
+		stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
+		framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
 	}
 }
 
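For orientation, the reworked helpers above are consumed from inside the e2e_node test package roughly as sketched below. This is a minimal, illustrative sketch and not part of the patch: the Describe/It names, the "static" policy, and the kube-reserved value are assumptions; only tempSetCurrentKubeletConfig, the updateKubeletConfig flow it now delegates to, and the KubeletConfiguration fields shown are taken from the code above.

// Illustrative sketch only; assumed to compile in package e2e_node, next to the helpers above.
package e2e_node

import (
	"time"

	"github.com/onsi/ginkgo"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/test/e2e/framework"
)

var _ = ginkgo.Describe("[Serial] example kubelet reconfiguration", func() {
	f := framework.NewDefaultFramework("example-kubelet-reconfig")

	// tempSetCurrentKubeletConfig registers BeforeEach/AfterEach hooks: it snapshots the
	// running config, applies the mutation below by rewriting the kubelet config file and
	// restarting the service (updateKubeletConfig), and restores the original config when
	// the spec finishes.
	tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
		initialConfig.CPUManagerPolicy = "static" // illustrative policy choice
		initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}

		// Reserve some CPU so the kubelet does not refuse to start with the static policy.
		if initialConfig.KubeReserved == nil {
			initialConfig.KubeReserved = map[string]string{}
		}
		initialConfig.KubeReserved["cpu"] = "200m"
	})

	ginkgo.It("runs against the temporarily reconfigured kubelet", func() {
		// test body exercising the reconfigured node goes here
	})
})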