From 3b4122bd0386a5e13d351d9ef191bdd947261d56 Mon Sep 17 00:00:00 2001
From: Francesco Romani
Date: Tue, 4 Feb 2020 17:20:22 +0100
Subject: [PATCH] e2e: topomgr: get and use topology hints from conf

To properly implement some e2e tests, we need to know some basic
topology facts about the system running the tests.

The bare minimum we need to know is how many PCI SRIOV devices are
attached to which NUMA node. This way we know which core we can
reserve for kube services, and which NUMA socket we can take to test
full socket reservation.

To let the tests know the PCI device topology, we use annotations on
the SRIOV device plugin ConfigMap, which the tests already need.
The format is

```yaml
metadata:
  annotations:
    pcidevice_node0: "2"
    pcidevice_node1: "0"
```

with one annotation per NUMA node in the system.

Signed-off-by: Francesco Romani
---
 test/e2e_node/numa_alignment.go        | 13 +++++
 test/e2e_node/topology_manager_test.go | 80 +++++++++++++++++++-------
 2 files changed, 71 insertions(+), 22 deletions(-)

diff --git a/test/e2e_node/numa_alignment.go b/test/e2e_node/numa_alignment.go
index d44a58036d3..33edd964e8c 100644
--- a/test/e2e_node/numa_alignment.go
+++ b/test/e2e_node/numa_alignment.go
@@ -18,6 +18,7 @@ package e2enode
 
 import (
 	"fmt"
+	"io/ioutil"
 	"sort"
 	"strconv"
 	"strings"
@@ -75,6 +76,18 @@ func (R *numaPodResources) String() string {
 	return b.String()
 }
 
+func getCPUsPerNUMANode(nodeNum int) ([]int, error) {
+	nodeCPUList, err := ioutil.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeNum))
+	if err != nil {
+		return nil, err
+	}
+	cpus, err := cpuset.Parse(strings.TrimSpace(string(nodeCPUList)))
+	if err != nil {
+		return nil, err
+	}
+	return cpus.ToSlice(), nil
+}
+
 func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, environ map[string]string, numaNodes int) (map[int]int, error) {
 	var cpuIDs []int
 	cpuListAllowedEnvVar := "CPULIST_ALLOWED"
diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go
index a59fb3ce372..bd6a8b314cb 100644
--- a/test/e2e_node/topology_manager_test.go
+++ b/test/e2e_node/topology_manager_test.go
@@ -132,7 +132,27 @@ func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnA
 	}
 }
 
-func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string) {
+func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
+	for nodeNum := 0; nodeNum < numaNodes; nodeNum++ {
+		value, ok := configMap.Annotations[fmt.Sprintf("pcidevice_node%d", nodeNum)]
+		if !ok {
+			framework.Logf("missing pcidevice annotation for NUMA node %d", nodeNum)
+			return -1, false
+		}
+		v, err := strconv.Atoi(value)
+		if err != nil {
+			framework.Failf("error getting the PCI device count on NUMA node %d: %v", nodeNum, err)
+		}
+		if v == 0 {
+			framework.Logf("NUMA node %d has no SRIOV devices attached", nodeNum)
+			return nodeNum, true
+		}
+		framework.Logf("NUMA node %d has %d SRIOV devices attached", nodeNum, v)
+	}
+	return -1, false
+}
+
+func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string, configMap *v1.ConfigMap, numaNodes int) string {
 	// Configure Topology Manager in Kubelet with policy.
 	newCfg := oldCfg.DeepCopy()
 	if newCfg.FeatureGates == nil {
@@ -153,18 +173,25 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco
 	// Set the CPU Manager reconcile period to 1 second.
 	newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
 
-	// The Kubelet panics if either kube-reserved or system-reserved is not set
-	// when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that
-	// kubelet doesn't panic.
-	if newCfg.KubeReserved == nil {
-		newCfg.KubeReserved = map[string]string{}
-	}
+	if nodeNum, ok := findNUMANodeWithoutSRIOVDevices(configMap, numaNodes); ok {
+		cpus, err := getCPUsPerNUMANode(nodeNum)
+		framework.ExpectNoError(err)
+		framework.Logf("using NUMA node %d (no SRIOV devices attached) with cpus=%v", nodeNum, cpus)
+		newCfg.ReservedSystemCPUs = fmt.Sprintf("%d", cpus[len(cpus)-1])
+	} else {
+		// The Kubelet panics if either kube-reserved or system-reserved is not set
+		// when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that
+		// kubelet doesn't panic.
+		if newCfg.KubeReserved == nil {
+			newCfg.KubeReserved = map[string]string{}
+		}
 
-	if _, ok := newCfg.KubeReserved["cpu"]; !ok {
-		newCfg.KubeReserved["cpu"] = "200m"
+		if _, ok := newCfg.KubeReserved["cpu"]; !ok {
+			newCfg.KubeReserved["cpu"] = "200m"
+		}
 	}
 
 	// Dump the config -- debug
-	framework.Logf("New kublet config is %s", *newCfg)
+	framework.Logf("New kubelet config is %s", *newCfg)
 	// Update the Kubelet configuration.
 	framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
@@ -175,6 +202,8 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco
 		framework.ExpectNoError(err)
 		return nodes == 1
 	}, time.Minute, time.Second).Should(gomega.BeTrue())
+
+	return newCfg.ReservedSystemCPUs
 }
 
 // getSRIOVDevicePluginPod returns the Device Plugin pod for sriov resources in e2e tests.
@@ -548,14 +577,14 @@ func runTopologyManagerNegativeTest(f *framework.Framework, numaNodes, numPods i
 	deletePods(f, []string{pod.Name})
 }
 
-func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes, coreCount int) {
+func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap {
 	cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML)
 	var err error
 
 	// the SRIOVDP configuration is hw-dependent, so we allow per-test-host customization.
- framework.Logf("host-local SRIOV Device Plugin Config Map %q", framework.TestContext.SriovdpConfigMapFile) - if framework.TestContext.SriovdpConfigMapFile != "" { - cmData, err = ioutil.ReadFile(framework.TestContext.SriovdpConfigMapFile) + framework.Logf("host-local SRIOV Device Plugin Config Map %q", cmFile) + if cmFile != "" { + cmData, err = ioutil.ReadFile(cmFile) if err != nil { framework.Failf("unable to load the SRIOV Device Plugin ConfigMap: %v", err) } @@ -563,7 +592,12 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes framework.Logf("Using built-in SRIOV Device Plugin Config Map") } - configMap := readConfigMapV1OrDie(cmData) + return readConfigMapV1OrDie(cmData) +} + +func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int) { + var err error + ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name)) if _, err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil { framework.Failf("unable to create test configMap %s: %v", configMap.Name, err) @@ -607,10 +641,10 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes // two guaranteed cores, one device runTopologyManagerPositiveTest(f, numaNodes, 1, "2000m", sriovResourceName, "1") - // TODO: test taking an entire NUMA node. - // to do a meaningful test, we need to know: - // - where are the reserved CPUs placed (which NUMA node) - // - where are the SRIOV device attacched to (which NUMA node(s)) + // take an entire socket; but for that, we need to have reserved CPUs on another one. + if reservedSystemCPUs != "" { + runTopologyManagerPositiveTest(f, numaNodes, 1, fmt.Sprintf("%dm", threadsPerCore*coreCount*1000), sriovResourceName, "1") + } if sriovResourceAmount > 1 { // no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function @@ -646,7 +680,7 @@ func runTopologyManagerTests(f *framework.Framework) { ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy)) framework.Logf("Configuring topology Manager policy to %s", policy) - configureTopologyManagerInKubelet(f, oldCfg, policy) + configureTopologyManagerInKubelet(f, oldCfg, policy, nil, 0) // Run the tests runTopologyManagerPolicySuiteTests(f) } @@ -674,6 +708,8 @@ func runTopologyManagerTests(f *framework.Framework) { e2eskipper.Skipf("this test is meant to run on a system with at least one SRIOV device") } + configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) + oldCfg, err = getCurrentKubeletConfig() framework.ExpectNoError(err) @@ -683,9 +719,9 @@ func runTopologyManagerTests(f *framework.Framework) { ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy)) framework.Logf("Configuring topology Manager policy to %s", policy) - configureTopologyManagerInKubelet(f, oldCfg, policy) + reservedSystemCPUs := configureTopologyManagerInKubelet(f, oldCfg, policy, configMap, numaNodes) - runTopologyManagerNodeAlignmentSuiteTests(f, numaNodes, coreCount) + runTopologyManagerNodeAlignmentSuiteTests(f, configMap, reservedSystemCPUs, numaNodes, coreCount) // restore kubelet config setOldKubeletConfig(f, oldCfg)