diff --git a/test/e2e_node/numa_alignment.go b/test/e2e_node/numa_alignment.go
index 11a55ed6186..a1c17454d36 100644
--- a/test/e2e_node/numa_alignment.go
+++ b/test/e2e_node/numa_alignment.go
@@ -19,6 +19,8 @@ package e2enode
 import (
 	"fmt"
 	"io/ioutil"
+	"os"
+	"path/filepath"
 	"sort"
 	"strconv"
 	"strings"
@@ -153,12 +155,7 @@ func getPCIDeviceToNumaNodeMapFromEnv(f *framework.Framework, pod *v1.Pod, envir
 		for _, pciDev := range pciDevs {
 			pciDevNUMANode := f.ExecCommandInContainer(pod.Name, pod.Spec.Containers[0].Name,
 				"/bin/cat", fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", pciDev))
-
-			nodeNum, err := strconv.Atoi(pciDevNUMANode)
-			if err != nil {
-				return nil, err
-			}
-			NUMAPerDev[pciDev] = nodeNum
+			NUMAPerDev[pciDev] = numaNodeFromSysFsEntry(pciDevNUMANode)
 		}
 	}
 	if len(NUMAPerDev) == 0 {
@@ -209,3 +206,56 @@ func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, logs string, numaNo
 	}
 	return numaRes, nil
 }
+
+type pciDeviceInfo struct {
+	Address  string
+	NUMANode int
+	IsPhysFn bool
+	IsVFn    bool
+}
+
+func getPCIDeviceInfo(sysPCIDir string) ([]pciDeviceInfo, error) {
+	var pciDevs []pciDeviceInfo
+
+	entries, err := ioutil.ReadDir(sysPCIDir)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, entry := range entries {
+		isPhysFn := false
+		isVFn := false
+		if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "sriov_numvfs")); err == nil {
+			isPhysFn = true
+		} else if !os.IsNotExist(err) {
+			// unexpected error. Bail out
+			return nil, err
+		}
+		if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "physfn")); err == nil {
+			isVFn = true
+		} else if !os.IsNotExist(err) {
+			// unexpected error. Bail out
+			return nil, err
+		}
+
+		content, err := ioutil.ReadFile(filepath.Join(sysPCIDir, entry.Name(), "numa_node"))
+		if err != nil {
+			return nil, err
+		}
+
+		pciDevs = append(pciDevs, pciDeviceInfo{
+			Address:  entry.Name(),
+			NUMANode: numaNodeFromSysFsEntry(string(content)),
+			IsPhysFn: isPhysFn,
+			IsVFn:    isVFn,
+		})
+	}
+
+	return pciDevs, nil
+}
+
+func numaNodeFromSysFsEntry(content string) int {
+	nodeNum, err := strconv.Atoi(strings.TrimSpace(content))
+	framework.ExpectNoError(err, "error detecting the device numa_node from sysfs: %v", err)
+	return nodeNum
+}
diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go
index ce025d03c12..6ec77a58cfb 100644
--- a/test/e2e_node/topology_manager_test.go
+++ b/test/e2e_node/topology_manager_test.go
@@ -134,7 +134,7 @@ func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnA
 	}
 }
 
-func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
+func findNUMANodeWithoutSRIOVDevicesFromConfigMap(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
 	for nodeNum := 0; nodeNum < numaNodes; nodeNum++ {
 		value, ok := configMap.Annotations[fmt.Sprintf("pcidevice_node%d", nodeNum)]
 		if !ok {
@@ -154,6 +154,46 @@ func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (in
 	return -1, false
 }
 
+func findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes int) (int, bool) {
+	pciDevs, err := getPCIDeviceInfo("/sys/bus/pci/devices")
+	if err != nil {
+		framework.Failf("error detecting the PCI device NUMA node: %v", err)
+	}
+
+	pciPerNuma := make(map[int]int)
+	for _, pciDev := range pciDevs {
+		if pciDev.IsVFn {
+			pciPerNuma[pciDev.NUMANode]++
+		}
+	}
+
+	if len(pciPerNuma) == 0 {
+		// if we got this far we already passed a rough check that SRIOV devices
+		// are available in the box, so something is seriously wrong
+		framework.Failf("failed to find any VF devices from %v", pciDevs)
+	}
+
+	for nodeNum := 0; nodeNum < numaNodes; nodeNum++ {
+		v := pciPerNuma[nodeNum]
+		if v == 0 {
+			framework.Logf("NUMA node %d has no SRIOV devices attached", nodeNum)
+			return nodeNum, true
+		}
+		framework.Logf("NUMA node %d has %d SRIOV devices attached", nodeNum, v)
+	}
+	return -1, false
+}
+
+func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
+	// if someone annotated the configMap, let's use this information
+	if nodeNum, found := findNUMANodeWithoutSRIOVDevicesFromConfigMap(configMap, numaNodes); found {
+		return nodeNum, found
+	}
+	// no annotations, try to autodetect
+	// NOTE: this assumes all the VFs in the box can be used for the tests.
+	return findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes)
+}
+
 func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string, configMap *v1.ConfigMap, numaNodes int) string {
 	// Configure Topology Manager in Kubelet with policy.
 	newCfg := oldCfg.DeepCopy()