mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 01:40:13 +00:00
e2e: TM: add option to fail instead of skip
The Topology Manager e2e tests want to run on real multi-NUMA systems and want to consume real devices supported by device plugins; SRIOV devices happen to be the most commonly available of such devices. CI machines are neither multi-NUMA nor do they expose SRIOV devices, so the biggest portion of the tests will just skip, and we need to keep it like this until we figure out how to enable these features. However, some organizations can and want to run the test suite on bare metal; in this case, the current tests will skip (not fail) on misconfigured boxes, and this reports a misleading result. It would be much better to fail if the test preconditions aren't met. To satisfy both needs, we add an option, controlled by an environment variable, to fail (not skip) if the machine on which the tests run doesn't meet the expectations (multi-NUMA, 4+ cores per NUMA cell, exposed SRIOV VFs). We keep the old behaviour as the default to stay CI friendly. Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
parent
dd2d12f6dc
commit
54c7d8fbb1
@ -187,6 +187,10 @@ type TestContextType struct {
|
|||||||
|
|
||||||
// SnapshotControllerHTTPPort the port used for communicating with the snapshot controller HTTP endpoint.
|
// SnapshotControllerHTTPPort the port used for communicating with the snapshot controller HTTP endpoint.
|
||||||
SnapshotControllerHTTPPort int
|
SnapshotControllerHTTPPort int
|
||||||
|
|
||||||
|
// RequireDevices makes it mandatory for the environment on which tests are run to expose 1+ devices through device plugins.
|
||||||
|
// With this enabled, the e2e tests requiring devices for their operation can assume that if devices aren't reported, the test can fail.
|
||||||
|
RequireDevices bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NodeKillerConfig describes configuration of NodeKiller -- a utility to
|
// NodeKillerConfig describes configuration of NodeKiller -- a utility to
|
||||||
|
@ -91,6 +91,7 @@ func registerNodeFlags(flags *flag.FlagSet) {
|
|||||||
flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.")
|
flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.")
|
||||||
flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.")
|
flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.")
|
||||||
flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.")
|
flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.")
|
||||||
|
flags.BoolVar(&framework.TestContext.RequireDevices, "require-devices", false, "If true, require device plugins to be installed in the running environment.")
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@ -490,9 +490,8 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
|
|||||||
if cpuAlloc < minCoreCount {
|
if cpuAlloc < minCoreCount {
|
||||||
e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
|
e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
|
||||||
}
|
}
|
||||||
if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount == 0 {
|
|
||||||
e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device")
|
requireSRIOVDevices()
|
||||||
}
|
|
||||||
|
|
||||||
onlineCPUs, err := getOnlineCPUs()
|
onlineCPUs, err := getOnlineCPUs()
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
@ -532,9 +531,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
|
|||||||
ginkgo.It("should return the expected responses with cpumanager none policy", func() {
|
ginkgo.It("should return the expected responses with cpumanager none policy", func() {
|
||||||
// current default is "none" policy - no need to restart the kubelet
|
// current default is "none" policy - no need to restart the kubelet
|
||||||
|
|
||||||
if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount == 0 {
|
requireSRIOVDevices()
|
||||||
e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device")
|
|
||||||
}
|
|
||||||
|
|
||||||
oldCfg := enablePodResourcesFeatureGateInKubelet(f)
|
oldCfg := enablePodResourcesFeatureGateInKubelet(f)
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -575,9 +572,8 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
|
|||||||
if cpuAlloc < minCoreCount {
|
if cpuAlloc < minCoreCount {
|
||||||
e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
|
e2eskipper.Skipf("Skipping CPU Manager tests since the CPU allocatable < %d", minCoreCount)
|
||||||
}
|
}
|
||||||
if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount > 0 {
|
|
||||||
e2eskipper.Skipf("this test is meant to run on a system with no configured VF from SRIOV device")
|
requireLackOfSRIOVDevices()
|
||||||
}
|
|
||||||
|
|
||||||
onlineCPUs, err := getOnlineCPUs()
|
onlineCPUs, err := getOnlineCPUs()
|
||||||
framework.ExpectNoError(err)
|
framework.ExpectNoError(err)
|
||||||
@ -606,9 +602,7 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
|
|||||||
ginkgo.It("should return the expected responses with cpumanager none policy", func() {
|
ginkgo.It("should return the expected responses with cpumanager none policy", func() {
|
||||||
// current default is "none" policy - no need to restart the kubelet
|
// current default is "none" policy - no need to restart the kubelet
|
||||||
|
|
||||||
if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount > 0 {
|
requireLackOfSRIOVDevices()
|
||||||
e2eskipper.Skipf("this test is meant to run on a system with no configured VF from SRIOV device")
|
|
||||||
}
|
|
||||||
|
|
||||||
oldCfg := enablePodResourcesFeatureGateInKubelet(f)
|
oldCfg := enablePodResourcesFeatureGateInKubelet(f)
|
||||||
defer func() {
|
defer func() {
|
||||||
@ -651,6 +645,12 @@ var _ = SIGDescribe("POD Resources [Serial] [Feature:PodResources][NodeFeature:P
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
func requireLackOfSRIOVDevices() {
|
||||||
|
if sriovdevCount, err := countSRIOVDevices(); err != nil || sriovdevCount > 0 {
|
||||||
|
e2eskipper.Skipf("this test is meant to run on a system with no configured VF from SRIOV device")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func getOnlineCPUs() (cpuset.CPUSet, error) {
|
func getOnlineCPUs() (cpuset.CPUSet, error) {
|
||||||
onlineCPUList, err := ioutil.ReadFile("/sys/devices/system/cpu/online")
|
onlineCPUList, err := ioutil.ReadFile("/sys/devices/system/cpu/online")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -90,20 +90,6 @@ func detectCoresPerSocket() int {
|
|||||||
return coreCount
|
return coreCount
|
||||||
}
|
}
|
||||||
|
|
||||||
func countSRIOVDevices() (int, error) {
|
|
||||||
outData, err := exec.Command("/bin/sh", "-c", "ls /sys/bus/pci/devices/*/physfn | wc -w").Output()
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
return strconv.Atoi(strings.TrimSpace(string(outData)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func detectSRIOVDevices() int {
|
|
||||||
devCount, err := countSRIOVDevices()
|
|
||||||
framework.ExpectNoError(err)
|
|
||||||
return devCount
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Container) {
|
func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Container) {
|
||||||
for _, ctnAttr := range ctnAttributes {
|
for _, ctnAttr := range ctnAttributes {
|
||||||
ctn := v1.Container{
|
ctn := v1.Container{
|
||||||
@ -898,21 +884,7 @@ func runTopologyManagerTests(f *framework.Framework) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
ginkgo.It("run Topology Manager node alignment test suite", func() {
|
ginkgo.It("run Topology Manager node alignment test suite", func() {
|
||||||
// this is a very rough check. We just want to rule out system that does NOT have
|
numaNodes, coreCount := hostPrecheck()
|
||||||
// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
|
|
||||||
sriovdevCount := detectSRIOVDevices()
|
|
||||||
numaNodes := detectNUMANodes()
|
|
||||||
coreCount := detectCoresPerSocket()
|
|
||||||
|
|
||||||
if numaNodes < minNumaNodes {
|
|
||||||
e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system")
|
|
||||||
}
|
|
||||||
if coreCount < minCoreCount {
|
|
||||||
e2eskipper.Skipf("this test is meant to run on a system with at least 4 cores per socket")
|
|
||||||
}
|
|
||||||
if sriovdevCount == 0 {
|
|
||||||
e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device")
|
|
||||||
}
|
|
||||||
|
|
||||||
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
|
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
|
||||||
|
|
||||||
@ -935,19 +907,7 @@ func runTopologyManagerTests(f *framework.Framework) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
ginkgo.It("run the Topology Manager pod scope alignment test suite", func() {
|
ginkgo.It("run the Topology Manager pod scope alignment test suite", func() {
|
||||||
sriovdevCount := detectSRIOVDevices()
|
numaNodes, coreCount := hostPrecheck()
|
||||||
numaNodes := detectNUMANodes()
|
|
||||||
coreCount := detectCoresPerSocket()
|
|
||||||
|
|
||||||
if numaNodes < minNumaNodes {
|
|
||||||
e2eskipper.Skipf("this test is intended to be run on a multi-node NUMA system")
|
|
||||||
}
|
|
||||||
if coreCount < minCoreCount {
|
|
||||||
e2eskipper.Skipf("this test is intended to be run on a system with at least %d cores per socket", minCoreCount)
|
|
||||||
}
|
|
||||||
if sriovdevCount == 0 {
|
|
||||||
e2eskipper.Skipf("this test is intended to be run on a system with at least one SR-IOV VF enabled")
|
|
||||||
}
|
|
||||||
|
|
||||||
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
|
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
|
||||||
|
|
||||||
@ -968,6 +928,25 @@ func runTopologyManagerTests(f *framework.Framework) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func hostPrecheck() (int, int) {
|
||||||
|
// this is a very rough check. We just want to rule out system that does NOT have
|
||||||
|
// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
|
||||||
|
|
||||||
|
numaNodes := detectNUMANodes()
|
||||||
|
if numaNodes < minNumaNodes {
|
||||||
|
e2eskipper.Skipf("this test is intended to be run on a multi-node NUMA system")
|
||||||
|
}
|
||||||
|
|
||||||
|
coreCount := detectCoresPerSocket()
|
||||||
|
if coreCount < minCoreCount {
|
||||||
|
e2eskipper.Skipf("this test is intended to be run on a system with at least %d cores per socket", minCoreCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
requireSRIOVDevices()
|
||||||
|
|
||||||
|
return numaNodes, coreCount
|
||||||
|
}
|
||||||
|
|
||||||
// Serial because the test updates kubelet configuration.
|
// Serial because the test updates kubelet configuration.
|
||||||
var _ = SIGDescribe("Topology Manager [Serial] [Feature:TopologyManager][NodeFeature:TopologyManager]", func() {
|
var _ = SIGDescribe("Topology Manager [Serial] [Feature:TopologyManager][NodeFeature:TopologyManager]", func() {
|
||||||
f := framework.NewDefaultFramework("topology-manager-test")
|
f := framework.NewDefaultFramework("topology-manager-test")
|
||||||
|
@ -16,6 +16,11 @@ limitations under the License.
|
|||||||
|
|
||||||
package e2enode
|
package e2enode
|
||||||
|
|
||||||
|
import (
|
||||||
|
"k8s.io/kubernetes/test/e2e/framework"
|
||||||
|
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
|
||||||
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// SRIOVDevicePluginCMYAML is the path of the config map to configure the sriov device plugin.
|
// SRIOVDevicePluginCMYAML is the path of the config map to configure the sriov device plugin.
|
||||||
SRIOVDevicePluginCMYAML = "test/e2e_node/testing-manifests/sriovdp-cm.yaml"
|
SRIOVDevicePluginCMYAML = "test/e2e_node/testing-manifests/sriovdp-cm.yaml"
|
||||||
@ -26,3 +31,19 @@ const (
|
|||||||
// SRIOVDevicePluginName is the name of the device plugin pod
|
// SRIOVDevicePluginName is the name of the device plugin pod
|
||||||
SRIOVDevicePluginName = "sriov-device-plugin"
|
SRIOVDevicePluginName = "sriov-device-plugin"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func requireSRIOVDevices() {
|
||||||
|
sriovdevCount, err := countSRIOVDevices()
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
|
||||||
|
if sriovdevCount > 0 {
|
||||||
|
return // all good
|
||||||
|
}
|
||||||
|
|
||||||
|
msg := "this test is meant to run on a system with at least one configured VF from SRIOV device"
|
||||||
|
if framework.TestContext.RequireDevices {
|
||||||
|
framework.Failf(msg)
|
||||||
|
} else {
|
||||||
|
e2eskipper.Skipf(msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
36
test/e2e_node/util_sriov_linux.go
Normal file
36
test/e2e_node/util_sriov_linux.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
//go:build linux
|
||||||
|
// +build linux
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2021 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package e2enode
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// countSRIOVDevices provides a rough estimate of SRIOV Virtual Functions available on the system.
// This is a rough check we use to rule out unsuitable systems, not to detect suitable systems.
//
// It runs `ls /sys/bus/pci/devices/*/physfn | wc -w` through /bin/sh and parses
// the trimmed output as an integer. If running the command fails, it returns
// -1 together with that error; otherwise it returns whatever strconv.Atoi yields.
func countSRIOVDevices() (int, error) {
	cmd := exec.Command("/bin/sh", "-c", "ls /sys/bus/pci/devices/*/physfn | wc -w")
	rawCount, err := cmd.Output()
	if err != nil {
		return -1, err
	}

	trimmed := strings.TrimSpace(string(rawCount))
	return strconv.Atoi(trimmed)
}
|
24
test/e2e_node/util_sriov_unsupported.go
Normal file
24
test/e2e_node/util_sriov_unsupported.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
//go:build !linux
|
||||||
|
// +build !linux
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright 2021 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package e2enode
|
||||||
|
|
||||||
|
// countSRIOVDevices is the variant built on non-Linux platforms: it always
// reports zero devices and a nil error.
func countSRIOVDevices() (int, error) {
	const noDevices = 0
	return noDevices, nil
}
|
Loading…
Reference in New Issue
Block a user