Merge pull request #87645 from fromanirh/topomgr-e2e-tests

e2e-topology-manager: single-NUMA-node test
Kubernetes Prow Robot 2020-02-14 08:06:19 -08:00 committed by GitHub
commit 4a45ae3236
14 changed files with 796 additions and 26 deletions


@@ -47,6 +47,7 @@ BINDATA_OUTPUT="test/e2e/generated/bindata.go"
go-bindata -nometadata -o "${BINDATA_OUTPUT}.tmp" -pkg generated \
-ignore .jpg -ignore .png -ignore .md -ignore 'BUILD(\.bazel)?' \
"test/e2e/testing-manifests/..." \
"test/e2e_node/testing-manifests/..." \
"test/images/..." \
"test/fixtures/..."


@@ -167,6 +167,9 @@ type TestContextType struct {
// ProgressReportURL is the URL which progress updates will be posted to as tests complete. If empty, no updates are sent.
ProgressReportURL string
// SriovdpConfigMapFile is the path to the ConfigMap to configure the SRIOV device plugin on this host.
SriovdpConfigMapFile string
}
// NodeKillerConfig describes configuration of NodeKiller -- a utility to


@@ -1882,7 +1882,6 @@ func DumpDebugInfo(c clientset.Interface, ns string) {
// DsFromManifest reads a .json/yaml file and returns the daemonset in it.
func DsFromManifest(url string) (*appsv1.DaemonSet, error) {
var ds appsv1.DaemonSet
Logf("Parsing ds from %v", url)
var response *http.Response
@@ -1908,7 +1907,12 @@ func DsFromManifest(url string) (*appsv1.DaemonSet, error) {
if err != nil {
return nil, fmt.Errorf("Failed to read html response body: %v", err)
}
return DsFromData(data)
}
// DsFromData reads a byte slice and returns the daemonset in it.
func DsFromData(data []byte) (*appsv1.DaemonSet, error) {
var ds appsv1.DaemonSet
dataJSON, err := utilyaml.ToJSON(data)
if err != nil {
return nil, fmt.Errorf("Failed to parse data to json: %v", err)


@@ -24,6 +24,7 @@ go_bindata(
name = "bindata",
srcs = [
"//test/e2e/testing-manifests:all-srcs",
"//test/e2e_node/testing-manifests:all-srcs",
"//test/fixtures:all-srcs",
"//test/images:all-srcs",
],


@@ -15,8 +15,10 @@ go_library(
"framework.go",
"image_list.go",
"node_problem_detector_linux.go",
"numa_alignment.go",
"resource_collector.go",
"util.go",
"util_sriov.go",
"util_xfs_linux.go",
"util_xfs_unsupported.go",
],
@@ -30,6 +32,7 @@ go_library(
"//pkg/kubelet/apis/podresources/v1alpha1:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/cm/cpuset:go_default_library",
"//pkg/kubelet/kubeletconfig/util/codec:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/remote:go_default_library",
@@ -49,6 +52,7 @@ go_library(
"//test/e2e/framework/gpu:go_default_library",
"//test/e2e/framework/metrics:go_default_library",
"//test/e2e/framework/node:go_default_library",
"//test/e2e/framework/testfiles:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/github.com/blang/semver:go_default_library",
"//vendor/github.com/coreos/go-systemd/util:go_default_library",
@@ -266,6 +270,7 @@ filegroup(
"//test/e2e_node/runner/remote:all-srcs",
"//test/e2e_node/services:all-srcs",
"//test/e2e_node/system:all-srcs",
"//test/e2e_node/testing-manifests:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],


@@ -80,6 +80,7 @@ func registerNodeFlags(flags *flag.FlagSet) {
flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.")
}
func init() {


@@ -31,6 +31,7 @@ import (
commontest "k8s.io/kubernetes/test/e2e/common"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/gpu"
"k8s.io/kubernetes/test/e2e/framework/testfiles"
imageutils "k8s.io/kubernetes/test/utils/image"
)
@@ -68,6 +69,7 @@ func updateImageWhiteList() {
framework.ImageWhiteList = NodeImageWhiteList.Union(commontest.CommonImageWhiteList)
// Images from extra envs
framework.ImageWhiteList.Insert(getNodeProblemDetectorImage())
framework.ImageWhiteList.Insert(getSRIOVDevicePluginImage())
}
func getNodeProblemDetectorImage() string {
@@ -184,3 +186,26 @@ func getGPUDevicePluginImage() string {
}
return ds.Spec.Template.Spec.Containers[0].Image
}
// getSRIOVDevicePluginImage returns the image of SRIOV device plugin.
func getSRIOVDevicePluginImage() string {
data, err := testfiles.Read(SRIOVDevicePluginDSYAML)
if err != nil {
klog.Errorf("Failed to read the device plugin manifest: %v", err)
return ""
}
ds, err := framework.DsFromData(data)
if err != nil {
klog.Errorf("Failed to parse the device plugin image: %v", err)
return ""
}
if ds == nil {
klog.Errorf("Failed to parse the device plugin image: the extracted DaemonSet is nil")
return ""
}
if len(ds.Spec.Template.Spec.Containers) < 1 {
klog.Errorf("Failed to parse the device plugin image: cannot extract the container from YAML")
return ""
}
return ds.Spec.Template.Spec.Containers[0].Image
}


@@ -0,0 +1,212 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"fmt"
"io/ioutil"
"sort"
"strconv"
"strings"
v1 "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/kubernetes/test/e2e/framework"
)
type numaPodResources struct {
CPUToNUMANode map[int]int
PCIDevsToNUMANode map[string]int
}
func (R *numaPodResources) CheckAlignment() bool {
nodeNum := -1 // not set
for _, cpuNode := range R.CPUToNUMANode {
if nodeNum == -1 {
nodeNum = cpuNode
} else if nodeNum != cpuNode {
return false
}
}
for _, devNode := range R.PCIDevsToNUMANode {
// TODO: explain -1
if devNode != -1 && nodeNum != devNode {
return false
}
}
return true
}
func (R *numaPodResources) String() string {
var b strings.Builder
// To store the keys in slice in sorted order
var cpuKeys []int
for ck := range R.CPUToNUMANode {
cpuKeys = append(cpuKeys, ck)
}
sort.Ints(cpuKeys)
for _, k := range cpuKeys {
nodeNum := R.CPUToNUMANode[k]
b.WriteString(fmt.Sprintf("CPU cpu#%03d=%02d\n", k, nodeNum))
}
var pciKeys []string
for pk := range R.PCIDevsToNUMANode {
pciKeys = append(pciKeys, pk)
}
sort.Strings(pciKeys)
for _, k := range pciKeys {
nodeNum := R.PCIDevsToNUMANode[k]
b.WriteString(fmt.Sprintf("PCI %s=%02d\n", k, nodeNum))
}
return b.String()
}
func getCPUsPerNUMANode(nodeNum int) ([]int, error) {
nodeCPUList, err := ioutil.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeNum))
if err != nil {
return nil, err
}
cpus, err := cpuset.Parse(strings.TrimSpace(string(nodeCPUList)))
if err != nil {
return nil, err
}
return cpus.ToSlice(), nil
}
func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, environ map[string]string, numaNodes int) (map[int]int, error) {
var cpuIDs []int
cpuListAllowedEnvVar := "CPULIST_ALLOWED"
for name, value := range environ {
if name == cpuListAllowedEnvVar {
cpus, err := cpuset.Parse(value)
if err != nil {
return nil, err
}
cpuIDs = cpus.ToSlice()
}
}
if len(cpuIDs) == 0 {
return nil, fmt.Errorf("variable %q found in environ", cpuListAllowedEnvVar)
}
cpusPerNUMA := make(map[int][]int)
for numaNode := 0; numaNode < numaNodes; numaNode++ {
nodeCPUList := f.ExecCommandInContainer(pod.Name, pod.Spec.Containers[0].Name,
"/bin/cat", fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", numaNode))
cpus, err := cpuset.Parse(nodeCPUList)
if err != nil {
return nil, err
}
cpusPerNUMA[numaNode] = cpus.ToSlice()
}
// CPU IDs -> NUMA Node ID
CPUToNUMANode := make(map[int]int)
for nodeNum, cpus := range cpusPerNUMA {
for _, cpu := range cpus {
CPUToNUMANode[cpu] = nodeNum
}
}
// filter out only the allowed CPUs
CPUMap := make(map[int]int)
for _, cpuID := range cpuIDs {
_, ok := CPUToNUMANode[cpuID]
if !ok {
return nil, fmt.Errorf("CPU %d not found on NUMA map: %v", cpuID, CPUToNUMANode)
}
CPUMap[cpuID] = CPUToNUMANode[cpuID]
}
return CPUMap, nil
}
func getPCIDeviceToNumaNodeMapFromEnv(f *framework.Framework, pod *v1.Pod, environ map[string]string) (map[string]int, error) {
pciDevPrefix := "PCIDEVICE_"
// at this point we don't care which plugin selected the device,
// we only need to know which devices were assigned to the POD.
// Hence, do prefix search for the variable and fetch the device(s).
NUMAPerDev := make(map[string]int)
for name, value := range environ {
if !strings.HasPrefix(name, pciDevPrefix) {
continue
}
// a single plugin can allocate more than a single device
pciDevs := strings.Split(value, ",")
for _, pciDev := range pciDevs {
pciDevNUMANode := f.ExecCommandInContainer(pod.Name, pod.Spec.Containers[0].Name,
"/bin/cat", fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", pciDev))
nodeNum, err := strconv.Atoi(pciDevNUMANode)
if err != nil {
return nil, err
}
NUMAPerDev[pciDev] = nodeNum
}
}
if len(NUMAPerDev) == 0 {
return nil, fmt.Errorf("no PCI devices found in environ")
}
return NUMAPerDev, nil
}
func makeEnvMap(logs string) (map[string]string, error) {
podEnv := strings.Split(logs, "\n")
envMap := make(map[string]string)
for _, envVar := range podEnv {
if len(envVar) == 0 {
continue
}
pair := strings.SplitN(envVar, "=", 2)
if len(pair) != 2 {
return nil, fmt.Errorf("unable to split %q", envVar)
}
envMap[pair[0]] = pair[1]
}
return envMap, nil
}
func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, logs string, numaNodes int) (numaPodResources, error) {
podEnv, err := makeEnvMap(logs)
if err != nil {
return numaPodResources{}, err
}
CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, podEnv, numaNodes)
if err != nil {
return numaPodResources{}, err
}
PCIDevsToNUMANode, err := getPCIDeviceToNumaNodeMapFromEnv(f, pod, podEnv)
if err != nil {
return numaPodResources{}, err
}
numaRes := numaPodResources{
CPUToNUMANode: CPUToNUMANode,
PCIDevsToNUMANode: PCIDevsToNUMANode,
}
aligned := numaRes.CheckAlignment()
if !aligned {
return numaRes, fmt.Errorf("NUMA resources not aligned")
}
return numaRes, nil
}
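
Taken together, these helpers convert the environment dumped by the test container into per-CPU and per-PCI-device NUMA node maps and then assert that everything landed on a single node. Below is a small sketch of the core check on hand-built data; it would have to live in the same e2enode package since the type is unexported, and the CPU IDs and PCI address are invented for illustration.

// Sketch only: numaPodResources.CheckAlignment on synthetic maps.
aligned := numaPodResources{
	CPUToNUMANode:     map[int]int{0: 0, 2: 0, 4: 0},
	PCIDevsToNUMANode: map[string]int{"0000:3b:02.0": 0},
}
fmt.Printf("aligned=%v\n%s", aligned.CheckAlignment(), aligned.String()) // aligned=true

// One CPU pinned on a different NUMA node breaks the alignment.
misaligned := numaPodResources{
	CPUToNUMANode:     map[int]int{0: 0, 1: 1},
	PCIDevsToNUMANode: map[string]int{"0000:3b:02.0": 0},
}
fmt.Printf("aligned=%v\n", misaligned.CheckAlignment()) // aligned=false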


@@ -0,0 +1,14 @@
package(default_visibility = ["//visibility:public"])
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)


@@ -0,0 +1,36 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: sriovdp-config
namespace: kube-system
data:
config.json: |
{
"resourceList": [{
"resourceName": "intel_sriov_netdevice",
"selectors": {
"vendors": ["8086"],
"devices": ["154c", "10ed"],
"drivers": ["i40evf", "ixgbevf"]
}
},
{
"resourceName": "intel_sriov_dpdk",
"selectors": {
"vendors": ["8086"],
"devices": ["154c", "10ed"],
"drivers": ["vfio-pci"],
"pfNames": ["enp0s0f0","enp2s2f1"]
}
},
{
"resourceName": "mlnx_sriov_rdma",
"isRdma": true,
"selectors": {
"vendors": ["15b3"],
"devices": ["1018"],
"drivers": ["mlx5_ib"]
}
}
]
}
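
What the node-alignment suite cares about in this ConfigMap is the extended resource names the plugin ends up advertising. Under the SRIOV device plugin's default intel.com resource prefix (an assumption about the deployed plugin, not something this manifest sets), the first entry surfaces on the node as intel.com/intel_sriov_netdevice, which is the shape that findSRIOVResource in topology_manager_test.go looks for. A tiny sketch of that match, with one invented non-SRIOV resource for contrast:

// Sketch only: the same regexp findSRIOVResource uses, applied to the
// resource name the intel_sriov_netdevice entry above would yield under the
// assumed default "intel.com" prefix, and to an unrelated resource.
re := regexp.MustCompile(`^intel.com/.*sriov.*`)
fmt.Println(re.MatchString("intel.com/intel_sriov_netdevice")) // true
fmt.Println(re.MatchString("nvidia.com/gpu"))                  // false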


@@ -0,0 +1,58 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kube-sriov-device-plugin-amd64
namespace: kube-system
labels:
tier: node
app: sriovdp
spec:
selector:
matchLabels:
name: sriov-device-plugin
template:
metadata:
labels:
name: sriov-device-plugin
tier: node
app: sriovdp
spec:
hostNetwork: true
hostPID: true
nodeSelector:
beta.kubernetes.io/arch: amd64
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
serviceAccountName: sriov-device-plugin
containers:
- name: kube-sriovdp
image: docker.io/nfvpe/sriov-device-plugin:v3.1
imagePullPolicy: Never
args:
- --log-dir=sriovdp
- --log-level=10
securityContext:
privileged: true
volumeMounts:
- name: devicesock
mountPath: /var/lib/kubelet/
readOnly: false
- name: log
mountPath: /var/log
- name: config-volume
mountPath: /etc/pcidp
volumes:
- name: devicesock
hostPath:
path: /var/lib/kubelet/
- name: log
hostPath:
path: /var/log
- name: config-volume
configMap:
name: sriovdp-config
items:
- key: config.json
path: config.json


@@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: sriov-device-plugin
namespace: kube-system


@@ -17,10 +17,18 @@ limitations under the License.
package e2enode
import (
"context"
"fmt"
"io/ioutil"
"os/exec"
"regexp"
"strconv"
"strings"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
@@ -29,24 +37,70 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/test/e2e/framework"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
"k8s.io/kubernetes/test/e2e/framework/testfiles"
"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
)
const (
numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d`
minNumaNodes = 2
minCoreCount = 4
)
// Helper for makeTopologyManagerPod().
type tmCtnAttribute struct {
ctnName string
cpuRequest string
cpuLimit string
ctnName string
cpuRequest string
cpuLimit string
deviceName string
deviceRequest string
deviceLimit string
}
func detectNUMANodes() int {
outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"NUMA node(s):\" | cut -d \":\" -f 2").Output()
framework.ExpectNoError(err)
numaNodes, err := strconv.Atoi(strings.TrimSpace(string(outData)))
framework.ExpectNoError(err)
return numaNodes
}
func detectCoresPerSocket() int {
outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"Core(s) per socket:\" | cut -d \":\" -f 2").Output()
framework.ExpectNoError(err)
coreCount, err := strconv.Atoi(strings.TrimSpace(string(outData)))
framework.ExpectNoError(err)
return coreCount
}
func detectSRIOVDevices() int {
outData, err := exec.Command("/bin/sh", "-c", "ls /sys/bus/pci/devices/*/sriov_totalvfs | wc -w").Output()
framework.ExpectNoError(err)
devCount, err := strconv.Atoi(strings.TrimSpace(string(outData)))
framework.ExpectNoError(err)
return devCount
}
// makeTopologyManagerPod returns a pod with the provided tmCtnAttributes.
func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
return makeTopologyManagerTestPod(podName, cpusetCmd, tmCtnAttributes)
}
func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
var containers []v1.Container
for _, ctnAttr := range tmCtnAttributes {
cpusetCmd := fmt.Sprintf("grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d")
ctn := v1.Container{
Name: ctnAttr.ctnName,
Image: busyboxImage,
@@ -60,7 +114,11 @@ func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
},
},
Command: []string{"sh", "-c", cpusetCmd},
Command: []string{"sh", "-c", podCmd},
}
if ctnAttr.deviceName != "" {
ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
ctn.Resources.Limits[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceLimit)
}
containers = append(containers, ctn)
}
@@ -76,10 +134,28 @@ func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
}
}
func configureTopologyManagerInKubelet(f *framework.Framework, policy string) {
func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (int, bool) {
for nodeNum := 0; nodeNum < numaNodes; nodeNum++ {
value, ok := configMap.Annotations[fmt.Sprintf("pcidevice_node%d", nodeNum)]
if !ok {
framework.Logf("missing pcidevice annotation for NUMA node %d", nodeNum)
return -1, false
}
v, err := strconv.Atoi(value)
if err != nil {
framework.Failf("error getting the PCI device count on NUMA node %d: %v", nodeNum, err)
}
if v == 0 {
framework.Logf("NUMA node %d has no SRIOV devices attached", nodeNum)
return nodeNum, true
}
framework.Logf("NUMA node %d has %d SRIOV devices attached", nodeNum, v)
}
return -1, false
}
func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string, configMap *v1.ConfigMap, numaNodes int) string {
// Configure Topology Manager in Kubelet with policy.
oldCfg, err := getCurrentKubeletConfig()
framework.ExpectNoError(err)
newCfg := oldCfg.DeepCopy()
if newCfg.FeatureGates == nil {
newCfg.FeatureGates = make(map[string]bool)
@@ -92,7 +168,6 @@ func configureTopologyManagerInKubelet(f *framework.Framework, policy string) {
// Set the Topology Manager policy
newCfg.TopologyManagerPolicy = policy
//newCfg.TopologyManagerPolicy = topologymanager.PolicySingleNumaNode
// Set the CPU Manager policy to static.
newCfg.CPUManagerPolicy = string(cpumanager.PolicyStatic)
@@ -100,18 +175,25 @@ func configureTopologyManagerInKubelet(f *framework.Framework, policy string) {
// Set the CPU Manager reconcile period to 1 second.
newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
// The Kubelet panics if either kube-reserved or system-reserved is not set
// when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that
// kubelet doesn't panic.
if newCfg.KubeReserved == nil {
newCfg.KubeReserved = map[string]string{}
}
if nodeNum, ok := findNUMANodeWithoutSRIOVDevices(configMap, numaNodes); ok {
cpus, err := getCPUsPerNUMANode(nodeNum)
framework.Logf("NUMA Node %d doesn't seem to have attached SRIOV devices and has cpus=%v", nodeNum, cpus)
framework.ExpectNoError(err)
newCfg.ReservedSystemCPUs = fmt.Sprintf("%d", cpus[len(cpus)-1])
} else {
// The Kubelet panics if either kube-reserved or system-reserved is not set
// when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that
// kubelet doesn't panic.
if newCfg.KubeReserved == nil {
newCfg.KubeReserved = map[string]string{}
}
if _, ok := newCfg.KubeReserved["cpu"]; !ok {
newCfg.KubeReserved["cpu"] = "200m"
if _, ok := newCfg.KubeReserved["cpu"]; !ok {
newCfg.KubeReserved["cpu"] = "200m"
}
}
// Dump the config -- debug
framework.Logf("New kublet config is %s", *newCfg)
framework.Logf("New kubelet config is %s", *newCfg)
// Update the Kubelet configuration.
framework.ExpectNoError(setKubeletConfiguration(f, newCfg))
@@ -122,9 +204,79 @@ func configureTopologyManagerInKubelet(f *framework.Framework, policy string) {
framework.ExpectNoError(err)
return nodes == 1
}, time.Minute, time.Second).Should(gomega.BeTrue())
return newCfg.ReservedSystemCPUs
}
func runTopologyManagerSuiteTests(f *framework.Framework) {
// getSRIOVDevicePluginPod returns the Device Plugin pod for sriov resources in e2e tests.
func getSRIOVDevicePluginPod() *v1.Pod {
ds := readDaemonSetV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginDSYAML))
p := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: SRIOVDevicePluginName,
Namespace: metav1.NamespaceSystem,
},
Spec: ds.Spec.Template.Spec,
}
return p
}
func readConfigMapV1OrDie(objBytes []byte) *v1.ConfigMap {
v1.AddToScheme(appsScheme)
requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(v1.SchemeGroupVersion), objBytes)
if err != nil {
panic(err)
}
return requiredObj.(*v1.ConfigMap)
}
func readServiceAccountV1OrDie(objBytes []byte) *v1.ServiceAccount {
v1.AddToScheme(appsScheme)
requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(v1.SchemeGroupVersion), objBytes)
if err != nil {
panic(err)
}
return requiredObj.(*v1.ServiceAccount)
}
func findSRIOVResource(node *v1.Node) (string, int64) {
re := regexp.MustCompile(`^intel.com/.*sriov.*`)
for key, val := range node.Status.Capacity {
resource := string(key)
if re.MatchString(resource) {
v := val.Value()
if v > 0 {
return resource, v
}
}
}
return "", 0
}
func deletePodInNamespace(f *framework.Framework, namespace, name string) {
gp := int64(0)
deleteOptions := metav1.DeleteOptions{
GracePeriodSeconds: &gp,
}
err := f.ClientSet.CoreV1().Pods(namespace).Delete(context.TODO(), name, &deleteOptions)
framework.ExpectNoError(err)
}
func validatePodAlignment(f *framework.Framework, pod *v1.Pod, numaNodes int) {
ginkgo.By("validating the Gu pod")
logs, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
framework.Logf("got pod logs: %v", logs)
numaRes, err := checkNUMAAlignment(f, pod, logs, numaNodes)
framework.ExpectNoError(err, "NUMA Alignment check failed for [%s] of pod [%s]: %s",
pod.Spec.Containers[0].Name, pod.Name, numaRes.String())
}
func runTopologyManagerPolicySuiteTests(f *framework.Framework) {
var cpuCap, cpuAlloc int64
var cpuListString, expAllowedCPUsListRegex string
var cpuList []int
@@ -350,27 +502,252 @@ func runTopologyManagerSuiteTests(f *framework.Framework) {
waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}
func runTopologyManagerPositiveTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) {
var pods []*v1.Pod
for podID := 0; podID < numPods; podID++ {
ctnAttrs := []tmCtnAttribute{
{
ctnName: "gu-container",
cpuRequest: cpuAmount,
cpuLimit: cpuAmount,
deviceName: sriovResourceName,
deviceRequest: deviceAmount,
deviceLimit: deviceAmount,
},
}
podName := fmt.Sprintf("gu-pod-%d", podID)
framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs)
pod = f.PodClient().CreateSync(pod)
framework.Logf("created pod %s", podName)
pods = append(pods, pod)
}
for podID := 0; podID < numPods; podID++ {
validatePodAlignment(f, pods[podID], numaNodes)
}
for podID := 0; podID < numPods; podID++ {
pod := pods[podID]
framework.Logf("deleting the pod %s/%s and waiting for container %s removal",
pod.Namespace, pod.Name, pod.Spec.Containers[0].Name)
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}
}
func runTopologyManagerNegativeTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) {
ctnAttrs := []tmCtnAttribute{
{
ctnName: "gu-container",
cpuRequest: cpuAmount,
cpuLimit: cpuAmount,
deviceName: sriovResourceName,
deviceRequest: deviceAmount,
deviceLimit: deviceAmount,
},
}
podName := "gu-pod"
framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs)
pod = f.PodClient().Create(pod)
err := e2epod.WaitForPodCondition(f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) {
if pod.Status.Phase != v1.PodPending {
return true, nil
}
return false, nil
})
framework.ExpectNoError(err)
pod, err = f.PodClient().Get(context.TODO(), pod.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
if pod.Status.Phase != v1.PodFailed {
framework.Failf("pod %s not failed: %v", pod.Name, pod.Status)
}
if !isTopologyAffinityError(pod) {
framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason)
}
deletePods(f, []string{pod.Name})
}
func isTopologyAffinityError(pod *v1.Pod) bool {
re := regexp.MustCompile(`Topology.*Affinity.*Error`)
return re.MatchString(pod.Status.Reason)
}
func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap {
cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML)
var err error
// the SRIOVDP configuration is hw-dependent, so we allow per-test-host customization.
framework.Logf("host-local SRIOV Device Plugin Config Map %q", cmFile)
if cmFile != "" {
cmData, err = ioutil.ReadFile(cmFile)
if err != nil {
framework.Failf("unable to load the SRIOV Device Plugin ConfigMap: %v", err)
}
} else {
framework.Logf("Using built-in SRIOV Device Plugin Config Map")
}
return readConfigMapV1OrDie(cmData)
}
func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) (*v1.Pod, string, int64) {
var err error
ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name))
if _, err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil {
framework.Failf("unable to create test configMap %s: %v", configMap.Name, err)
}
serviceAccount := readServiceAccountV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginSAYAML))
ginkgo.By(fmt.Sprintf("Creating serviceAccount %v/%v", metav1.NamespaceSystem, serviceAccount.Name))
if _, err = f.ClientSet.CoreV1().ServiceAccounts(metav1.NamespaceSystem).Create(context.TODO(), serviceAccount, metav1.CreateOptions{}); err != nil {
framework.Failf("unable to create test serviceAccount %s: %v", serviceAccount.Name, err)
}
e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute)
dp := getSRIOVDevicePluginPod()
dp.Spec.NodeName = framework.TestContext.NodeName
ginkgo.By("Create SRIOV device plugin pod")
dpPod, err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(context.TODO(), dp, metav1.CreateOptions{})
framework.ExpectNoError(err)
sriovResourceName := ""
var sriovResourceAmount int64
ginkgo.By("Waiting for devices to become available on the local node")
gomega.Eventually(func() bool {
node := getLocalNode(f)
framework.Logf("Node status: %v", node.Status.Capacity)
sriovResourceName, sriovResourceAmount = findSRIOVResource(node)
return sriovResourceAmount > 0
}, 2*time.Minute, framework.Poll).Should(gomega.BeTrue())
framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName)
return dpPod, sriovResourceName, sriovResourceAmount
}
func teardownSRIOVConfigOrFail(f *framework.Framework, dpPod *v1.Pod) {
framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal",
dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name)
deletePodInNamespace(f, dpPod.Namespace, dpPod.Name)
waitForContainerRemoval(dpPod.Spec.Containers[0].Name, dpPod.Name, dpPod.Namespace)
}
func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int) {
threadsPerCore := 1
if isHTEnabled() {
threadsPerCore = 2
}
dpPod, sriovResourceName, sriovResourceAmount := setupSRIOVConfigOrFail(f, configMap)
// this could have been a loop, but we unroll it to make the test cases explicit
// simplest case
ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sriovResourceName))
runTopologyManagerPositiveTest(f, numaNodes, 1, "1000m", sriovResourceName, "1")
ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 2 cores, 1 %s device", sriovResourceName))
runTopologyManagerPositiveTest(f, numaNodes, 1, "2000m", sriovResourceName, "1")
if reservedSystemCPUs != "" {
// to avoid false negatives, we have placed the reserved CPUs in such a way that there is at least one NUMA node
// with 1+ SRIOV devices and no reserved CPUs.
numCores := threadsPerCore * coreCount
ginkgo.By(fmt.Sprintf("Successfully admit an entire socket (%d cores), 1 %s device", numCores, sriovResourceName))
runTopologyManagerPositiveTest(f, numaNodes, 1, fmt.Sprintf("%dm", numCores*1000), sriovResourceName, "1")
}
if sriovResourceAmount > 1 {
// no matter how buses are connected to NUMA nodes and how SRIOV devices are installed, this function's
// preconditions must ensure the following can be fulfilled
ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 1 core, 1 %s device", sriovResourceName))
runTopologyManagerPositiveTest(f, numaNodes, 2, "1000m", sriovResourceName, "1")
ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 2 cores, 1 %s device", sriovResourceName))
runTopologyManagerPositiveTest(f, numaNodes, 2, "2000m", sriovResourceName, "1")
// testing more complex conditions requires knowledge about the system cpu+bus topology
}
// overflow NUMA node capacity: cores
numCores := 1 + (threadsPerCore * coreCount)
ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pods, with %d cores, 1 %s device - and it should be rejected", numCores, sriovResourceName))
runTopologyManagerNegativeTest(f, numaNodes, 1, fmt.Sprintf("%dm", numCores*1000), sriovResourceName, "1")
teardownSRIOVConfigOrFail(f, dpPod)
}
func runTopologyManagerTests(f *framework.Framework) {
var oldCfg *kubeletconfig.KubeletConfiguration
var err error
ginkgo.It("run Topology Manager test suite", func() {
ginkgo.It("run Topology Manager policy test suite", func() {
oldCfg, err = getCurrentKubeletConfig()
framework.ExpectNoError(err)
var policies = []string{topologymanager.PolicySingleNumaNode, topologymanager.PolicyRestricted,
topologymanager.PolicyBestEffort, topologymanager.PolicyNone}
for _, policy := range policies {
// Configure Topology Manager
ginkgo.By("by configuring Topology Manager policy to xxx")
ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
framework.Logf("Configuring topology Manager policy to %s", policy)
configureTopologyManagerInKubelet(f, policy)
configureTopologyManagerInKubelet(f, oldCfg, policy, nil, 0)
// Run the tests
runTopologyManagerSuiteTests(f)
runTopologyManagerPolicySuiteTests(f)
}
// restore kubelet config
setOldKubeletConfig(f, oldCfg)
// Debug sleep to allow time to look at kubelet config
time.Sleep(5 * time.Minute)
// Delete state file to allow repeated runs
deleteStateFile()
})
ginkgo.It("run Topology Manager node alignment test suite", func() {
// this is a very rough check. We just want to rule out systems that do NOT have
// any SRIOV device. A more thorough check will be done in runTopologyManagerPositiveTest
sriovdevCount := detectSRIOVDevices()
numaNodes := detectNUMANodes()
coreCount := detectCoresPerSocket()
if numaNodes < minNumaNodes {
e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system")
}
if coreCount < minCoreCount {
e2eskipper.Skipf("this test is meant to run on a system with at least 4 cores per socket")
}
if sriovdevCount == 0 {
e2eskipper.Skipf("this test is meant to run on a system with at least one SRIOV device")
}
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
oldCfg, err = getCurrentKubeletConfig()
framework.ExpectNoError(err)
policy := topologymanager.PolicySingleNumaNode
// Configure Topology Manager
ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
framework.Logf("Configuring topology Manager policy to %s", policy)
reservedSystemCPUs := configureTopologyManagerInKubelet(f, oldCfg, policy, configMap, numaNodes)
runTopologyManagerNodeAlignmentSuiteTests(f, configMap, reservedSystemCPUs, numaNodes, coreCount)
// restore kubelet config
setOldKubeletConfig(f, oldCfg)
// Delete state file to allow repeated runs
deleteStateFile()
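
A detail that is easy to miss in the suite above: findNUMANodeWithoutSRIOVDevices never probes sysfs; it reads pcidevice_node<N> annotations from the SRIOV ConfigMap, so a test host that wants the ReservedSystemCPUs optimization must annotate its host-local ConfigMap with per-NUMA-node SRIOV device counts. A hedged sketch of such an object, with invented counts, as it could be used inside this package:

// Sketch only: a ConfigMap annotated the way findNUMANodeWithoutSRIOVDevices
// expects. On a two-NUMA-node host where all SRIOV VFs sit on node 0,
// node 1 is reported as free of SRIOV devices, and
// configureTopologyManagerInKubelet then reserves that node's last CPU.
cm := &v1.ConfigMap{
	ObjectMeta: metav1.ObjectMeta{
		Name:      "sriovdp-config",
		Namespace: "kube-system",
		Annotations: map[string]string{
			"pcidevice_node0": "4", // invented device count
			"pcidevice_node1": "0",
		},
	},
}
nodeNum, ok := findNUMANodeWithoutSRIOVDevices(cm, 2)
fmt.Println(nodeNum, ok) // 1 true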


@@ -0,0 +1,28 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
const (
// SRIOVDevicePluginCMYAML is the path of the config map to configure the sriov device plugin.
SRIOVDevicePluginCMYAML = "test/e2e_node/testing-manifests/sriovdp-cm.yaml"
// SRIOVDevicePluginDSYAML is the path of the daemonset template of the sriov device plugin.
// TODO: Parametrize it by making it a feature in TestFramework.
SRIOVDevicePluginDSYAML = "test/e2e_node/testing-manifests/sriovdp-ds.yaml"
// SRIOVDevicePluginSAYAML is the path of the service account needed by the sriov device plugin to run.
SRIOVDevicePluginSAYAML = "test/e2e_node/testing-manifests/sriovdp-sa.yaml"
// SRIOVDevicePluginName is the name of the device plugin pod
SRIOVDevicePluginName = "sriov-device-plugin"
)