Merge pull request #88566 from Deepthidharwar/topology-mgr-numa-tests

Enable running cpu-mgr-multiNUMA e2e tests with Topology manager
This commit is contained in:
Kubernetes Prow Robot 2020-03-05 05:38:37 -08:00 committed by GitHub
commit 1f2e1967d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 296 additions and 437 deletions

View File

@ -258,50 +258,14 @@ func enableCPUManagerInKubelet(f *framework.Framework, cleanStateFile bool) (old
return oldCfg
}
func runCPUManagerTests(f *framework.Framework) {
var cpuCap, cpuAlloc int64
var oldCfg *kubeletconfig.KubeletConfiguration
var cpuListString, expAllowedCPUsListRegex string
var cpuList []int
var cpu1, cpu2 int
var cset cpuset.CPUSet
var err error
func runGuPodTest(f *framework.Framework) {
var ctnAttrs []ctnAttribute
var pod, pod1, pod2 *v1.Pod
var cpu1 int
var err error
var cpuList []int
var pod *v1.Pod
var expAllowedCPUsListRegex string
ginkgo.It("should assign CPUs as expected based on the Pod spec", func() {
cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f)
// Skip CPU Manager tests altogether if the CPU capacity < 2.
if cpuCap < 2 {
e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2")
}
// Enable CPU Manager in the kubelet.
oldCfg = enableCPUManagerInKubelet(f, true)
ginkgo.By("running a non-Gu pod")
ctnAttrs = []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
}
pod = makeCPUManagerPod("non-gu-pod", ctnAttrs)
pod = f.PodClient().CreateSync(pod)
ginkgo.By("checking if the expected cpuset was assigned")
expAllowedCPUsListRegex = fmt.Sprintf("^0-%d\n$", cpuCap-1)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
ginkgo.By("running a Gu pod")
ctnAttrs = []ctnAttribute{
{
ctnName: "gu-container",
@ -329,8 +293,45 @@ func runCPUManagerTests(f *framework.Framework) {
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}
// runNonGuPodTest runs a pod whose container has non-guaranteed (non-integer,
// request != limit) CPU resources and verifies it is left on the shared CPU
// pool, i.e. its allowed cpuset spans all CPUs 0..cpuCap-1.
// cpuCap is the node's total CPU capacity, used to build the expected regex.
func runNonGuPodTest(f *framework.Framework, cpuCap int64) {
var ctnAttrs []ctnAttribute
var err error
var pod *v1.Pod
var expAllowedCPUsListRegex string
// Fractional request below the limit makes this a Burstable (non-Gu)
// container, so the static CPU manager policy gives it no exclusive CPUs.
ctnAttrs = []ctnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
}
pod = makeCPUManagerPod("non-gu-pod", ctnAttrs)
pod = f.PodClient().CreateSync(pod)
ginkgo.By("checking if the expected cpuset was assigned")
// The container prints its Cpus_allowed_list; expect the full range 0-(cpuCap-1).
expAllowedCPUsListRegex = fmt.Sprintf("^0-%d\n$", cpuCap-1)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
// Clean up so the CPUs are released before the next sub-test runs.
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}
func runMultipleGuNonGuPods(f *framework.Framework, cpuCap int64, cpuAlloc int64) {
var cpuListString, expAllowedCPUsListRegex string
var cpuList []int
var cpu1 int
var cset cpuset.CPUSet
var err error
var ctnAttrs []ctnAttribute
var pod1, pod2 *v1.Pod
ginkgo.By("running multiple Gu and non-Gu pods")
ctnAttrs = []ctnAttribute{
{
ctnName: "gu-container",
@ -374,18 +375,20 @@ func runCPUManagerTests(f *framework.Framework) {
err = f.PodClient().MatchContainerOutput(pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod2.Spec.Containers[0].Name, pod2.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod1.Name, pod2.Name})
waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
// Skip rest of the tests if CPU capacity < 3.
if cpuCap < 3 {
e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3")
}
ginkgo.By("running a Gu pod requesting multiple CPUs")
func runMultipleCPUGuPod(f *framework.Framework) {
var cpuListString, expAllowedCPUsListRegex string
var cpuList []int
var cset cpuset.CPUSet
var err error
var ctnAttrs []ctnAttribute
var pod *v1.Pod
ctnAttrs = []ctnAttribute{
{
ctnName: "gu-container",
@ -401,7 +404,7 @@ func runCPUManagerTests(f *framework.Framework) {
if isMultiNUMA() {
cpuList = cpuset.MustParse(getCoreSiblingList(0)).ToSlice()
if !isHTEnabled() {
cset = cpuset.MustParse(fmt.Sprintf("%d,%d", cpuList[1], cpuList[2]))
cset = cpuset.MustParse(fmt.Sprintf("%d-%d", cpuList[1], cpuList[2]))
} else {
cset = cpuset.MustParse(getCPUSiblingList(int64(cpuList[1])))
}
@ -422,8 +425,16 @@ func runCPUManagerTests(f *framework.Framework) {
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
}
ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs")
func runMultipleCPUContainersGuPod(f *framework.Framework) {
var expAllowedCPUsListRegex string
var cpuList []int
var cpu1, cpu2 int
var err error
var ctnAttrs []ctnAttribute
var pod *v1.Pod
ctnAttrs = []ctnAttribute{
{
ctnName: "gu-container1",
@ -467,8 +478,16 @@ func runCPUManagerTests(f *framework.Framework) {
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
waitForContainerRemoval(pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)
}
func runMultipleGuPods(f *framework.Framework) {
var expAllowedCPUsListRegex string
var cpuList []int
var cpu1, cpu2 int
var err error
var ctnAttrs []ctnAttribute
var pod1, pod2 *v1.Pod
ginkgo.By("running multiple Gu pods")
ctnAttrs = []ctnAttribute{
{
ctnName: "gu-container1",
@ -513,11 +532,55 @@ func runCPUManagerTests(f *framework.Framework) {
err = f.PodClient().MatchContainerOutput(pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod2.Spec.Containers[0].Name, pod2.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod1.Name, pod2.Name})
waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
}
func runCPUManagerTests(f *framework.Framework) {
var cpuCap, cpuAlloc int64
var oldCfg *kubeletconfig.KubeletConfiguration
var expAllowedCPUsListRegex string
var cpuList []int
var cpu1 int
var err error
var ctnAttrs []ctnAttribute
var pod *v1.Pod
ginkgo.It("should assign CPUs as expected based on the Pod spec", func() {
cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f)
// Skip CPU Manager tests altogether if the CPU capacity < 2.
if cpuCap < 2 {
e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2")
}
// Enable CPU Manager in the kubelet.
oldCfg = enableCPUManagerInKubelet(f, true)
ginkgo.By("running a non-Gu pod")
runNonGuPodTest(f, cpuCap)
ginkgo.By("running a Gu pod")
runGuPodTest(f)
ginkgo.By("running multiple Gu and non-Gu pods")
runMultipleGuNonGuPods(f, cpuCap, cpuAlloc)
// Skip rest of the tests if CPU capacity < 3.
if cpuCap < 3 {
e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3")
}
ginkgo.By("running a Gu pod requesting multiple CPUs")
runMultipleCPUGuPod(f)
ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs")
runMultipleCPUContainersGuPod(f)
ginkgo.By("running multiple Gu pods")
runMultipleGuPods(f)
ginkgo.By("test for automatically remove inactive pods from cpumanager state file.")
// First running a Gu Pod,

View File

@ -34,7 +34,6 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/test/e2e/framework"
@ -94,12 +93,6 @@ func detectSRIOVDevices() int {
return devCount
}
// makeTopologyManagerPod returns a pod with the provided tmCtnAttributes.
// Each container runs a command that prints its allowed cpuset
// (Cpus_allowed_list from /proc/self/status) and then sleeps, so the test
// can read the CPU assignment back from the container log.
func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d"
return makeTopologyManagerTestPod(podName, cpusetCmd, tmCtnAttributes)
}
func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnAttribute) *v1.Pod {
var containers []v1.Container
for _, ctnAttr := range tmCtnAttributes {
@ -314,109 +307,17 @@ func validatePodAlignment(f *framework.Framework, pod *v1.Pod, envInfo *testEnvI
func runTopologyManagerPolicySuiteTests(f *framework.Framework) {
var cpuCap, cpuAlloc int64
var cpuListString, expAllowedCPUsListRegex string
var cpuList []int
var cpu1, cpu2 int
var cset cpuset.CPUSet
var err error
var ctnAttrs []tmCtnAttribute
var pod, pod1, pod2 *v1.Pod
cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f)
ginkgo.By("running a non-Gu pod")
ctnAttrs = []tmCtnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "100m",
cpuLimit: "200m",
},
}
pod = makeTopologyManagerPod("non-gu-pod", ctnAttrs)
pod = f.PodClient().CreateSync(pod)
ginkgo.By("checking if the expected cpuset was assigned")
expAllowedCPUsListRegex = fmt.Sprintf("^0-%d\n$", cpuCap-1)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
runNonGuPodTest(f, cpuCap)
ginkgo.By("running a Gu pod")
ctnAttrs = []tmCtnAttribute{
{
ctnName: "gu-container",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
}
pod = makeTopologyManagerPod("gu-pod", ctnAttrs)
pod = f.PodClient().CreateSync(pod)
ginkgo.By("checking if the expected cpuset was assigned")
cpu1 = 1
if isHTEnabled() {
cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
cpu1 = cpuList[1]
}
expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
runGuPodTest(f)
ginkgo.By("running multiple Gu and non-Gu pods")
ctnAttrs = []tmCtnAttribute{
{
ctnName: "gu-container",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
}
pod1 = makeTopologyManagerPod("gu-pod", ctnAttrs)
pod1 = f.PodClient().CreateSync(pod1)
ctnAttrs = []tmCtnAttribute{
{
ctnName: "non-gu-container",
cpuRequest: "200m",
cpuLimit: "300m",
},
}
pod2 = makeTopologyManagerPod("non-gu-pod", ctnAttrs)
pod2 = f.PodClient().CreateSync(pod2)
ginkgo.By("checking if the expected cpuset was assigned")
cpu1 = 1
if isHTEnabled() {
cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
cpu1 = cpuList[1]
}
expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
err = f.PodClient().MatchContainerOutput(pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod1.Spec.Containers[0].Name, pod1.Name)
cpuListString = "0"
if cpuAlloc > 2 {
cset = cpuset.MustParse(fmt.Sprintf("0-%d", cpuCap-1))
cpuListString = fmt.Sprintf("%s", cset.Difference(cpuset.NewCPUSet(cpu1)))
}
expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
err = f.PodClient().MatchContainerOutput(pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod2.Spec.Containers[0].Name, pod2.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod1.Name, pod2.Name})
waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
runMultipleGuNonGuPods(f, cpuCap, cpuAlloc)
// Skip rest of the tests if CPU capacity < 3.
if cpuCap < 3 {
@ -424,118 +325,13 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) {
}
ginkgo.By("running a Gu pod requesting multiple CPUs")
ctnAttrs = []tmCtnAttribute{
{
ctnName: "gu-container",
cpuRequest: "2000m",
cpuLimit: "2000m",
},
}
pod = makeTopologyManagerPod("gu-pod", ctnAttrs)
pod = f.PodClient().CreateSync(pod)
ginkgo.By("checking if the expected cpuset was assigned")
cpuListString = "1-2"
if isHTEnabled() {
cpuListString = "2-3"
cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
if cpuList[1] != 1 {
cset = cpuset.MustParse(getCPUSiblingList(1))
cpuListString = fmt.Sprintf("%s", cset)
}
}
expAllowedCPUsListRegex = fmt.Sprintf("^%s\n$", cpuListString)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
runMultipleCPUGuPod(f)
ginkgo.By("running a Gu pod with multiple containers requesting integer CPUs")
ctnAttrs = []tmCtnAttribute{
{
ctnName: "gu-container1",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
{
ctnName: "gu-container2",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
}
pod = makeTopologyManagerPod("gu-pod", ctnAttrs)
pod = f.PodClient().CreateSync(pod)
ginkgo.By("checking if the expected cpuset was assigned")
cpu1, cpu2 = 1, 2
if isHTEnabled() {
cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
if cpuList[1] != 1 {
cpu1, cpu2 = cpuList[1], 1
}
}
expAllowedCPUsListRegex = fmt.Sprintf("^%d|%d\n$", cpu1, cpu2)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[0].Name, pod.Name)
err = f.PodClient().MatchContainerOutput(pod.Name, pod.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod.Spec.Containers[1].Name, pod.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod.Name})
waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
waitForContainerRemoval(pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)
runMultipleCPUContainersGuPod(f)
ginkgo.By("running multiple Gu pods")
ctnAttrs = []tmCtnAttribute{
{
ctnName: "gu-container1",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
}
pod1 = makeTopologyManagerPod("gu-pod1", ctnAttrs)
pod1 = f.PodClient().CreateSync(pod1)
ctnAttrs = []tmCtnAttribute{
{
ctnName: "gu-container2",
cpuRequest: "1000m",
cpuLimit: "1000m",
},
}
pod2 = makeTopologyManagerPod("gu-pod2", ctnAttrs)
pod2 = f.PodClient().CreateSync(pod2)
ginkgo.By("checking if the expected cpuset was assigned")
cpu1, cpu2 = 1, 2
if isHTEnabled() {
cpuList = cpuset.MustParse(getCPUSiblingList(0)).ToSlice()
if cpuList[1] != 1 {
cpu1, cpu2 = cpuList[1], 1
}
}
expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu1)
err = f.PodClient().MatchContainerOutput(pod1.Name, pod1.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod1.Spec.Containers[0].Name, pod1.Name)
expAllowedCPUsListRegex = fmt.Sprintf("^%d\n$", cpu2)
err = f.PodClient().MatchContainerOutput(pod2.Name, pod2.Spec.Containers[0].Name, expAllowedCPUsListRegex)
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
pod2.Spec.Containers[0].Name, pod2.Name)
ginkgo.By("by deleting the pods and waiting for container removal")
deletePods(f, []string{pod1.Name, pod2.Name})
waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
runMultipleGuPods(f)
}
func waitForAllContainerRemoval(podName, podNS string) {