From 60585da68f69a3c6168a565df1157823f4e1a242 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 2 Feb 2022 13:48:00 +0100 Subject: [PATCH 1/4] e2e: node: {cpu,topo}mgr: report node capacity/allocatable Make sure to log the CPU capacity and allocatable CPUs of the node running the tests, to make the troubleshooting of test failures easier. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 913fa8501f9..705545b87b0 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -340,6 +340,7 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { var cpuCap, cpuAlloc int64 cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f) + ginkgo.By(fmt.Sprintf("checking node CPU capacity (%d) and allocatable CPUs (%d)", cpuCap, cpuAlloc)) ginkgo.By("running a non-Gu pod") runNonGuPodTest(f, cpuCap) From 2d1503dae35fc414e9ce3131deff131a2cf166b9 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 2 Feb 2022 13:49:43 +0100 Subject: [PATCH 2/4] e2e: node: {cpu,topo}mgr: base skip logic on allocatable The existing cpu/topology manager tests correctly check for the node resources and skip if the detected resources are not enough to run the tests, to avoid false negatives. Unfortunately they do the check against the node capacity, while the correct approach is to check the allocatable resources. The existing check is correct only in a narrow set of cases; otherwise it can still lead to false negatives. This PR fixes that. 
Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 705545b87b0..d6cd39ebf54 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -351,8 +351,8 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { ginkgo.By("running multiple Gu and non-Gu pods") runMultipleGuNonGuPods(f, cpuCap, cpuAlloc) - // Skip rest of the tests if CPU capacity < 3. - if cpuCap < 3 { + // Skip rest of the tests if CPU allocatable < 3. + if cpuAlloc < 3 { e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3") } From c92d9f7974f919776c2b58ee6eeb63849269cc1a Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 2 Feb 2022 13:59:20 +0100 Subject: [PATCH 3/4] e2e: node: {cpu,topo}mgr: don't assume cpu capacity >= 2 Even though CI machines _usually_ have at least two cpus, let's rather not assume this holds true, and let's actually check the allocatable CPUs, skipping even the simplest tests if the assumption is broken, to avoid false negatives. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index d6cd39ebf54..50b97be6c5a 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -342,6 +342,12 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(f) ginkgo.By(fmt.Sprintf("checking node CPU capacity (%d) and allocatable CPUs (%d)", cpuCap, cpuAlloc)) + // Albeit even the weakest CI machines usually have 2 cpus, let's be extra careful and + // check explicitly. We prefer to skip than a false negative (and a failed test). 
+ if cpuAlloc < 1 { + e2eskipper.Skipf("Skipping basic CPU Manager tests since CPU capacity < 2") + } + ginkgo.By("running a non-Gu pod") runNonGuPodTest(f, cpuCap) From 7004a718d95764e7812a91b11a8f7e9696f2f48b Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 2 Feb 2022 14:02:17 +0100 Subject: [PATCH 4/4] e2e: node: {cpu,topo}mgr: round up test requirement A cpu/topology manager e2e test wants to require one exclusive CPU and a share of CPU time; let's round up the allocatable CPU requirements (from 1 to 2) to reduce the chances of false negatives. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 50b97be6c5a..7784258845b 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -354,14 +354,14 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { ginkgo.By("running a Gu pod") runGuPodTest(f, 1) - ginkgo.By("running multiple Gu and non-Gu pods") - runMultipleGuNonGuPods(f, cpuCap, cpuAlloc) - // Skip rest of the tests if CPU allocatable < 3. if cpuAlloc < 3 { e2eskipper.Skipf("Skipping rest of the CPU Manager tests since CPU capacity < 3") } + ginkgo.By("running multiple Gu and non-Gu pods") + runMultipleGuNonGuPods(f, cpuCap, cpuAlloc) + ginkgo.By("running a Gu pod requesting multiple CPUs") runMultipleCPUGuPod(f)