Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 11:50:44 +00:00)
Merge pull request #130498 from swatisehgal/distribute-across-numa-e2e-tests
node: cpumgr: e2e: Tests for `distribute-cpus-across-numa` policy option
This commit is contained in: commit fba63656c9
@@ -23,6 +23,7 @@ import (
     "io/fs"
     "os"
     "os/exec"
+    "path/filepath"
     "regexp"
     "strconv"
     "strings"
@@ -40,12 +41,19 @@ import (
 
     "github.com/onsi/ginkgo/v2"
     "github.com/onsi/gomega"
+    "github.com/onsi/gomega/gcustom"
+    gomegatypes "github.com/onsi/gomega/types"
     "k8s.io/kubernetes/test/e2e/feature"
     "k8s.io/kubernetes/test/e2e/framework"
     e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
     e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
 )
 
+const (
+    minSMTLevel    = 2
+    minCPUCapacity = 2
+)
+
 // Helper for makeCPUManagerPod().
 type ctnAttribute struct {
     ctnName    string
@@ -877,9 +885,9 @@ func runCPUManagerTests(f *framework.Framework) {
     ginkgo.It("should assign CPUs as expected based on the Pod spec", func(ctx context.Context) {
         cpuCap, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
 
-        // Skip CPU Manager tests altogether if the CPU capacity < 2.
-        if cpuCap < 2 {
-            e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < 2")
+        // Skip CPU Manager tests altogether if the CPU capacity < minCPUCapacity.
+        if cpuCap < minCPUCapacity {
+            e2eskipper.Skipf("Skipping CPU Manager tests since the CPU capacity < %d", minCPUCapacity)
         }
 
         // Enable CPU Manager in the kubelet.
@@ -972,13 +980,14 @@ func runCPUManagerTests(f *framework.Framework) {
         smtLevel := getSMTLevel()
 
         // strict SMT alignment is trivially verified and granted on non-SMT systems
-        if smtLevel < 2 {
+        if smtLevel < minSMTLevel {
             e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
         }
 
-        // our tests want to allocate a full core, so we need at last 2*2=4 virtual cpus
-        if cpuAlloc < int64(smtLevel*2) {
-            e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < 4", fullCPUsOnlyOpt)
+        // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus
+        minCPUCount := int64(smtLevel * minCPUCapacity)
+        if cpuAlloc < minCPUCount {
+            e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount)
         }
 
         framework.Logf("SMT level %d", smtLevel)
@@ -1153,6 +1162,155 @@ func runCPUManagerTests(f *framework.Framework) {
         waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
     })
 
+    ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) {
+        fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
+        _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
+        smtLevel := getSMTLevel()
+
+        // strict SMT alignment is trivially verified and granted on non-SMT systems
+        if smtLevel < minSMTLevel {
+            e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
+        }
+
+        // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus
+        minCPUCount := int64(smtLevel * minCPUCapacity)
+        if cpuAlloc < minCPUCount {
+            e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount)
+        }
+
+        framework.Logf("SMT level %d", smtLevel)
+
+        cpuPolicyOptions := map[string]string{
+            cpumanager.FullPCPUsOnlyOption:            "true",
+            cpumanager.DistributeCPUsAcrossNUMAOption: "false",
+        }
+        newCfg := configureCPUManagerInKubelet(oldCfg,
+            &cpuManagerKubeletArguments{
+                policyName:              string(cpumanager.PolicyStatic),
+                reservedSystemCPUs:      cpuset.New(0),
+                enableCPUManagerOptions: true,
+                options:                 cpuPolicyOptions,
+            },
+        )
+        updateKubeletConfig(ctx, f, newCfg, true)
+
+        ctnAttrs := []ctnAttribute{
+            {
+                ctnName:    "test-gu-container-distribute-cpus-across-numa-disabled",
+                cpuRequest: "2000m",
+                cpuLimit:   "2000m",
+            },
+        }
+        pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs)
+        pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+
+        for _, cnt := range pod.Spec.Containers {
+            ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
+
+            logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+            framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+            framework.Logf("got pod logs: %v", logs)
+            cpus, err := cpuset.Parse(strings.TrimSpace(logs))
+            framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)
+
+            validateSMTAlignment(cpus, smtLevel, pod, &cnt)
+            gomega.Expect(cpus).To(BePackedCPUs())
+        }
+        deletePodSyncByName(ctx, f, pod.Name)
+        // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
+        // this is in turn needed because we will have an unavoidable (in the current framework) race with the
+        // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
+        waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
+    })
+
+    ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) {
+        var cpusNumPerNUMA, coresNumPerNUMA, numaNodeNum, threadsPerCore int
+
+        fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
+        _, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
+        smtLevel := getSMTLevel()
+        framework.Logf("SMT level %d", smtLevel)
+
+        // strict SMT alignment is trivially verified and granted on non-SMT systems
+        if smtLevel < minSMTLevel {
+            e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
+        }
+
+        // our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus
+        minCPUCount := int64(smtLevel * minCPUCapacity)
+        if cpuAlloc < minCPUCount {
+            e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < %d", fullCPUsOnlyOpt, minCPUCount)
+        }
+
+        // this test is intended to be run on a multi-node NUMA system and
+        // a system with at least 4 cores per socket; hostCheck skips the test
+        // if the above requirements are not satisfied
+        numaNodeNum, coresNumPerNUMA, threadsPerCore = hostCheck()
+        cpusNumPerNUMA = coresNumPerNUMA * threadsPerCore
+
+        framework.Logf("numaNodes on the system %d", numaNodeNum)
+        framework.Logf("Cores per NUMA on the system %d", coresNumPerNUMA)
+        framework.Logf("Threads per Core on the system %d", threadsPerCore)
+        framework.Logf("CPUs per NUMA on the system %d", cpusNumPerNUMA)
+
+        cpuPolicyOptions := map[string]string{
+            cpumanager.FullPCPUsOnlyOption:            "true",
+            cpumanager.DistributeCPUsAcrossNUMAOption: "true",
+        }
+        newCfg := configureCPUManagerInKubelet(oldCfg,
+            &cpuManagerKubeletArguments{
+                policyName:              string(cpumanager.PolicyStatic),
+                reservedSystemCPUs:      cpuset.New(0),
+                enableCPUManagerOptions: true,
+                options:                 cpuPolicyOptions,
+            },
+        )
+        updateKubeletConfig(ctx, f, newCfg, true)
+        // The 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed
+        // across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation.
+        // So, we want to ensure that the CPU Request exceeds the number of CPUs that can fit within a single
+        // NUMA node. We have to pick cpuRequest such that:
+        // 1. CPURequest > cpusNumPerNUMA
+        // 2. It does not occupy all the CPUs on the node and leaves room for the reserved CPUs
+        // 3. CPURequest is a multiple of the number of NUMA nodes to allow equal CPU distribution across NUMA nodes
+        //
+        // In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNUMA * numaNodeNum) - reservedCPUsCount)
+        // Considering all these constraints we select: CPURequest = (cpusNumPerNUMA-smtLevel)*numaNodeNum
+
+        cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum
+        ctnAttrs := []ctnAttribute{
+            {
+                ctnName:    "test-gu-container-distribute-cpus-across-numa",
+                cpuRequest: fmt.Sprintf("%d", cpuReq),
+                cpuLimit:   fmt.Sprintf("%d", cpuReq),
+            },
+        }
+        pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs)
+        pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+
+        for _, cnt := range pod.Spec.Containers {
+            ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
+
+            logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+            framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+            framework.Logf("got pod logs: %v", logs)
+            cpus, err := cpuset.Parse(strings.TrimSpace(logs))
+            framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)
+
+            validateSMTAlignment(cpus, smtLevel, pod, &cnt)
+            // We expect a perfectly even split, i.e. equal distribution across NUMA nodes, since the CPU Request (cpusNumPerNUMA-smtLevel)*numaNodeNum is a multiple of numaNodeNum.
+            expectedSpread := cpus.Size() / numaNodeNum
+            gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread))
+        }
+        deletePodSyncByName(ctx, f, pod.Name)
+        // we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
+        // this is in turn needed because we will have an unavoidable (in the current framework) race with the
+        // reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
+        waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
+    })
+
     ginkgo.AfterEach(func(ctx context.Context) {
         updateKubeletConfig(ctx, f, oldCfg, true)
     })
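
Editor's note: the request sizing in the second test above compresses several constraints into one expression. Below is a minimal, standalone Go sketch of the same arithmetic, assuming a purely hypothetical host with 2 NUMA nodes, 8 physical cores per node, and SMT level 2 (none of these numbers come from the diff):

```go
package main

import "fmt"

func main() {
	// Hypothetical host topology, assumed only for illustration.
	numaNodeNum := 2     // NUMA nodes on the host
	coresNumPerNUMA := 8 // physical cores per NUMA node
	threadsPerCore := 2  // SMT level
	smtLevel := threadsPerCore

	// Logical CPUs per NUMA node.
	cpusNumPerNUMA := coresNumPerNUMA * threadsPerCore // 16

	// The test needs: cpusNumPerNUMA < cpuReq < cpusNumPerNUMA*numaNodeNum - reservedCPUs,
	// with cpuReq a multiple of numaNodeNum so an even spread is possible.
	cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum // (16-2)*2 = 28

	// With an even distribution, each NUMA node should host cpuReq/numaNodeNum CPUs.
	expectedSpread := cpuReq / numaNodeNum // 14
	fmt.Printf("cpuReq=%d expectedSpread=%d\n", cpuReq, expectedSpread)
}
```

On such a host the test would request 28 exclusive CPUs and expect 14 of them on each NUMA node.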
@@ -1260,6 +1418,78 @@ func isSMTAlignmentError(pod *v1.Pod) bool {
     return re.MatchString(pod.Status.Reason)
 }
 
+// getNumaNodeCPUs retrieves CPUs for each NUMA node.
+func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) {
+    numaNodes := make(map[int]cpuset.CPUSet)
+    nodePaths, err := filepath.Glob("/sys/devices/system/node/node*/cpulist")
+    if err != nil {
+        return nil, err
+    }
+
+    for _, nodePath := range nodePaths {
+        data, err := os.ReadFile(nodePath)
+        framework.ExpectNoError(err, "Error obtaining CPU information from the node")
+        cpuSet := strings.TrimSpace(string(data))
+        cpus, err := cpuset.Parse(cpuSet)
+        framework.ExpectNoError(err, "Error parsing CPUset")
+
+        // Extract node ID from path (e.g., "node0" -> 0)
+        base := filepath.Base(filepath.Dir(nodePath))
+        nodeID, err := strconv.Atoi(strings.TrimPrefix(base, "node"))
+        if err != nil {
+            continue
+        }
+        numaNodes[nodeID] = cpus
+    }
+
+    return numaNodes, nil
+}
+
+// computeNUMADistribution calculates CPU distribution per NUMA node.
+func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int {
+    numaCPUs, err := getNumaNodeCPUs()
+    framework.ExpectNoError(err, "Error retrieving NUMA nodes")
+    framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs)
+
+    distribution := make(map[int]int)
+    for node, cpus := range numaCPUs {
+        distribution[node] = cpus.Intersection(allocatedCPUs).Size()
+    }
+
+    framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution)
+    return distribution
+}
+
+// Custom matcher for checking packed CPUs.
+func BePackedCPUs() gomegatypes.GomegaMatcher {
+    return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) {
+        distribution := computeNUMADistribution(allocatedCPUs)
+        for _, count := range distribution {
+            // This assumption holds true if there are enough CPUs on a single NUMA node.
+            // We are intentionally limiting the CPU request to 2 to minimize the number
+            // of CPUs required to fulfill this case and therefore maximize the chances
+            // of correctly validating this case.
+            if count == allocatedCPUs.Size() {
+                return true, nil
+            }
+        }
+        return false, nil
+    }).WithMessage("expected CPUs to be packed")
+}
+
+// Custom matcher for checking distributed CPUs.
+func BeDistributedCPUs(expectedSpread int) gomegatypes.GomegaMatcher {
+    return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) {
+        distribution := computeNUMADistribution(allocatedCPUs)
+        for _, count := range distribution {
+            if count != expectedSpread {
+                return false, nil
+            }
+        }
+        return true, nil
+    }).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread)
+}
+
 // Serial because the test updates kubelet configuration.
 var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() {
     f := framework.NewDefaultFramework("cpu-manager-test")
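
Editor's note: the `BePackedCPUs`/`BeDistributedCPUs` helpers added above use gomega's `gcustom` package, which wraps a plain predicate over the actual value into a matcher. The sketch below shows that pattern in isolation; the two-node `fakeNUMA` map and the `k8s.io/utils/cpuset` import path are assumptions made purely for illustration (the real helpers derive the topology from /sys/devices/system/node):

```go
package main

import (
	"fmt"

	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gcustom"
	gomegatypes "github.com/onsi/gomega/types"
	"k8s.io/utils/cpuset"
)

// fakeNUMA is a hypothetical two-node topology used only in this sketch.
var fakeNUMA = map[int]cpuset.CPUSet{
	0: cpuset.New(0, 1, 2, 3),
	1: cpuset.New(8, 9, 10, 11),
}

// beEvenlySpread mirrors the BeDistributedCPUs pattern: a predicate over the
// allocated cpuset wrapped by gcustom.MakeMatcher with a failure message.
func beEvenlySpread(expected int) gomegatypes.GomegaMatcher {
	return gcustom.MakeMatcher(func(allocated cpuset.CPUSet) (bool, error) {
		for _, nodeCPUs := range fakeNUMA {
			if nodeCPUs.Intersection(allocated).Size() != expected {
				return false, nil
			}
		}
		return true, nil
	}).WithMessage("expected CPUs to be evenly distributed across NUMA nodes")
}

func main() {
	// Stand-alone Gomega instance so the sketch runs outside Ginkgo.
	g := gomega.NewGomega(func(message string, _ ...int) {
		fmt.Println("assertion failed:", message)
	})

	// 4 CPUs split 2/2 across the two fake nodes: the matcher should pass.
	allocated := cpuset.New(0, 1, 8, 9)
	g.Expect(allocated).To(beEvenlySpread(2))
	fmt.Println("allocation is evenly distributed across the fake NUMA nodes")
}
```

Running it prints that the 2/2 allocation satisfies the matcher; an uneven allocation such as `cpuset.New(0, 1, 2, 8)` would trigger the failure handler instead.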