e2e: node: cpumgr: check CPU allocatable for CFS quota test

Add an (admittedly pretty crude) CPU allocatable check.
A more thorough refactoring is needed, but we need to
unbreak CI first, so this seems the minimal, decently clean fix.

Signed-off-by: Francesco Romani <fromani@redhat.com>
Author: Francesco Romani <fromani@redhat.com>
Date:   2025-02-14 19:37:44 +01:00
Parent: 844c2ef39d
Commit: 323410664c

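In outline, the change gates the quota-sensitive cases on the node's allocatable CPU: skip cheaply before the kubelet restart when less than one CPU is allocatable, then re-read the value after the restart (the static policy's reservation consumes a full CPU) and pass it down so individual cases can skip instead of fail. A minimal sketch of that pattern, assuming getLocalNodeCPUDetails and e2eskipper behave as in the diff below; gateOnAllocatableCPU is a hypothetical name, not part of the commit:

	func gateOnAllocatableCPU(ctx context.Context, f *framework.Framework) int64 {
		// Skip before the expensive kubelet restart if the node cannot
		// host even one exclusively allocated CPU.
		_, cpuAlloc, _ := getLocalNodeCPUDetails(ctx, f)
		if cpuAlloc < 1 {
			e2eskipper.Skipf("Skipping since not enough allocatable CPU: got %d, required 1", cpuAlloc)
		}
		// ... reconfigure and restart the kubelet with the static policy ...
		// Re-read allocatable afterwards: the reservation consumed one CPU,
		// and some cases need at least 2 exclusive CPUs to be meaningful.
		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
		return cpuAlloc
	}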

@@ -592,12 +592,14 @@ func runMultipleCPUContainersGuPod(ctx context.Context, f *framework.Framework)
 	waitForContainerRemoval(ctx, pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)
 }
 
-func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQuotaWithExclusiveCPUs bool) {
+func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQuotaWithExclusiveCPUs bool, cpuAlloc int64) {
 	var err error
 	var ctnAttrs []ctnAttribute
 	var pod1, pod2, pod3 *v1.Pod
 	podsToClean := make(map[string]*v1.Pod) // pod.UID -> pod
 
+	framework.Logf("runCfsQuotaGuPods: disableQuota=%v, CPU Allocatable=%v", disabledCPUQuotaWithExclusiveCPUs, cpuAlloc)
+
 	deleteTestPod := func(pod *v1.Pod) {
 		// waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a
 		// 'deadline expired' message and the cleanup aborts, which we don't want.
@@ -619,6 +621,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
 		deletePodsAsync(ctx2, f, podsToClean)
 	})
 
+	podCFSCheckCommand := []string{"sh", "-c", `cat $(find /sysfscgroup | grep "$(cat /podinfo/uid | sed 's/-/_/g').slice/cpu.max$") && sleep 1d`}
 	cfsCheckCommand := []string{"sh", "-c", "cat /sys/fs/cgroup/cpu.max && sleep 1d"}
 	defaultPeriod := "100000"
@@ -688,6 +691,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
 		pod3.Spec.Containers[0].Name, pod3.Name)
 	deleteTestPod(pod3)
 
+	if cpuAlloc >= 2 {
 	ctnAttrs = []ctnAttribute{
 		{
 			ctnName: "gu-container-non-int-values",
@@ -736,8 +740,6 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
 		},
 	}
 
-	podCFSCheckCommand := []string{"sh", "-c", `cat $(find /sysfscgroup | grep "$(cat /podinfo/uid | sed 's/-/_/g').slice/cpu.max$") && sleep 1d`}
-
 	pod5 := makeCPUManagerPod("gu-pod5", ctnAttrs)
 	pod5.Spec.Containers[0].Command = podCFSCheckCommand
 	pod5 = e2epod.NewPodClient(f).CreateSync(ctx, pod5)
@@ -756,6 +758,9 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
 	err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod5.Name, pod5.Spec.Containers[0].Name, expCFSQuotaRegex)
 	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod5.Spec.Containers[0].Name, pod5.Name)
 	deleteTestPod(pod5)
+	} else {
+		ginkgo.By(fmt.Sprintf("some cases SKIPPED - requires at least %d allocatable cores, got %d", 2, cpuAlloc))
+	}
 
 	ctnAttrs = []ctnAttribute{
 		{
@@ -936,6 +941,10 @@ func runCPUManagerTests(f *framework.Framework) {
 		if !IsCgroup2UnifiedMode() {
 			e2eskipper.Skipf("Skipping since CgroupV2 not used")
 		}
+		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
+		if cpuAlloc < 1 { // save an expensive kubelet restart
+			e2eskipper.Skipf("Skipping since not enough allocatable CPU: got %d, required 1", cpuAlloc)
+		}
 		newCfg := configureCPUManagerInKubelet(oldCfg,
 			&cpuManagerKubeletArguments{
 				policyName: string(cpumanager.PolicyStatic),
@@ -944,13 +953,19 @@ func runCPUManagerTests(f *framework.Framework) {
 			},
 		)
 		updateKubeletConfig(ctx, f, newCfg, true)
-		runCfsQuotaGuPods(ctx, f, true)
+
+		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU; some tests require > 1 exclusive CPU
+		runCfsQuotaGuPods(ctx, f, true, cpuAlloc)
 	})
 
 	ginkgo.It("should keep enforcing the CFS quota for containers with static CPUs assigned and feature gate disabled", func(ctx context.Context) {
 		if !IsCgroup2UnifiedMode() {
 			e2eskipper.Skipf("Skipping since CgroupV2 not used")
 		}
+		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
+		if cpuAlloc < 1 { // save an expensive kubelet restart
+			e2eskipper.Skipf("Skipping since not enough allocatable CPU: got %d, required 1", cpuAlloc)
+		}
 		newCfg := configureCPUManagerInKubelet(oldCfg,
 			&cpuManagerKubeletArguments{
 				policyName: string(cpumanager.PolicyStatic),
@@ -960,7 +975,9 @@ func runCPUManagerTests(f *framework.Framework) {
 		)
 		updateKubeletConfig(ctx, f, newCfg, true)
-		runCfsQuotaGuPods(ctx, f, false)
+
+		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f) // check again after we reserved 1 full CPU; some tests require > 1 exclusive CPU
+		runCfsQuotaGuPods(ctx, f, false, cpuAlloc)
 	})
 
 	f.It("should not reuse CPUs of restartable init containers", feature.SidecarContainers, func(ctx context.Context) {