From 449763fb115f375020eb06c517383ed220a8e07e Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 24 Jul 2025 12:35:45 +0200 Subject: [PATCH] e2e: podresources: disable memory manager integration As part of PR 132028 we added more e2e test coverage to validate the fix, and check as much as possible that there are no regressions. The issue and the fix become evident largely when inspecting memory allocation with the Memory Manager static policy enabled. Quoting the commit message of bc56d0e45a24b24ca9727911a891275b81bae69b ``` The podresources API List implementation uses the internal data of the resource managers as source of truth. Looking at the implementation here: https://github.com/kubernetes/kubernetes/blob/v1.34.0-alpha.0/pkg/kubelet/apis/podresources/server_v1.go#L60 we take care of syncing the device allocation data before querying the device manager to return its pod->devices assignment. This is needed because otherwise the device manager (and all the other resource managers) would do the cleanup asynchronously, so the `List` call will return incorrect data. But we don't do this syncing neither for CPUs or for memory, so when we report these we will get stale data as the issue #132020 demonstrates. For CPU manager, we however have the reconcile loop which cleans the stale data periodically. Turns out this timing interplay was actually the reason the existing issue #119423 seemed fixed (see: #119423 (comment)). But it's actually timing. If in the reproducer we set the `cpuManagerReconcilePeriod` to a time very high (>= 5 minutes), then the issue still reproduces against current master branch (https://github.com/kubernetes/kubernetes/blob/v1.34.0-alpha.0/test/e2e_node/podresources_test.go#L983). ``` The missing actor here is the memory manager. The memory manager has no reconcile loop (which would implicitly fix the stale data problem) and no explicit synchronization, so it is the unlucky one that reported stale data, leading to the eventual understanding of the problem. 
For this reason it was (and still is) important to exercise it during the test. It turns out, however, that the test is wrong, likely because of a hidden dependency between the test expectations and the lane configuration (notably machine specs), so we disable the memory manager activation for the time being, until we figure out a safe way to enable it. Note this significantly weakens the signal for this specific test. Signed-off-by: Francesco Romani --- test/e2e_node/podresources_test.go | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/test/e2e_node/podresources_test.go b/test/e2e_node/podresources_test.go index 3f5caaa867d..97aeef35391 100644 --- a/test/e2e_node/podresources_test.go +++ b/test/e2e_node/podresources_test.go @@ -952,25 +952,9 @@ var _ = SIGDescribe("POD Resources API", framework.WithSerial(), feature.PodReso f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged var reservedSystemCPUs cpuset.CPUSet - var memoryQuantity resource.Quantity - var defaultKubeParams *memoryManagerKubeletParams ginkgo.BeforeEach(func() { reservedSystemCPUs = cpuset.New(1) - memoryQuantity = resource.MustParse("1100Mi") - defaultKubeParams = &memoryManagerKubeletParams{ - systemReservedMemory: []kubeletconfig.MemoryReservation{ - { - NumaNode: 0, - Limits: v1.ResourceList{ - resourceMemory: memoryQuantity, - }, - }, - }, - systemReserved: map[string]string{resourceMemory: "500Mi"}, - kubeReserved: map[string]string{resourceMemory: "500Mi"}, - evictionHard: map[string]string{evictionHardMemory: "100Mi"}, - } }) ginkgo.Context("with SRIOV devices in the system", func() { @@ -1253,9 +1237,6 @@ var _ = SIGDescribe("POD Resources API", framework.WithSerial(), feature.PodReso ginkgo.When("listing with restricted list output enabled", func() { tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { - kubeParams := *defaultKubeParams - kubeParams.policy = staticPolicy - 
updateKubeletConfigWithMemoryManagerParams(initialConfig, &kubeParams) initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic) initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 10 * time.Minute} // set it long enough it is practically disabled cpus := reservedSystemCPUs.String() @@ -1536,9 +1517,6 @@ var _ = SIGDescribe("POD Resources API", framework.WithSerial(), feature.PodReso ginkgo.When("listing with restricted list output disabled for backward compatible defaults", func() { tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { - kubeParams := *defaultKubeParams - kubeParams.policy = staticPolicy - updateKubeletConfigWithMemoryManagerParams(initialConfig, &kubeParams) initialConfig.CPUManagerPolicy = string(cpumanager.PolicyStatic) initialConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 10 * time.Minute} // set it long enough it is practically disabled cpus := reservedSystemCPUs.String()