test: parity between cluster and node IPPR e2e tests

Some in-place pod resize (IPPR) cluster e2e tests are missing from the node
e2e tests. This change adds them so that the two suites are at parity.
This commit is contained in:
Anish Shah 2024-10-17 05:09:56 -07:00
parent d67e6545b1
commit 6203006348

View File

@ -54,6 +54,8 @@ const (
Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight"
CPUPeriod string = "100000"
MinContainerRuntimeVersion string = "1.6.9"
fakeExtendedResource = "dummy.com/dummy"
)
var (
@ -64,18 +66,21 @@ var (
)
// ContainerResources lists, as strings, the CPU, memory, ephemeral-storage and
// extended-resource request (…Req) and limit (…Lim) values of a test container.
// NOTE(review): this diff view shows the pre-change field list followed by the
// post-change one; only the second list (with the ExtendedResource fields)
// exists in the resulting file.
type ContainerResources struct {
CPUReq string
CPULim string
MemReq string
MemLim string
EphStorReq string
EphStorLim string
CPUReq string
CPULim string
MemReq string
MemLim string
EphStorReq string
EphStorLim string
ExtendedResourceReq string
ExtendedResourceLim string
}
// ContainerAllocations lists, as strings, per-resource allocation values for a
// test container (presumably the values expected in the pod's allocated
// resources; the consumers are not visible here, confirm against callers).
// NOTE(review): this diff view shows the pre-change field list followed by the
// post-change one; only the second list exists in the resulting file.
type ContainerAllocations struct {
CPUAlloc string
MemAlloc string
ephStorAlloc string
CPUAlloc string
MemAlloc string
ephStorAlloc string
ExtendedResourceAlloc string
}
type TestContainerInfo struct {
@ -87,6 +92,28 @@ type TestContainerInfo struct {
RestartCount int32
}
// containerPatch is the per-container entry of a pod resize patch document.
// It carries the container name plus CPU, memory and ephemeral-storage
// requests and limits. Individual quantities left as the empty string are
// dropped from the JSON (omitempty); the "requests" and "limits" objects
// themselves are always emitted.
type containerPatch struct {
Name string `json:"name"`
Resources struct {
Requests struct {
CPU string `json:"cpu,omitempty"`
Memory string `json:"memory,omitempty"`
EphStor string `json:"ephemeral-storage,omitempty"`
} `json:"requests"`
Limits struct {
CPU string `json:"cpu,omitempty"`
Memory string `json:"memory,omitempty"`
EphStor string `json:"ephemeral-storage,omitempty"`
} `json:"limits"`
} `json:"resources"`
}
// patchSpec is the top-level patch document marshalled by genPatchString.
// It serializes as {"spec":{"containers":[...]}} with one containerPatch
// per container.
type patchSpec struct {
Spec struct {
Containers []containerPatch `json:"containers"`
} `json:"spec"`
}
func supportsInPlacePodVerticalScaling(ctx context.Context, f *framework.Framework) bool {
node := getLocalNode(ctx, f)
re := regexp.MustCompile("containerd://(.*)")
@ -418,6 +445,100 @@ func waitForPodResizeActuation(ctx context.Context, f *framework.Framework, c cl
return resizedPod
}
// genPatchString serializes a pod patch that sets, for every container in
// containers, the CPU and memory requests and limits to the values held in
// that container's Resources. Ephemeral-storage and extended-resource values
// are not copied into the patch.
//
// It returns the JSON document as a string, or the marshalling error.
func genPatchString(containers []TestContainerInfo) (string, error) {
	var spec patchSpec
	for i := range containers {
		res := containers[i].Resources

		entry := containerPatch{Name: containers[i].Name}
		entry.Resources.Requests.CPU = res.CPUReq
		entry.Resources.Requests.Memory = res.MemReq
		entry.Resources.Limits.CPU = res.CPULim
		entry.Resources.Limits.Memory = res.MemLim

		spec.Spec.Containers = append(spec.Spec.Containers, entry)
	}

	raw, err := json.Marshal(spec)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// patchNode sends a strategic merge patch to the "status" subresource of the
// node named old.Name. The patch body is the two-way merge diff between the
// JSON encodings of old and new.
//
// It returns any marshalling, diffing or API error; only the diffing error is
// wrapped with the node name.
func patchNode(ctx context.Context, client clientset.Interface, old *v1.Node, new *v1.Node) error {
	var (
		before, after, diff []byte
		err                 error
	)

	if before, err = json.Marshal(old); err != nil {
		return err
	}
	if after, err = json.Marshal(new); err != nil {
		return err
	}

	if diff, err = strategicpatch.CreateTwoWayMergePatch(before, after, &v1.Node{}); err != nil {
		return fmt.Errorf("failed to create merge patch for node %q: %w", old.Name, err)
	}

	_, err = client.CoreV1().Nodes().Patch(ctx, old.Name, types.StrategicMergePatchType, diff, metav1.PatchOptions{}, "status")
	return err
}
// addExtendedResource advertises extendedResourceName on the node nodeName.
// It patches the node status so that both capacity and allocatable report
// extendedResourceQuantity, then polls once per second for up to 30 seconds
// until the API server returns that capacity for the node.
//
// Any failure (initial Get, patch, or the wait timing out) fails the running
// test; nothing is returned.
func addExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string, extendedResourceQuantity resource.Quantity) {
	extendedResource := v1.ResourceName(extendedResourceName)

	ginkgo.By("Adding a custom resource")
	originalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
	framework.ExpectNoError(err)

	node := originalNode.DeepCopy()
	// Writing to a nil map panics, so make sure both resource lists exist.
	if node.Status.Capacity == nil {
		node.Status.Capacity = v1.ResourceList{}
	}
	if node.Status.Allocatable == nil {
		node.Status.Allocatable = v1.ResourceList{}
	}
	node.Status.Capacity[extendedResource] = extendedResourceQuantity
	node.Status.Allocatable[extendedResource] = extendedResourceQuantity

	// patchNode only serializes its arguments, so the fetched node can be
	// passed as the "old" side directly.
	err = patchNode(context.Background(), clientSet, originalNode, node)
	framework.ExpectNoError(err)

	gomega.Eventually(func() error {
		// Return errors rather than asserting: a transient API failure should
		// be retried on the next poll instead of failing the test at once.
		current, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
		if err != nil {
			return err
		}
		capacity, exists := current.Status.Capacity[extendedResource]
		if !exists {
			return fmt.Errorf("node %s has no %s resource capacity", nodeName, extendedResourceName)
		}
		// Compare against the quantity that was requested, not a fixed value.
		if capacity.Cmp(extendedResourceQuantity) != 0 {
			return fmt.Errorf("node %s has resource capacity %s, expected: %s", nodeName, capacity.String(), extendedResourceQuantity.String())
		}
		return nil
	}).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred())
}
// removeExtendedResource stops advertising extendedResourceName on the node
// nodeName. It patches the node status to drop the resource from both
// capacity and allocatable, then polls once per second for up to 30 seconds
// until the API server no longer reports it in the node's capacity.
//
// Any failure (initial Get, patch, or the wait timing out) fails the running
// test; nothing is returned.
func removeExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string) {
	extendedResource := v1.ResourceName(extendedResourceName)

	ginkgo.By("Removing a custom resource")
	originalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
	framework.ExpectNoError(err)

	node := originalNode.DeepCopy()
	delete(node.Status.Capacity, extendedResource)
	delete(node.Status.Allocatable, extendedResource)

	// patchNode only serializes its arguments, so the fetched node can be
	// passed as the "old" side directly.
	err = patchNode(context.Background(), clientSet, originalNode, node)
	framework.ExpectNoError(err)

	gomega.Eventually(func() error {
		// Return errors rather than asserting: a transient API failure should
		// be retried on the next poll instead of failing the test at once.
		current, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
		if err != nil {
			return err
		}
		if _, exists := current.Status.Capacity[extendedResource]; exists {
			return fmt.Errorf("node %s has resource capacity %s which is expected to be removed", nodeName, extendedResourceName)
		}
		return nil
	}).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred())
}
func doPodResizeTests() {
f := framework.NewDefaultFramework("pod-resize-test")
var podClient *e2epod.PodClient
@ -426,10 +547,11 @@ func doPodResizeTests() {
})
type testCase struct {
name string
containers []TestContainerInfo
patchString string
expected []TestContainerInfo
name string
containers []TestContainerInfo
patchString string
expected []TestContainerInfo
addExtendedResource bool
}
noRestart := v1.NotRequired
@ -1131,6 +1253,31 @@ func doPodResizeTests() {
},
},
},
{
name: "Guaranteed QoS pod, one container - increase CPU & memory with an extended resource",
containers: []TestContainerInfo{
{
Name: "c1",
Resources: &ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi",
ExtendedResourceReq: "1", ExtendedResourceLim: "1"},
CPUPolicy: &noRestart,
MemPolicy: &noRestart,
},
},
patchString: `{"spec":{"containers":[
{"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}}
]}}`,
expected: []TestContainerInfo{
{
Name: "c1",
Resources: &ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi",
ExtendedResourceReq: "1", ExtendedResourceLim: "1"},
CPUPolicy: &noRestart,
MemPolicy: &noRestart,
},
},
addExtendedResource: true,
},
}
timeouts := framework.NewTimeoutContext()
@ -1153,6 +1300,20 @@ func doPodResizeTests() {
testPod = makeTestPod(f.Namespace.Name, "testpod", tStamp, tc.containers)
testPod = e2epod.MustMixinRestrictedPodSecurity(testPod)
if tc.addExtendedResource {
nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet)
framework.ExpectNoError(err)
for _, node := range nodes.Items {
addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123"))
}
defer func() {
for _, node := range nodes.Items {
removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource)
}
}()
}
ginkgo.By("creating pod")
newPod := podClient.CreateSync(ctx, testPod)
@ -1161,41 +1322,49 @@ func doPodResizeTests() {
ginkgo.By("verifying initial pod resize policy is as expected")
verifyPodResizePolicy(newPod, tc.containers)
err := e2epod.WaitForPodCondition(ctx, f.ClientSet, newPod.Namespace, newPod.Name, "Ready", timeouts.PodStartShort, testutils.PodRunningReady)
framework.ExpectNoError(err, "pod %s/%s did not go running", newPod.Namespace, newPod.Name)
framework.Logf("pod %s/%s running", newPod.Namespace, newPod.Name)
ginkgo.By("verifying initial pod status resources")
verifyPodStatusResources(newPod, tc.containers)
ginkgo.By("patching pod for resize")
patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name,
types.StrategicMergePatchType, []byte(tc.patchString), metav1.PatchOptions{})
framework.ExpectNoError(pErr, "failed to patch pod for resize")
ginkgo.By("verifying initial cgroup config are as expected")
framework.ExpectNoError(verifyPodContainersCgroupValues(ctx, f, newPod, tc.containers))
ginkgo.By("verifying pod patched for resize")
verifyPodResources(patchedPod, tc.expected)
gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
WithArguments(patchedPod, tc.containers).
Should(gomega.BeNil(), "failed to verify Pod allocations for patchedPod")
patchAndVerify := func(patchString string, expectedContainers []TestContainerInfo, initialContainers []TestContainerInfo, opStr string, isRollback bool) {
ginkgo.By(fmt.Sprintf("patching pod for %s", opStr))
patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(context.TODO(), newPod.Name,
types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{})
framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr))
ginkgo.By("waiting for resize to be actuated")
resizedPod := waitForPodResizeActuation(ctx, f, f.ClientSet, podClient, newPod, patchedPod, tc.expected)
ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr))
verifyPodResources(patchedPod, expectedContainers)
gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
WithArguments(patchedPod, initialContainers).
Should(gomega.BeNil(), "failed to verify Pod allocations for patchedPod")
ginkgo.By("verifying pod resources after resize")
verifyPodResources(resizedPod, tc.expected)
ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr))
resizedPod := waitForPodResizeActuation(ctx, f, podClient, newPod, patchedPod, expectedContainers, initialContainers, isRollback)
ginkgo.By("verifying pod allocations after resize")
gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
WithArguments(resizedPod, tc.expected).
Should(gomega.BeNil(), "failed to verify Pod allocations for resizedPod")
// Check cgroup values only for containerd versions before 1.6.9
ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr))
framework.ExpectNoError(verifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers))
ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr))
verifyPodResources(resizedPod, expectedContainers)
ginkgo.By(fmt.Sprintf("verifying pod allocations after %s", opStr))
gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
WithArguments(resizedPod, expectedContainers).
Should(gomega.BeNil(), "failed to verify Pod allocations for resizedPod")
}
patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false)
rbPatchStr, err := genPatchString(tc.containers)
framework.ExpectNoError(err)
// Resize has been actuated, test rollback
patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true)
ginkgo.By("deleting pod")
deletePodSyncByName(ctx, f, newPod.Name)
// we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
// this is in turn needed because we will have an unavoidable (in the current framework) race with the
// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace)
podClient.DeleteSync(ctx, newPod.Name, metav1.DeleteOptions{}, timeouts.PodDelete)
})
}
}
@ -1286,11 +1455,8 @@ func doPodResizeErrorTests() {
WithArguments(patchedPod, tc.expected).
Should(gomega.BeNil(), "failed to verify Pod allocations for patchedPod")
deletePodSyncByName(ctx, f, newPod.Name)
// we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
// this is in turn needed because we will have an unavoidable (in the current framework) race with the
// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace)
ginkgo.By("deleting pod")
podClient.DeleteSync(ctx, newPod.Name, metav1.DeleteOptions{}, timeouts.PodDelete)
})
}
}
@ -1301,7 +1467,7 @@ func doPodResizeErrorTests() {
// b) api-server in services doesn't start with --enable-admission-plugins=ResourceQuota
// and is not possible to start it from TEST_ARGS
// Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests()
// in test/node/pod_resize_test.go
// in test/e2e/node/pod_resize.go
var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), feature.InPlacePodVerticalScaling, "[NodeAlphaFeature:InPlacePodVerticalScaling]", func() {
if !podOnCgroupv2Node {