From d2b803246afb162b72c4d11400f4b26d08cad1a8 Mon Sep 17 00:00:00 2001
From: Gunju Kim
Date: Wed, 6 Sep 2023 23:19:50 +0900
Subject: [PATCH] Don't reuse the device allocated to the restartable init
 container

---
 pkg/kubelet/cm/devicemanager/manager.go      |  10 +-
 pkg/kubelet/cm/devicemanager/manager_test.go | 127 +++++++++++++++++++
 test/e2e_node/device_plugin_test.go          |  97 ++++++++++++++
 3 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go
index 3808ccfa0d3..d895c873f9e 100644
--- a/pkg/kubelet/cm/devicemanager/manager.go
+++ b/pkg/kubelet/cm/devicemanager/manager.go
@@ -44,6 +44,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 	"k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
+	"k8s.io/kubernetes/pkg/kubelet/types"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
 
@@ -339,7 +340,14 @@ func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error {
 			if err := m.allocateContainerResources(pod, container, m.devicesToReuse[string(pod.UID)]); err != nil {
 				return err
 			}
-			m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
+			if !types.IsRestartableInitContainer(&initContainer) {
+				m.podDevices.addContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
+			} else {
+				// A restartable init container keeps running alongside the app
+				// containers, so its devices stay in use: remove them from
+				// devicesToReuse instead of offering them to later containers.
+				m.podDevices.removeContainerAllocatedResources(string(pod.UID), container.Name, m.devicesToReuse[string(pod.UID)])
+			}
 			return nil
 		}
 	}
diff --git a/pkg/kubelet/cm/devicemanager/manager_test.go b/pkg/kubelet/cm/devicemanager/manager_test.go
index 7eb9c53356e..0d0304832b5 100644
--- a/pkg/kubelet/cm/devicemanager/manager_test.go
+++ b/pkg/kubelet/cm/devicemanager/manager_test.go
@@ -1348,6 +1348,133 @@ func TestInitContainerDeviceAllocation(t *testing.T) {
 	as.Equal(0, normalCont1Devices.Intersection(normalCont2Devices).Len())
 }
 
+func TestRestartableInitContainerDeviceAllocation(t *testing.T) {
+	// Creating a pod that requests resourceName1 in restartable init
+	// containers and regular containers should succeed, and the devices
+	// allocated to the restartable init containers must not be reallocated.
+	oneDevice := resource.NewQuantity(int64(1), resource.DecimalSI)
+	twoDevice := resource.NewQuantity(int64(2), resource.DecimalSI)
+	threeDevice := resource.NewQuantity(int64(3), resource.DecimalSI)
+	res1 := TestResource{
+		resourceName:     "domain1.com/resource1",
+		resourceQuantity: *resource.NewQuantity(int64(6), resource.DecimalSI),
+		devs: checkpoint.DevicesPerNUMA{
+			0: []string{"dev1", "dev2", "dev3", "dev4", "dev5", "dev6"},
+		},
+		topology: false,
+	}
+	testResources := []TestResource{
+		res1,
+	}
+	as := require.New(t)
+	podsStub := activePodsStub{
+		activePods: []*v1.Pod{},
+	}
+	tmpDir, err := os.MkdirTemp("", "checkpoint")
+	as.Nil(err)
+	defer os.RemoveAll(tmpDir)
+
+	testManager, err := getTestManager(tmpDir, podsStub.getActivePods, testResources)
+	as.Nil(err)
+
+	containerRestartPolicyAlways := v1.ContainerRestartPolicyAlways
+	podWithPluginResourcesInRestartableInitContainers := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			UID: uuid.NewUUID(),
+		},
+		Spec: v1.PodSpec{
+			InitContainers: []v1.Container{
+				{
+					Name: string(uuid.NewUUID()),
+					Resources: v1.ResourceRequirements{
+						Limits: v1.ResourceList{
+							v1.ResourceName(res1.resourceName): *threeDevice,
+						},
+					},
+				},
+				{
+					Name: string(uuid.NewUUID()),
+					Resources: v1.ResourceRequirements{
+						Limits: v1.ResourceList{
+							v1.ResourceName(res1.resourceName): *oneDevice,
+						},
+					},
+					RestartPolicy: &containerRestartPolicyAlways,
+				},
+				{
+					Name: string(uuid.NewUUID()),
+					Resources: v1.ResourceRequirements{
+						Limits: v1.ResourceList{
+							v1.ResourceName(res1.resourceName): *twoDevice,
+						},
+					},
+					RestartPolicy: &containerRestartPolicyAlways,
+				},
+			},
+			Containers: []v1.Container{
+				{
+					Name: string(uuid.NewUUID()),
+					Resources: v1.ResourceRequirements{
+						Limits: v1.ResourceList{
+							v1.ResourceName(res1.resourceName): *oneDevice,
+						},
+					},
+				},
+				{
+					Name: string(uuid.NewUUID()),
+					Resources: v1.ResourceRequirements{
+						Limits: v1.ResourceList{
+							v1.ResourceName(res1.resourceName): *twoDevice,
+						},
+					},
+				},
+			},
+		},
+	}
+	podsStub.updateActivePods([]*v1.Pod{podWithPluginResourcesInRestartableInitContainers})
+	for _, container := range podWithPluginResourcesInRestartableInitContainers.Spec.InitContainers {
+		err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container)
+	}
+	for _, container := range podWithPluginResourcesInRestartableInitContainers.Spec.Containers {
+		err = testManager.Allocate(podWithPluginResourcesInRestartableInitContainers, &container)
+	}
+	as.Nil(err)
+	podUID := string(podWithPluginResourcesInRestartableInitContainers.UID)
+	regularInitCont1 := podWithPluginResourcesInRestartableInitContainers.Spec.InitContainers[0].Name
+	restartableInitCont2 := podWithPluginResourcesInRestartableInitContainers.Spec.InitContainers[1].Name
+	restartableInitCont3 := podWithPluginResourcesInRestartableInitContainers.Spec.InitContainers[2].Name
+	normalCont1 := podWithPluginResourcesInRestartableInitContainers.Spec.Containers[0].Name
+	normalCont2 := podWithPluginResourcesInRestartableInitContainers.Spec.Containers[1].Name
+	regularInitCont1Devices := testManager.podDevices.containerDevices(podUID, regularInitCont1, res1.resourceName)
+	restartableInitCont2Devices := testManager.podDevices.containerDevices(podUID, restartableInitCont2, res1.resourceName)
+	restartableInitCont3Devices := testManager.podDevices.containerDevices(podUID, restartableInitCont3, res1.resourceName)
+	normalCont1Devices := testManager.podDevices.containerDevices(podUID, normalCont1, res1.resourceName)
+	normalCont2Devices := testManager.podDevices.containerDevices(podUID, normalCont2, res1.resourceName)
+	as.Equal(3, regularInitCont1Devices.Len())
+	as.Equal(1, restartableInitCont2Devices.Len())
+	as.Equal(2, restartableInitCont3Devices.Len())
+	as.Equal(1, normalCont1Devices.Len())
+	as.Equal(2, normalCont2Devices.Len())
+	as.True(regularInitCont1Devices.IsSuperset(restartableInitCont2Devices))
+	as.True(regularInitCont1Devices.IsSuperset(restartableInitCont3Devices))
+	// regularInitCont1Devices may be shared, so they are excluded from the disjointness check below.
+
+	dedicatedContainerDevices := []sets.Set[string]{
+		restartableInitCont2Devices,
+		restartableInitCont3Devices,
+		normalCont1Devices,
+		normalCont2Devices,
+	}
+
+	for i := 0; i < len(dedicatedContainerDevices)-1; i++ {
+		for j := i + 1; j < len(dedicatedContainerDevices); j++ {
+			t.Logf("containerDevices[%d] = %v", i, dedicatedContainerDevices[i])
+			t.Logf("containerDevices[%d] = %v", j, dedicatedContainerDevices[j])
+			as.Empty(dedicatedContainerDevices[i].Intersection(dedicatedContainerDevices[j]))
+		}
+	}
+}
+
 func TestUpdatePluginResources(t *testing.T) {
 	pod := &v1.Pod{}
 	pod.UID = types.UID("testPod")
diff --git a/test/e2e_node/device_plugin_test.go b/test/e2e_node/device_plugin_test.go
index 98db64fc40a..561e64af8ef 100644
--- a/test/e2e_node/device_plugin_test.go
+++ b/test/e2e_node/device_plugin_test.go
@@ -591,6 +591,103 @@ func testDevicePlugin(f *framework.Framework, pluginSockDir string) {
 			framework.Fail(fmt.Sprintf("stale device assignment after pod deletion while kubelet was down allocated=%v error=%v", allocated, err))
 		}
 	})
+
+	ginkgo.It("Can schedule a pod with a restartable init container [NodeAlphaFeature:SidecarContainers]", func(ctx context.Context) {
+		podRECMD := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s"
+		sleepOneSecond := "1s"
+		rl := v1.ResourceList{v1.ResourceName(SampleDeviceResourceName): *resource.NewQuantity(1, resource.DecimalSI)}
+		pod := &v1.Pod{
+			ObjectMeta: metav1.ObjectMeta{Name: "device-plugin-test-" + string(uuid.NewUUID())},
+			Spec: v1.PodSpec{
+				RestartPolicy: v1.RestartPolicyAlways,
+				InitContainers: []v1.Container{
+					{
+						Image:   busyboxImage,
+						Name:    "init-1",
+						Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepOneSecond)},
+						Resources: v1.ResourceRequirements{
+							Limits:   rl,
+							Requests: rl,
+						},
+					},
+					{
+						Image:   busyboxImage,
+						Name:    "restartable-init-2",
+						Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepIntervalForever)},
+						Resources: v1.ResourceRequirements{
+							Limits:   rl,
+							Requests: rl,
+						},
+						RestartPolicy: &containerRestartPolicyAlways,
+					},
+				},
+				Containers: []v1.Container{{
+					Image:   busyboxImage,
+					Name:    "regular-1",
+					Command: []string{"sh", "-c", fmt.Sprintf(podRECMD, sleepIntervalForever)},
+					Resources: v1.ResourceRequirements{
+						Limits:   rl,
+						Requests: rl,
+					},
+				}},
+			},
+		}
+
+		pod1 := e2epod.NewPodClient(f).CreateSync(ctx, pod)
+		deviceIDRE := "stub devices: (Dev-[0-9]+)"
+
+		devID1, err := parseLog(ctx, f, pod1.Name, pod1.Spec.InitContainers[0].Name, deviceIDRE)
+		framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.InitContainers[0].Name)
+		gomega.Expect(devID1).To(gomega.Not(gomega.Equal("")), "pod1's init container requested a device but started successfully without")
+
+		devID2, err := parseLog(ctx, f, pod1.Name, pod1.Spec.InitContainers[1].Name, deviceIDRE)
+		framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.InitContainers[1].Name)
+		gomega.Expect(devID2).To(gomega.Not(gomega.Equal("")), "pod1's restartable init container requested a device but started successfully without")
+
+		gomega.Expect(devID2).To(gomega.Equal(devID1), "pod1's init container and restartable init container should share the same device")
+
+		devID3, err := parseLog(ctx, f, pod1.Name, pod1.Spec.Containers[0].Name, deviceIDRE)
+		framework.ExpectNoError(err, "getting logs for pod %q/%q", pod1.Name, pod1.Spec.Containers[0].Name)
+		gomega.Expect(devID3).To(gomega.Not(gomega.Equal("")), "pod1's regular container requested a device but started successfully without")
+
+		gomega.Expect(devID3).NotTo(gomega.Equal(devID2), "pod1's restartable init container and regular container should not share the same device")
+
+		podResources, err := getV1NodeDevices(ctx)
+		framework.ExpectNoError(err)
+
+		framework.Logf("PodResources.PodResources:%+v\n", podResources.PodResources)
+		framework.Logf("len(PodResources.PodResources):%+v", len(podResources.PodResources))
+
+		gomega.Expect(podResources.PodResources).To(gomega.HaveLen(2))
+
+		var resourcesForOurPod *kubeletpodresourcesv1.PodResources
+		for _, res := range podResources.GetPodResources() {
+			if res.Name == pod1.Name {
+				resourcesForOurPod = res
+			}
+		}
+
+		gomega.Expect(resourcesForOurPod).NotTo(gomega.BeNil())
+
+		gomega.Expect(resourcesForOurPod.Name).To(gomega.Equal(pod1.Name))
+		gomega.Expect(resourcesForOurPod.Namespace).To(gomega.Equal(pod1.Namespace))
+
+		// Note that the kubelet does not report resources for restartable
+		// init containers for now.
+		// See https://github.com/kubernetes/kubernetes/issues/120501.
+		// TODO: Fix this test to check the resources allocated to the
+		// restartable init container once the kubelet reports resources
+		// for restartable init containers.
+		gomega.Expect(resourcesForOurPod.Containers).To(gomega.HaveLen(1))
+
+		gomega.Expect(resourcesForOurPod.Containers[0].Name).To(gomega.Equal(pod1.Spec.Containers[0].Name))
+
+		gomega.Expect(resourcesForOurPod.Containers[0].Devices).To(gomega.HaveLen(1))
+
+		gomega.Expect(resourcesForOurPod.Containers[0].Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
+
+		gomega.Expect(resourcesForOurPod.Containers[0].Devices[0].DeviceIds).To(gomega.HaveLen(1))
+	})
 })
 }
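
The manager.go change and the e2e expectations above follow one rule: devices held by a regular init container return to the pod's reuse pool once that container exits, while a restartable (sidecar) init container keeps running, so any device it holds is taken out of the pool. The self-contained Go sketch below only illustrates that rule; it is not kubelet code, and the names initContainer and reuse are invented stand-ins for v1.Container and m.devicesToReuse[podUID].

package main

import "fmt"

// Simplified stand-in for a pod's init container; the real kubelet decides
// restartability via types.IsRestartableInitContainer (restartPolicy: Always).
type initContainer struct {
	name        string
	restartable bool     // sidecar-style init container
	allocated   []string // devices handed to this container
}

func main() {
	// Per-pod pool of devices that later containers may reuse, playing the
	// role of m.devicesToReuse[podUID] in the patch.
	reuse := map[string]bool{}

	pod := []initContainer{
		// Exits before the app containers start.
		{name: "init-1", allocated: []string{"dev1"}},
		// Reuses dev1 from the pool, then keeps running for the pod's lifetime.
		{name: "restartable-init-2", restartable: true, allocated: []string{"dev1"}},
	}

	for _, c := range pod {
		if !c.restartable {
			// Regular init container: once it finishes, its devices are safe to
			// hand to later containers (addContainerAllocatedResources).
			for _, d := range c.allocated {
				reuse[d] = true
			}
		} else {
			// Restartable init container: it never finishes, so whatever it holds
			// must leave the pool (removeContainerAllocatedResources).
			for _, d := range c.allocated {
				delete(reuse, d)
			}
		}
		fmt.Printf("after %s: reusable=%v\n", c.name, reuse)
	}
	// Prints:
	//   after init-1: reusable=map[dev1:true]
	//   after restartable-init-2: reusable=map[]
}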