Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #118786 from pohly/dra-test-skip-prepare

dra: kubelet must skip NodePrepareResource if not used by any container

Commit b3d94ae74f
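In short, PrepareResources now checks each pod-level resource claim against the containers' resources.claims entries and skips NodePrepareResource for any claim that no init container or regular container references. What follows is a minimal, self-contained sketch of that check written against the public k8s.io/api/core/v1 types; the helper names mirror the ones added in the diff below, but the program itself is an illustration, not the kubelet code.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// claimIsUsedByPod mirrors the helper added in this commit: a pod-level
// resource claim counts as "used" only if at least one init container or
// regular container lists it under resources.claims.
func claimIsUsedByPod(podClaim *v1.PodResourceClaim, pod *v1.Pod) bool {
	return claimIsUsedByContainers(podClaim, pod.Spec.InitContainers) ||
		claimIsUsedByContainers(podClaim, pod.Spec.Containers)
}

func claimIsUsedByContainers(podClaim *v1.PodResourceClaim, containers []v1.Container) bool {
	for i := range containers {
		for _, c := range containers[i].Resources.Claims {
			if c.Name == podClaim.Name {
				return true
			}
		}
	}
	return false
}

func main() {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			ResourceClaims: []v1.PodResourceClaim{{Name: "gpu"}},
			Containers: []v1.Container{{
				Name: "app",
				// No Resources.Claims entry, so the claim is unused and
				// the kubelet would skip NodePrepareResource for it.
			}},
		},
	}
	for i := range pod.Spec.ResourceClaims {
		podClaim := &pod.Spec.ResourceClaims[i]
		fmt.Printf("claim %q used by pod: %v\n", podClaim.Name, claimIsUsedByPod(podClaim, pod))
	}
}

Running this prints `claim "gpu" used by pod: false`, which is exactly the case the kubelet now treats as "nothing to prepare".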
@@ -67,23 +67,10 @@ func NewManagerImpl(kubeClient clientset.Interface, stateFileDirectory string) (
 // containerResources on success.
 func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 	for i := range pod.Spec.ResourceClaims {
-		claimName := resourceclaim.Name(pod, &pod.Spec.ResourceClaims[i])
+		podClaim := &pod.Spec.ResourceClaims[i]
+		claimName := resourceclaim.Name(pod, podClaim)
 		klog.V(3).InfoS("Processing resource", "claim", claimName, "pod", pod.Name)
-
-		// Resource is already prepared, add pod UID to it
-		if claimInfo := m.cache.get(claimName, pod.Namespace); claimInfo != nil {
-			// We delay checkpointing of this change until this call
-			// returns successfully. It is OK to do this because we
-			// will only return successfully from this call if the
-			// checkpoint has succeeded. That means if the kubelet is
-			// ever restarted before this checkpoint succeeds, the pod
-			// whose resources are being prepared would never have
-			// started, so it's OK (actually correct) to not include it
-			// in the cache.
-			claimInfo.addPodReference(pod.UID)
-			continue
-		}
-
 		// Query claim object from the API server
 		resourceClaim, err := m.kubeClient.ResourceV1alpha2().ResourceClaims(pod.Namespace).Get(
 			context.TODO(),
@@ -99,6 +86,27 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 				pod.Name, pod.UID, claimName, resourceClaim.UID)
 		}
 
+		// If no container actually uses the claim, then we don't need
+		// to prepare it.
+		if !claimIsUsedByPod(podClaim, pod) {
+			klog.V(5).InfoS("Skipping unused resource", "claim", claimName, "pod", pod.Name)
+			continue
+		}
+
+		// Is the resource already prepared? Then add the pod UID to it.
+		if claimInfo := m.cache.get(claimName, pod.Namespace); claimInfo != nil {
+			// We delay checkpointing of this change until this call
+			// returns successfully. It is OK to do this because we
+			// will only return successfully from this call if the
+			// checkpoint has succeeded. That means if the kubelet is
+			// ever restarted before this checkpoint succeeds, the pod
+			// whose resources are being prepared would never have
+			// started, so it's OK (actually correct) to not include it
+			// in the cache.
+			claimInfo.addPodReference(pod.UID)
+			continue
+		}
+
 		// Grab the allocation.resourceHandles. If there are no
 		// allocation.resourceHandles, create a single resourceHandle with no
 		// content. This will trigger processing of this claim by a single
@@ -178,6 +186,34 @@ func (m *ManagerImpl) PrepareResources(pod *v1.Pod) error {
 	return nil
 }
 
+func claimIsUsedByPod(podClaim *v1.PodResourceClaim, pod *v1.Pod) bool {
+	if claimIsUsedByContainers(podClaim, pod.Spec.InitContainers) {
+		return true
+	}
+	if claimIsUsedByContainers(podClaim, pod.Spec.Containers) {
+		return true
+	}
+	return false
+}
+
+func claimIsUsedByContainers(podClaim *v1.PodResourceClaim, containers []v1.Container) bool {
+	for i := range containers {
+		if claimIsUsedByContainer(podClaim, &containers[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+func claimIsUsedByContainer(podClaim *v1.PodResourceClaim, container *v1.Container) bool {
+	for _, c := range container.Resources.Claims {
+		if c.Name == podClaim.Name {
+			return true
+		}
+	}
+	return false
+}
+
 // GetResources gets a ContainerInfo object from the claimInfo cache.
 // This information is used by the caller to update a container config.
 func (m *ManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*ContainerInfo, error) {
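The e2e changes below exercise the same path end to end: a pod is created from an inline claim template, every container's resources.claims list is cleared, and the test asserts that the driver never prepared anything for that pod. For the kubelet-side helper, here is a table-driven test sketch of the decision it encodes, assuming claimIsUsedByPod from the hunk above is in scope; the package name and case names are illustrative, not part of the commit.

package dra

import (
	"testing"

	v1 "k8s.io/api/core/v1"
)

// TestClaimIsUsedByPod sketches the cases the new helper distinguishes:
// a claim referenced by a regular container, by an init container, or by
// no container at all (the case the kubelet now skips).
func TestClaimIsUsedByPod(t *testing.T) {
	podClaim := v1.PodResourceClaim{Name: "gpu"}
	ref := []v1.ResourceClaim{{Name: "gpu"}}

	testcases := map[string]struct {
		pod      v1.Pod
		expected bool
	}{
		"used-by-container": {
			pod: v1.Pod{Spec: v1.PodSpec{
				Containers: []v1.Container{{Resources: v1.ResourceRequirements{Claims: ref}}},
			}},
			expected: true,
		},
		"used-by-init-container": {
			pod: v1.Pod{Spec: v1.PodSpec{
				InitContainers: []v1.Container{{Resources: v1.ResourceRequirements{Claims: ref}}},
			}},
			expected: true,
		},
		"unused": {
			pod: v1.Pod{Spec: v1.PodSpec{
				Containers: []v1.Container{{ /* no resources.claims */ }},
			}},
			expected: false,
		},
	}

	for name, tc := range testcases {
		if got := claimIsUsedByPod(&podClaim, &tc.pod); got != tc.expected {
			t.Errorf("%s: claimIsUsedByPod = %v, want %v", name, got, tc.expected)
		}
	}
}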
@@ -61,11 +61,11 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 		nodes := NewNodes(f, 1, 1)
 		driver := NewDriver(f, nodes, networkResources) // All tests get their own driver instance.
 		b := newBuilder(f, driver)
 
 		ginkgo.It("registers plugin", func() {
 			ginkgo.By("the driver is running")
 		})
 
-		// This test does not pass at the moment because kubelet doesn't retry.
 		ginkgo.It("must retry NodePrepareResource", func(ctx context.Context) {
 			// We have exactly one host.
 			m := MethodInstance{driver.Nodenames()[0], NodePrepareResourceMethod}
@@ -95,6 +95,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				framework.Fail("NodePrepareResource should have been called again")
 			}
 		})
+
 		ginkgo.It("must not run a pod if a claim is not reserved for it", func(ctx context.Context) {
 			parameters := b.parameters()
 			claim := b.externalClaim(resourcev1alpha2.AllocationModeImmediate)
@@ -118,6 +119,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				return nil
 			}, 20*time.Second, 200*time.Millisecond).Should(gomega.BeNil())
 		})
+
 		ginkgo.It("must unprepare resources for force-deleted pod", func(ctx context.Context) {
 			parameters := b.parameters()
 			claim := b.externalClaim(resourcev1alpha2.AllocationModeImmediate)
@@ -140,6 +142,19 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 				gomega.Eventually(plugin.GetPreparedResources).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "prepared claims on host %s", host)
 			}
 		})
+
+		ginkgo.It("must skip NodePrepareResource if not used by any container", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod, template := b.podInline(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+			for i := range pod.Spec.Containers {
+				pod.Spec.Containers[i].Resources.Claims = nil
+			}
+			b.create(ctx, parameters, pod, template)
+			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod), "start pod")
+			for host, plugin := range b.driver.Nodes {
+				gomega.Expect(plugin.GetPreparedResources()).Should(gomega.BeEmpty(), "not claims should be prepared on host %s while pod is running", host)
+			}
+		})
 	})
 
 	ginkgo.Context("driver", func() {