Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-12-07 09:43:15 +00:00
dra: handle scheduled pods in kube-controller-manager
When someone decides that a Pod should definitely run on a specific node, they can create the Pod with spec.nodeName already set. Some custom scheduler might do that. If DRA is enabled, the kubelet then refuses to run the pod, either because its claims are still waiting for the first consumer or because the pod was not added to reservedFor. Both are steps the kube-scheduler normally performs. A pod can also reach the same state if it was scheduled while the DRA feature was disabled in the kube-scheduler.

The resource claim controller can handle both cases by taking over for the kube-scheduler when spec.nodeName is set. Triggering an allocation is simpler than in the scheduler: all it takes is creating the right PodSchedulingContext with spec.selectedNode set. There is no need to list suitable nodes because that choice has already been made, permanently. Adding the pod to reservedFor is also straightforward.

What is currently missing is triggering de-allocation of claims so that they can be re-allocated for the desired node. This does not matter for claims that are created for the pod from a template and used only once, but it might be worthwhile to add in the future.
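In API terms, the takeover amounts to two operations. The following is a minimal sketch, not the controller's actual code: the helper name triggerAllocationForScheduledPod is made up, owner references, informers, retries and waiting for the allocation are omitted, and it assumes the resource.k8s.io/v1alpha2 client API used elsewhere in this change.

package sketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// triggerAllocationForScheduledPod shows the two steps the resource claim
// controller performs for a pod that already has spec.nodeName set and
// references a claim with delayed allocation.
func triggerAllocationForScheduledPod(ctx context.Context, clientSet kubernetes.Interface, pod *v1.Pod, claim *resourcev1alpha2.ResourceClaim) error {
	// Step 1: ask the DRA driver to allocate the claim for the chosen node by
	// creating a PodSchedulingContext with spec.selectedNode set. No list of
	// potential nodes is needed because the node choice is already final.
	scheduling := &resourcev1alpha2.PodSchedulingContext{
		ObjectMeta: metav1.ObjectMeta{
			Name:      pod.Name,
			Namespace: pod.Namespace,
			// The real controller also sets an owner reference to the pod so
			// that this object gets cleaned up together with it.
		},
		Spec: resourcev1alpha2.PodSchedulingContextSpec{
			SelectedNode: pod.Spec.NodeName,
		},
	}
	if _, err := clientSet.ResourceV1alpha2().PodSchedulingContexts(pod.Namespace).Create(ctx, scheduling, metav1.CreateOptions{}); err != nil {
		return err
	}

	// Step 2: reserve the claim for the pod so that the kubelet accepts it.
	// The real controller only does this after the claim has been allocated;
	// this sketch skips that wait.
	claim = claim.DeepCopy()
	claim.Status.ReservedFor = append(claim.Status.ReservedFor, resourcev1alpha2.ResourceClaimConsumerReference{
		Resource: "pods",
		Name:     pod.Name,
		UID:      pod.UID,
	})
	_, err := clientSet.ResourceV1alpha2().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
	return err
}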
@@ -98,16 +98,31 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
	})

	ginkgo.It("must not run a pod if a claim is not reserved for it", func(ctx context.Context) {
		parameters := b.parameters()
		claim := b.externalClaim(resourcev1alpha2.AllocationModeImmediate)
		// Pretend that the resource is allocated and reserved for some other entity.
		// Until the resourceclaim controller learns to remove reservations for
		// arbitrary types we can simply fake something here.
		claim := b.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
		b.create(ctx, claim)
		claim, err := f.ClientSet.ResourceV1alpha2().ResourceClaims(f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
		framework.ExpectNoError(err, "get claim")
		claim.Status.Allocation = &resourcev1alpha2.AllocationResult{}
		claim.Status.DriverName = driver.Name
		claim.Status.ReservedFor = append(claim.Status.ReservedFor, resourcev1alpha2.ResourceClaimConsumerReference{
			APIGroup: "example.com",
			Resource: "some",
			Name:     "thing",
			UID:      "12345",
		})
		_, err = f.ClientSet.ResourceV1alpha2().ResourceClaims(f.Namespace.Name).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
		framework.ExpectNoError(err, "update claim")

		pod := b.podExternal()

		// This bypasses scheduling and therefore the pod gets
		// to run on the node although it never gets added to
		// the `ReservedFor` field of the claim.
		pod.Spec.NodeName = nodes.NodeNames[0]

		b.create(ctx, parameters, claim, pod)
		b.create(ctx, pod)

		gomega.Consistently(func() error {
			testPod, err := b.f.ClientSet.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{})
@@ -178,7 +193,9 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
	})

	ginkgo.Context("cluster", func() {
		nodes := NewNodes(f, 1, 4)
		nodes := NewNodes(f, 1, 1)
		driver := NewDriver(f, nodes, networkResources)
		b := newBuilder(f, driver)

		ginkgo.It("truncates the name of a generated resource claim", func(ctx context.Context) {
			parameters := b.parameters()
@@ -208,6 +225,10 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
			framework.ExpectNoError(err)
			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod))
		})
	})

	ginkgo.Context("cluster", func() {
		nodes := NewNodes(f, 1, 4)

		// claimTests tries out several different combinations of pods with
		// claims, both inline and external.
@@ -341,6 +362,32 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
				return b.f.ClientSet.ResourceV1alpha2().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
			}).WithTimeout(f.Timeouts.PodDelete).Should(gomega.HaveField("Status.Allocation", (*resourcev1alpha2.AllocationResult)(nil)))
		})

		// kube-controller-manager can trigger delayed allocation for pods where the
		// node name was already selected when creating the pod. For immediate
		// allocation, the creator has to ensure that the node matches the claims.
		// This does not work for resource claim templates and only isn't
		// a problem here because the resource is network-attached and available
		// on all nodes.

		ginkgo.It("supports scheduled pod referencing inline resource claim", func(ctx context.Context) {
			parameters := b.parameters()
			pod, template := b.podInline(allocationMode)
			pod.Spec.NodeName = nodes.NodeNames[0]
			b.create(ctx, parameters, pod, template)

			b.testPod(ctx, f.ClientSet, pod)
		})

		ginkgo.It("supports scheduled pod referencing external resource claim", func(ctx context.Context) {
			parameters := b.parameters()
			claim := b.externalClaim(allocationMode)
			pod := b.podExternal()
			pod.Spec.NodeName = nodes.NodeNames[0]
			b.create(ctx, parameters, claim, pod)

			b.testPod(ctx, f.ClientSet, pod)
		})
	}

	ginkgo.Context("with delayed allocation and setting ReservedFor", func() {
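The two new tests above exercise pods that reference a claim and are bound to a node up front. Outside the e2e framework, such a pod could be constructed roughly as follows. This is a sketch under the core/v1 pod API of this release (PodResourceClaim with a ClaimSource); the claim name, node name and pause image are placeholders.

package sketch

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// scheduledPodWithClaim builds a pod that references an existing ResourceClaim
// and has spec.nodeName set, bypassing the kube-scheduler entirely.
func scheduledPodWithClaim(namespace, nodeName, claimName string) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "scheduled-pod-",
			Namespace:    namespace,
		},
		Spec: v1.PodSpec{
			// Normally the scheduler fills this in; with this change the
			// resource claim controller handles allocation and reservation
			// for such pre-scheduled pods.
			NodeName:      nodeName,
			RestartPolicy: v1.RestartPolicyNever,
			ResourceClaims: []v1.PodResourceClaim{{
				Name:   "my-claim",
				Source: v1.ClaimSource{ResourceClaimName: &claimName},
			}},
			Containers: []v1.Container{{
				Name:  "ctr",
				Image: "registry.k8s.io/pause:3.9",
				Resources: v1.ResourceRequirements{
					// The container must opt in to the claim by name.
					Claims: []v1.ResourceClaim{{Name: "my-claim"}},
				},
			}},
		},
	}
}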
@@ -121,9 +121,10 @@ func StartScheduler(ctx context.Context, clientSet clientset.Interface, kubeConf

func CreateResourceClaimController(ctx context.Context, tb testing.TB, clientSet clientset.Interface, informerFactory informers.SharedInformerFactory) func() {
	podInformer := informerFactory.Core().V1().Pods()
	schedulingInformer := informerFactory.Resource().V1alpha2().PodSchedulingContexts()
	claimInformer := informerFactory.Resource().V1alpha2().ResourceClaims()
	claimTemplateInformer := informerFactory.Resource().V1alpha2().ResourceClaimTemplates()
	claimController, err := resourceclaim.NewController(klog.FromContext(ctx), clientSet, podInformer, claimInformer, claimTemplateInformer)
	claimController, err := resourceclaim.NewController(klog.FromContext(ctx), clientSet, podInformer, schedulingInformer, claimInformer, claimTemplateInformer)
	if err != nil {
		tb.Fatalf("Error creating claim controller: %v", err)
	}
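For completeness, here is a standalone sketch of wiring and starting the controller with the additional PodSchedulingContexts informer. Only the NewController arguments are taken from the hunk above; the resync interval, the worker count, and the Run(ctx, workers) method signature are assumptions modeled on how kube-controller-manager starts its controllers.

package sketch

import (
	"context"
	"time"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/controller/resourceclaim"
)

// startClaimController wires the resource claim controller with the informers
// it needs, including the PodSchedulingContexts informer added by this commit,
// and starts it in the background. The returned function stops it.
func startClaimController(ctx context.Context, clientSet kubernetes.Interface) (cancel func(), err error) {
	informerFactory := informers.NewSharedInformerFactory(clientSet, 10*time.Minute)
	podInformer := informerFactory.Core().V1().Pods()
	schedulingInformer := informerFactory.Resource().V1alpha2().PodSchedulingContexts()
	claimInformer := informerFactory.Resource().V1alpha2().ResourceClaims()
	claimTemplateInformer := informerFactory.Resource().V1alpha2().ResourceClaimTemplates()

	claimController, err := resourceclaim.NewController(klog.FromContext(ctx), clientSet, podInformer, schedulingInformer, claimInformer, claimTemplateInformer)
	if err != nil {
		return nil, err
	}

	ctx, stop := context.WithCancel(ctx)
	informerFactory.Start(ctx.Done())
	// Run(ctx, workers) is assumed here; the actual startup code lives in
	// kube-controller-manager and the integration test helper shown above.
	go claimController.Run(ctx, 1)
	return stop, nil
}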