scheduler: add ResourceClass events
When filtering fails because a ResourceClass is missing, we can treat the pod as "unschedulable" as long as we also register a cluster event that wakes the pod up once a ResourceClass gets created or modified. This is more efficient than retrying periodically.
parent 5269e76990
commit c682d2b8c5
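
A minimal standalone sketch of that pattern (the plugin type and its lister are hypothetical; the framework types and the ResourceClass event are the ones used in the diff below): a plugin that rejects a pod as unschedulable also declares, via EventsToRegister, which cluster events can make the pod schedulable again, and the scheduler parks the pod until such an event fires.

package example

import (
	"context"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// resourceClassLister is a hypothetical stand-in for the generated
// v1alpha2 ResourceClass lister.
type resourceClassLister interface {
	Get(name string) (*resourcev1alpha2.ResourceClass, error)
}

type examplePlugin struct {
	classLister resourceClassLister
}

// EventsToRegister declares which cluster events may make a pod that this
// plugin rejected schedulable again.
func (pl *examplePlugin) EventsToRegister() []framework.ClusterEventWithHint {
	return []framework.ClusterEventWithHint{
		// Wake up parked pods when a ResourceClass gets created or modified.
		{Event: framework.ClusterEvent{Resource: framework.ResourceClass, ActionType: framework.Add | framework.Update}},
	}
}

// PreFilter parks the pod instead of failing hard when the class is missing.
func (pl *examplePlugin) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
	if _, err := pl.classLister.Get("example-class"); err != nil {
		if apierrors.IsNotFound(err) {
			// The pod sleeps in the unschedulable queue until one of the
			// events from EventsToRegister fires; no periodic retry needed.
			return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, "resource class does not exist")
		}
		// Unexpected errors are retried with backoff.
		return nil, framework.AsStatus(err)
	}
	return nil, nil
}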
@@ -458,6 +458,15 @@ func addAllEventHandlers(
 				}
 				handlers = append(handlers, handlerRegistration)
 			}
+		case framework.ResourceClass:
+			if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
+				if handlerRegistration, err = informerFactory.Resource().V1alpha2().ResourceClasses().Informer().AddEventHandler(
+					buildEvtResHandler(at, framework.ResourceClass, "ResourceClass"),
+				); err != nil {
+					return err
+				}
+				handlers = append(handlers, handlerRegistration)
+			}
 		case framework.StorageClass:
 			if at&framework.Add != 0 {
 				if handlerRegistration, err = informerFactory.Storage().V1().StorageClasses().Informer().AddEventHandler(
@@ -272,6 +272,8 @@ func (pl *dynamicResources) EventsToRegister() []framework.ClusterEventWithHint
 		// A resource might depend on node labels for topology filtering.
 		// A new or updated node may make pods schedulable.
 		{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel}},
+		// A pod might be waiting for a class to get created or modified.
+		{Event: framework.ClusterEvent{Resource: framework.ResourceClass, ActionType: framework.Add | framework.Update}},
 	}
 	return events
 }
@@ -595,7 +597,13 @@ func (pl *dynamicResources) PreFilter(ctx context.Context, state *framework.Cycl
 	// about the specific pod.
 	class, err := pl.classLister.Get(claim.Spec.ResourceClassName)
 	if err != nil {
-		// If the class does not exist, then allocation cannot proceed.
+		// If the class cannot be retrieved, allocation cannot proceed.
+		if apierrors.IsNotFound(err) {
+			// Here we mark the pod as "unschedulable", so it'll sleep in
+			// the unschedulable queue until a ResourceClass event occurs.
+			return nil, statusUnschedulable(logger, fmt.Sprintf("resource class %s does not exist", claim.Spec.ResourceClassName))
+		}
+		// Other error, retry with backoff.
 		return nil, statusError(logger, fmt.Errorf("look up resource class: %v", err))
 	}
 	if class.SuitableNodes != nil {
@@ -331,7 +331,7 @@ func TestPlugin(t *testing.T) {
 			claims: []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim},
 			want: want{
 				prefilter: result{
-					status: framework.AsStatus(fmt.Errorf(`look up resource class: resourceclass.resource.k8s.io "%s" not found`, className)),
+					status: framework.NewStatus(framework.UnschedulableAndUnresolvable, fmt.Sprintf("resource class %s does not exist", className)),
 				},
 				postfilter: result{
 					status: framework.NewStatus(framework.Unschedulable, `no new claims to deallocate`),
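
The two status helpers in the PreFilter hunk above encode the retry policy, and the test change shows what they map to. Their bodies are not part of this diff; the following is a hedged sketch inferred from those test expectations (the real helpers may also take extra key/value pairs for logging):

package example

import (
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

// statusUnschedulable: the condition can only be resolved by a cluster
// event (here: a ResourceClass Add or Update), so the scheduler parks the
// pod instead of retrying with backoff.
func statusUnschedulable(logger klog.Logger, reason string) *framework.Status {
	logger.V(5).Info("pod unschedulable", "reason", reason)
	return framework.NewStatus(framework.UnschedulableAndUnresolvable, reason)
}

// statusError: an unexpected problem; the scheduler retries with backoff.
func statusError(logger klog.Logger, err error) *framework.Status {
	logger.Error(err, "scheduling failure")
	return framework.AsStatus(err)
}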
@@ -71,6 +71,7 @@ const (
 	PersistentVolumeClaim GVK = "PersistentVolumeClaim"
 	PodSchedulingContext  GVK = "PodSchedulingContext"
 	ResourceClaim         GVK = "ResourceClaim"
+	ResourceClass         GVK = "ResourceClass"
 	StorageClass          GVK = "storage.k8s.io/StorageClass"
 	CSINode               GVK = "storage.k8s.io/CSINode"
 	CSIDriver             GVK = "storage.k8s.io/CSIDriver"
@@ -208,6 +208,68 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 			b.testPod(ctx, f.ClientSet, pod)
 		})
+
+		ginkgo.It("retries pod scheduling after creating resource class", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod, template := b.podInline(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+			class, err := f.ClientSet.ResourceV1alpha2().ResourceClasses().Get(ctx, template.Spec.Spec.ResourceClassName, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+			template.Spec.Spec.ResourceClassName += "-b"
+			b.create(ctx, parameters, template, pod)
+
+			// There's no way to be sure that the scheduler has checked the pod.
+			// But if we sleep for a short while, it's likely and if there are any
+			// bugs that prevent the scheduler from handling creation of the class,
+			// those bugs should show up as test flakes.
+			time.Sleep(time.Second)
+
+			class.UID = ""
+			class.ResourceVersion = ""
+			class.Name = template.Spec.Spec.ResourceClassName
+			b.create(ctx, class)
+
+			b.testPod(ctx, f.ClientSet, pod)
+		})
+
+		ginkgo.It("retries pod scheduling after updating resource class", func(ctx context.Context) {
+			parameters := b.parameters()
+			pod, template := b.podInline(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+
+			// First modify the class so that it matches no nodes.
+			class, err := f.ClientSet.ResourceV1alpha2().ResourceClasses().Get(ctx, template.Spec.Spec.ResourceClassName, metav1.GetOptions{})
+			framework.ExpectNoError(err)
+			class.SuitableNodes = &v1.NodeSelector{
+				NodeSelectorTerms: []v1.NodeSelectorTerm{
+					{
+						MatchExpressions: []v1.NodeSelectorRequirement{
+							{
+								Key:      "no-such-label",
+								Operator: v1.NodeSelectorOpIn,
+								Values:   []string{"no-such-value"},
+							},
+						},
+					},
+				},
+			}
+			class, err = f.ClientSet.ResourceV1alpha2().ResourceClasses().Update(ctx, class, metav1.UpdateOptions{})
+			framework.ExpectNoError(err)
+
+			// Now create the pod.
+			b.create(ctx, parameters, template, pod)
+
+			// There's no way to be sure that the scheduler has checked the pod.
+			// But if we sleep for a short while, it's likely and if there are any
+			// bugs that prevent the scheduler from handling updates of the class,
+			// those bugs should show up as test flakes.
+			time.Sleep(time.Second)
+
+			// Unblock the pod.
+			class.SuitableNodes = nil
+			_, err = f.ClientSet.ResourceV1alpha2().ResourceClasses().Update(ctx, class, metav1.UpdateOptions{})
+			framework.ExpectNoError(err)
+
+			b.testPod(ctx, f.ClientSet, pod)
+		})
+
 		ginkgo.It("runs a pod without a generated resource claim", func(ctx context.Context) {
 			pod, _ /* template */ := b.podInline(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
 			created := b.create(ctx, pod)
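
One detail from the first new test above worth calling out: an object returned by Get carries server-populated metadata that must be cleared before it can be submitted again as a create. A hedged standalone sketch of that clone step (the helper name is hypothetical; the client calls are the ones used in the test):

package example

import (
	"context"

	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// cloneResourceClass copies an existing ResourceClass under a new name.
func cloneResourceClass(ctx context.Context, cs kubernetes.Interface, src, dst string) (*resourcev1alpha2.ResourceClass, error) {
	class, err := cs.ResourceV1alpha2().ResourceClasses().Get(ctx, src, metav1.GetOptions{})
	if err != nil {
		return nil, err
	}
	class.UID = ""             // assigned by the API server on create
	class.ResourceVersion = "" // only meaningful for update requests
	class.Name = dst
	return cs.ResourceV1alpha2().ResourceClasses().Create(ctx, class, metav1.CreateOptions{})
}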