DRA scheduler: fix queuing hint support

d66f8f9 added that "plugins have to implement a QueueingHint for Pod/Update
event if the rejection from them could be resolved by updating unscheduled
Pods itself".

This applies to DRA because the name of a generated ResourceClaim must be
recorded in the pod status before the pod can be scheduled.
This commit is contained in:
Patrick Ohly 2024-09-06 16:07:45 +02:00
parent dd4943c831
commit 4a265feb83

View File

@ -399,6 +399,8 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}}, {Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
// Allocation is tracked in ResourceClaims, so any changes may make the pods schedulable. // Allocation is tracked in ResourceClaims, so any changes may make the pods schedulable.
{Event: framework.ClusterEvent{Resource: framework.ResourceClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimChange}, {Event: framework.ClusterEvent{Resource: framework.ResourceClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimChange},
// Adding the ResourceClaim name to the pod status makes pods waiting for their ResourceClaim schedulable.
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Update}, QueueingHintFn: pl.isSchedulableAfterPodChange},
// A pod might be waiting for a class to get created or modified. // A pod might be waiting for a class to get created or modified.
{Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}}, {Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}},
// Adding or updating a ResourceSlice might make a pod schedulable because new resources became available. // Adding or updating a ResourceSlice might make a pod schedulable because new resources became available.
@ -450,7 +452,10 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
// This is not an unexpected error: we know that // This is not an unexpected error: we know that
// foreachPodResourceClaim only returns errors for "not // foreachPodResourceClaim only returns errors for "not
// schedulable". // schedulable".
logger.V(6).Info("pod is not schedulable after resource claim change", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "reason", err.Error()) if loggerV := logger.V(6); loggerV.Enabled() {
owner := metav1.GetControllerOf(modifiedClaim)
loggerV.Info("pod is not schedulable after resource claim change", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "claimOwner", owner, "reason", err.Error())
}
return framework.QueueSkip, nil return framework.QueueSkip, nil
} }
@ -496,6 +501,33 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
return framework.Queue, nil return framework.Queue, nil
} }
// isSchedulableAfterPodChange is invoked for update pod events reported by
// an informer. It checks whether that change adds the ResourceClaim(s) that the
// pod has been waiting for.
func (pl *dynamicResources) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
_, modifiedPod, err := schedutil.As[*v1.Pod](nil, newObj)
if err != nil {
// Shouldn't happen.
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
}
if pod.UID != modifiedPod.UID {
logger.V(7).Info("pod is not schedulable after change in other pod", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
return framework.QueueSkip, nil
}
if err := pl.foreachPodResourceClaim(modifiedPod, nil); err != nil {
// This is not an unexpected error: we know that
// foreachPodResourceClaim only returns errors for "not
// schedulable".
logger.V(6).Info("pod is not schedulable after being updated", "pod", klog.KObj(pod))
return framework.QueueSkip, nil
}
logger.V(5).Info("pod got updated and is schedulable", "pod", klog.KObj(pod))
return framework.Queue, nil
}
// isSchedulableAfterResourceSliceChange is invoked for add and update slice events reported by // isSchedulableAfterResourceSliceChange is invoked for add and update slice events reported by
// an informer. Such changes can make an unschedulable pod schedulable when the pod requests a device // an informer. Such changes can make an unschedulable pod schedulable when the pod requests a device
// and the change adds a suitable device. // and the change adds a suitable device.