mirror of https://github.com/k3s-io/kubernetes.git
scheduler: publish PodSchedulingContext during PreBind
Blocking API calls during a scheduling cycle, as the DRA plugin currently makes, slow down overall scheduling and therefore also affect pods which don't use DRA.

It would be easy to move the blocking calls into a goroutine and end the scheduling cycle with "pod unschedulable". The hard part is handling an error when those API calls then fail in the background. There is a solution for that (see https://github.com/kubernetes/kubernetes/pull/120963), but it's complex.

Instead, publishing the modified PodSchedulingContext can simply be done later. In the more common case of a pod which is ready for binding except for its claims, that happens in PreBind, which already runs in a separate goroutine. In the less common case that a pod cannot be scheduled, it happens in Unreserve, which is still blocking.
parent 5d1509126f
commit a809a6353b
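
The change below follows a simple deferred-write pattern: during the scheduling cycle the plugin only records what it wants to write (selected node, potential nodes) and checks for pending changes via isDirty(); the blocking API call ("publish") happens later in PreBind or Unreserve. The stand-alone Go sketch below illustrates that pattern; the deferredState type and its methods are simplified, assumed stand-ins for the plugin's podSchedulingState helpers, not the actual implementation.

package main

import (
	"context"
	"fmt"
)

// deferredState is a simplified stand-in for the plugin's podSchedulingState:
// changes made during the scheduling cycle are only recorded here, and the
// blocking API write ("publish") is postponed to PreBind or Unreserve.
type deferredState struct {
	selectedNode   *string
	potentialNodes []string
}

// isDirty reports whether recorded changes still need to be published.
func (s *deferredState) isDirty() bool {
	return s.selectedNode != nil || s.potentialNodes != nil
}

// publish stands in for the blocking API call; in the real plugin this creates
// or updates the PodSchedulingContext object via the clientset.
func (s *deferredState) publish(ctx context.Context) error {
	if s.selectedNode != nil {
		fmt.Println("publish selected node:", *s.selectedNode)
	}
	s.selectedNode = nil
	s.potentialNodes = nil
	return nil
}

func main() {
	s := &deferredState{}

	// Reserve: record the decision and return immediately instead of blocking.
	node := "node-1"
	s.selectedNode = &node

	// PreBind (runs in the binding goroutine): publish now and, on failure,
	// fail the binding attempt so the pod gets retried.
	if s.isDirty() {
		if err := s.publish(context.Background()); err != nil {
			fmt.Println("publish failed, binding would be aborted:", err)
		}
	}
}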
@@ -958,16 +958,15 @@ func (pl *dynamicResources) Reserve(ctx context.Context, cs *framework.CycleStat
 			state.podSchedulingState.schedulingCtx.Spec.SelectedNode != nodeName {
 			state.podSchedulingState.selectedNode = &nodeName
 			logger.V(5).Info("start allocation", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
-			if err := state.podSchedulingState.publish(ctx, pod, pl.clientset); err != nil {
-				return statusError(logger, err)
-			}
-			return statusPending(logger, "waiting for resource driver to allocate resource", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
+			// The actual publish happens in PreBind or Unreserve.
+			return nil
 		}
 	}
 
 	// May have been modified earlier in PreScore or above.
-	if err := state.podSchedulingState.publish(ctx, pod, pl.clientset); err != nil {
-		return statusError(logger, err)
+	if state.podSchedulingState.isDirty() {
+		// The actual publish happens in PreBind or Unreserve.
+		return nil
 	}
 
 	// More than one pending claim and not enough information about all of them.
@@ -1004,6 +1003,18 @@ func (pl *dynamicResources) Unreserve(ctx context.Context, cs *framework.CycleSt
 	}
 
 	logger := klog.FromContext(ctx)
 
+	// Was publishing delayed? If yes, do it now.
+	//
+	// The most common scenario is that a different set of potential nodes
+	// was identified. This revised set needs to be published to enable DRA
+	// drivers to provide better guidance for future scheduling attempts.
+	if state.podSchedulingState.isDirty() {
+		if err := state.podSchedulingState.publish(ctx, pod, pl.clientset); err != nil {
+			logger.Error(err, "publish PodSchedulingContext")
+		}
+	}
+
 	for _, claim := range state.claims {
 		if claim.Status.Allocation != nil &&
 			resourceclaim.IsReservedForPod(pod, claim) {
@@ -1042,6 +1053,15 @@ func (pl *dynamicResources) PreBind(ctx context.Context, cs *framework.CycleStat
 	}
 
 	logger := klog.FromContext(ctx)
 
+	// Was publishing delayed? If yes, do it now and then cause binding to stop.
+	if state.podSchedulingState.isDirty() {
+		if err := state.podSchedulingState.publish(ctx, pod, pl.clientset); err != nil {
+			return statusError(logger, err)
+		}
+		return statusPending(logger, "waiting for resource driver", "pod", klog.KObj(pod), "node", klog.ObjectRef{Name: nodeName})
+	}
+
 	for index, claim := range state.claims {
 		if !resourceclaim.IsReservedForPod(pod, claim) {
 			// The claim might be stale, for example because the claim can get shared and some
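
Why the two call sites above handle publish errors differently: in the scheduler framework, Unreserve has no return value, so a failed publish can only be logged there, whereas PreBind returns a status and can therefore abort the binding attempt on error or report Pending until the driver has allocated the claims. The sketch below only shows the shape of those extension points; the placeholder types are stand-ins for the real *framework.CycleState, *framework.Status and *v1.Pod parameters and are an assumption for illustration, not the framework's source.

package sketch

import "context"

// Placeholder types standing in for the framework's CycleState, Status and
// the Pod object, just to keep the example self-contained.
type (
	CycleState struct{}
	Status     struct{}
	Pod        struct{}
)

// ReservePlugin: Reserve may return a non-nil *Status, but Unreserve is a
// best-effort cleanup hook without a return value. That is why the delayed
// publish in Unreserve can only log its error.
type ReservePlugin interface {
	Reserve(ctx context.Context, state *CycleState, p *Pod, nodeName string) *Status
	Unreserve(ctx context.Context, state *CycleState, p *Pod, nodeName string)
}

// PreBindPlugin: PreBind runs in the binding goroutine and returns a *Status,
// so a failed or still-pending publish can stop the binding attempt and let
// the pod be retried later.
type PreBindPlugin interface {
	PreBind(ctx context.Context, state *CycleState, p *Pod, nodeName string) *Status
}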
@@ -351,8 +351,8 @@ func TestPlugin(t *testing.T) {
 			claims:  []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim},
 			classes: []*resourcev1alpha2.ResourceClass{resourceClass},
 			want: want{
-				reserve: result{
-					status: framework.NewStatus(framework.Pending, `waiting for resource driver to allocate resource`),
+				prebind: result{
+					status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
 					added:  []metav1.Object{schedulingSelectedPotential},
 				},
 			},
@@ -365,8 +365,8 @@ func TestPlugin(t *testing.T) {
 			claims:  []*resourcev1alpha2.ResourceClaim{pendingDelayedClaim, pendingDelayedClaim2},
 			classes: []*resourcev1alpha2.ResourceClass{resourceClass},
 			want: want{
-				reserve: result{
-					status: framework.NewStatus(framework.Pending, `waiting for resource driver to provide information`),
+				prebind: result{
+					status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
 					added:  []metav1.Object{schedulingPotential},
 				},
 			},
@@ -379,8 +379,8 @@ func TestPlugin(t *testing.T) {
 			schedulings: []*resourcev1alpha2.PodSchedulingContext{schedulingInfo},
 			classes:     []*resourcev1alpha2.ResourceClass{resourceClass},
 			want: want{
-				reserve: result{
-					status: framework.NewStatus(framework.Pending, `waiting for resource driver to allocate resource`),
+				prebind: result{
+					status: framework.NewStatus(framework.Pending, `waiting for resource driver`),
 					changes: change{
 						scheduling: func(in *resourcev1alpha2.PodSchedulingContext) *resourcev1alpha2.PodSchedulingContext {
 							return st.FromPodSchedulingContexts(in).
@@ -399,7 +399,7 @@ func TestPlugin(t *testing.T) {
 			schedulings: []*resourcev1alpha2.PodSchedulingContext{schedulingInfo},
 			classes:     []*resourcev1alpha2.ResourceClass{resourceClass},
 			prepare: prepare{
-				reserve: change{
+				prebind: change{
 					scheduling: func(in *resourcev1alpha2.PodSchedulingContext) *resourcev1alpha2.PodSchedulingContext {
 						// This does not actually conflict with setting the
 						// selected node, but because the plugin is not using
@@ -411,7 +411,7 @@ func TestPlugin(t *testing.T) {
 				},
 			},
 			want: want{
-				reserve: result{
+				prebind: result{
 					status: framework.AsStatus(errors.New(`ResourceVersion must match the object that gets updated`)),
 				},
 			},