mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 20:24:09 +00:00
Scheduler changes to assume volume and pod together, and then bind
volume and pod asynchronously afterwards. This will also make it easier to migrate to the scheduler framework.
This commit is contained in:
parent
37d46a1e3f
commit
01d83fa104
@ -17,7 +17,6 @@ limitations under the License.
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
@ -184,10 +183,6 @@ func (sched *Scheduler) Run() {
|
||||
return
|
||||
}
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
|
||||
go sched.config.VolumeBinder.Run(sched.bindVolumesWorker, sched.config.StopEverything)
|
||||
}
|
||||
|
||||
go wait.Until(sched.scheduleOne, 0, sched.config.StopEverything)
|
||||
}
|
||||
|
||||
@ -265,17 +260,12 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
|
||||
return nodeName, err
|
||||
}
|
||||
|
||||
// assumeAndBindVolumes will update the volume cache and then asynchronously bind volumes if required.
|
||||
//
|
||||
// If volume binding is required, then the bind volumes routine will update the pod to send it back through
|
||||
// the scheduler.
|
||||
//
|
||||
// Otherwise, return nil error and continue to assume the pod.
|
||||
// assumeVolumes will update the volume cache with the chosen bindings
|
||||
//
|
||||
// This function modifies assumed if volume binding is required.
|
||||
func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error {
|
||||
func (sched *Scheduler) assumeVolumes(assumed *v1.Pod, host string) (allBound bool, err error) {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
|
||||
allBound, bindingRequired, err := sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
|
||||
allBound, err = sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
|
||||
if err != nil {
|
||||
sched.config.Error(assumed, err)
|
||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePodVolumes failed: %v", err)
|
||||
@ -285,76 +275,38 @@ func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error
|
||||
Reason: "SchedulerError",
|
||||
Message: err.Error(),
|
||||
})
|
||||
return err
|
||||
}
|
||||
if !allBound {
|
||||
err = fmt.Errorf("Volume binding started, waiting for completion")
|
||||
if bindingRequired {
|
||||
if sched.config.Ecache != nil {
|
||||
invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
|
||||
sched.config.Ecache.InvalidatePredicates(invalidPredicates)
|
||||
}
|
||||
|
||||
// bindVolumesWorker() will update the Pod object to put it back in the scheduler queue
|
||||
sched.config.VolumeBinder.BindQueue.Add(assumed)
|
||||
} else {
|
||||
// We are just waiting for PV controller to finish binding, put it back in the
|
||||
// scheduler queue
|
||||
sched.config.Error(assumed, err)
|
||||
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "FailedScheduling", "%v", err)
|
||||
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
|
||||
Type: v1.PodScheduled,
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: "VolumeBindingWaiting",
|
||||
})
|
||||
}
|
||||
return err
|
||||
// Invalidate ecache because assumed volumes could have affected the cached
|
||||
// pvs for other pods
|
||||
if sched.config.Ecache != nil {
|
||||
invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
|
||||
sched.config.Ecache.InvalidatePredicates(invalidPredicates)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return
|
||||
}
|
||||
|
||||
// bindVolumesWorker() processes pods queued in assumeAndBindVolumes() and tries to
|
||||
// make the API update for volume binding.
|
||||
// This function runs forever until the volume BindQueue is closed.
|
||||
func (sched *Scheduler) bindVolumesWorker() {
|
||||
workFunc := func() bool {
|
||||
keyObj, quit := sched.config.VolumeBinder.BindQueue.Get()
|
||||
if quit {
|
||||
return true
|
||||
}
|
||||
defer sched.config.VolumeBinder.BindQueue.Done(keyObj)
|
||||
// bindVolumes will make the API update with the assumed bindings and wait until
|
||||
// the PV controller has completely finished the binding operation.
|
||||
//
|
||||
// If binding errors, times out or gets undone, then an error will be returned to
|
||||
// retry scheduling.
|
||||
func (sched *Scheduler) bindVolumes(assumed *v1.Pod) error {
|
||||
var reason string
|
||||
var eventType string
|
||||
|
||||
assumed, ok := keyObj.(*v1.Pod)
|
||||
if !ok {
|
||||
glog.V(4).Infof("Object is not a *v1.Pod")
|
||||
return false
|
||||
glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
|
||||
err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
|
||||
if err != nil {
|
||||
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
|
||||
|
||||
// Unassume the Pod and retry scheduling
|
||||
if forgetErr := sched.config.SchedulerCache.ForgetPod(assumed); forgetErr != nil {
|
||||
glog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
|
||||
// TODO: add metrics
|
||||
var reason string
|
||||
var eventType string
|
||||
|
||||
glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
|
||||
|
||||
// The Pod is always sent back to the scheduler afterwards.
|
||||
err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
|
||||
if err != nil {
|
||||
glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
|
||||
reason = "VolumeBindingFailed"
|
||||
eventType = v1.EventTypeWarning
|
||||
} else {
|
||||
glog.V(4).Infof("Successfully bound volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
|
||||
reason = "VolumeBindingWaiting"
|
||||
eventType = v1.EventTypeNormal
|
||||
err = fmt.Errorf("Volume binding started, waiting for completion")
|
||||
}
|
||||
|
||||
// Always fail scheduling regardless of binding success.
|
||||
// The Pod needs to be sent back through the scheduler to:
|
||||
// * Retry volume binding if it fails.
|
||||
// * Retry volume binding if dynamic provisioning fails.
|
||||
// * Bind the Pod to the Node once all volumes are bound.
|
||||
reason = "VolumeBindingFailed"
|
||||
eventType = v1.EventTypeWarning
|
||||
sched.config.Error(assumed, err)
|
||||
sched.config.Recorder.Eventf(assumed, eventType, "FailedScheduling", "%v", err)
|
||||
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
|
||||
@ -362,15 +314,11 @@ func (sched *Scheduler) bindVolumesWorker() {
|
||||
Status: v1.ConditionFalse,
|
||||
Reason: reason,
|
||||
})
|
||||
return false
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
if quit := workFunc(); quit {
|
||||
glog.V(4).Infof("bindVolumesWorker shutting down")
|
||||
break
|
||||
}
|
||||
}
|
||||
glog.V(5).Infof("Success binding volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
|
||||
@ -478,16 +426,12 @@ func (sched *Scheduler) scheduleOne() {
|
||||
|
||||
// Assume volumes first before assuming the pod.
|
||||
//
|
||||
// If no volumes need binding, then nil is returned, and continue to assume the pod.
|
||||
// If all volumes are completely bound, then allBound is true and binding will be skipped.
|
||||
//
|
||||
// Otherwise, error is returned and volume binding is started asynchronously for all of the pod's volumes.
|
||||
// scheduleOne() returns immediately on error, so that it doesn't continue to assume the pod.
|
||||
//
|
||||
// After the asynchronous volume binding updates are made, it will send the pod back through the scheduler for
|
||||
// subsequent passes until all volumes are fully bound.
|
||||
// Otherwise, binding of volumes is started after the pod is assumed, but before pod binding.
|
||||
//
|
||||
// This function modifies 'assumedPod' if volume binding is required.
|
||||
err = sched.assumeAndBindVolumes(assumedPod, suggestedHost)
|
||||
allBound, err := sched.assumeVolumes(assumedPod, suggestedHost)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -499,6 +443,14 @@ func (sched *Scheduler) scheduleOne() {
|
||||
}
|
||||
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
|
||||
go func() {
|
||||
// Bind volumes first before Pod
|
||||
if !allBound {
|
||||
err = sched.bindVolumes(assumedPod)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err := sched.bind(assumedPod, &v1.Binding{
|
||||
ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
|
||||
Target: v1.ObjectReference{
|
||||
|
@ -707,8 +707,7 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
|
||||
},
|
||||
expectAssumeCalled: true,
|
||||
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
|
||||
|
||||
eventReason: "Scheduled",
|
||||
eventReason: "Scheduled",
|
||||
},
|
||||
{
|
||||
name: "bound/invalid pv affinity",
|
||||
@ -739,28 +738,15 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
|
||||
expectError: makePredicateError("1 node(s) didn't find available persistent volumes to bind, 1 node(s) had volume node affinity conflict"),
|
||||
},
|
||||
{
|
||||
name: "unbound/found matches",
|
||||
name: "unbound/found matches/bind succeeds",
|
||||
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
|
||||
FindUnboundSatsified: true,
|
||||
FindBoundSatsified: true,
|
||||
AssumeBindingRequired: true,
|
||||
FindUnboundSatsified: true,
|
||||
FindBoundSatsified: true,
|
||||
},
|
||||
expectAssumeCalled: true,
|
||||
expectBindCalled: true,
|
||||
eventReason: "FailedScheduling",
|
||||
expectError: fmt.Errorf("Volume binding started, waiting for completion"),
|
||||
},
|
||||
{
|
||||
name: "unbound/found matches/already-bound",
|
||||
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
|
||||
FindUnboundSatsified: true,
|
||||
FindBoundSatsified: true,
|
||||
AssumeBindingRequired: false,
|
||||
},
|
||||
expectAssumeCalled: true,
|
||||
expectBindCalled: false,
|
||||
eventReason: "FailedScheduling",
|
||||
expectError: fmt.Errorf("Volume binding started, waiting for completion"),
|
||||
expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
|
||||
eventReason: "Scheduled",
|
||||
},
|
||||
{
|
||||
name: "predicate error",
|
||||
@ -784,10 +770,9 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
|
||||
{
|
||||
name: "bind error",
|
||||
volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
|
||||
FindUnboundSatsified: true,
|
||||
FindBoundSatsified: true,
|
||||
AssumeBindingRequired: true,
|
||||
BindErr: bindErr,
|
||||
FindUnboundSatsified: true,
|
||||
FindBoundSatsified: true,
|
||||
BindErr: bindErr,
|
||||
},
|
||||
expectAssumeCalled: true,
|
||||
expectBindCalled: true,
|
||||
@ -814,8 +799,6 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
|
||||
close(eventChan)
|
||||
})
|
||||
|
||||
go fakeVolumeBinder.Run(s.bindVolumesWorker, stop)
|
||||
|
||||
s.scheduleOne()
|
||||
|
||||
// Wait for pod to succeed or fail scheduling
|
||||
|
@ -8,11 +8,9 @@ go_library(
|
||||
deps = [
|
||||
"//pkg/controller/volume/persistentvolume:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/informers/storage/v1:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -20,19 +20,15 @@ import (
|
||||
"time"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||
storageinformers "k8s.io/client-go/informers/storage/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/util/workqueue"
|
||||
"k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
|
||||
)
|
||||
|
||||
// VolumeBinder sets up the volume binding library and manages
|
||||
// the volume binding operations with a queue.
|
||||
// VolumeBinder sets up the volume binding library
|
||||
type VolumeBinder struct {
|
||||
Binder persistentvolume.SchedulerVolumeBinder
|
||||
BindQueue *workqueue.Type
|
||||
Binder persistentvolume.SchedulerVolumeBinder
|
||||
}
|
||||
|
||||
// NewVolumeBinder sets up the volume binding library and binding queue
|
||||
@ -43,27 +39,18 @@ func NewVolumeBinder(
|
||||
storageClassInformer storageinformers.StorageClassInformer) *VolumeBinder {
|
||||
|
||||
return &VolumeBinder{
|
||||
Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer),
|
||||
BindQueue: workqueue.NewNamed("podsToBind"),
|
||||
// TODO: what is a good bind timeout value?
|
||||
Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer, 10*time.Minute),
|
||||
}
|
||||
}
|
||||
|
||||
// NewFakeVolumeBinder sets up a fake volume binder and binding queue
|
||||
func NewFakeVolumeBinder(config *persistentvolume.FakeVolumeBinderConfig) *VolumeBinder {
|
||||
return &VolumeBinder{
|
||||
Binder: persistentvolume.NewFakeVolumeBinder(config),
|
||||
BindQueue: workqueue.NewNamed("podsToBind"),
|
||||
Binder: persistentvolume.NewFakeVolumeBinder(config),
|
||||
}
|
||||
}
|
||||
|
||||
// Run starts a goroutine to handle the binding queue with the given function.
|
||||
func (b *VolumeBinder) Run(bindWorkFunc func(), stopCh <-chan struct{}) {
|
||||
go wait.Until(bindWorkFunc, time.Second, stopCh)
|
||||
|
||||
<-stopCh
|
||||
b.BindQueue.ShutDown()
|
||||
}
|
||||
|
||||
// DeletePodBindings will delete the cached volume bindings for the given pod.
|
||||
func (b *VolumeBinder) DeletePodBindings(pod *v1.Pod) {
|
||||
cache := b.Binder.GetBindingsCache()
|
||||
|
Loading…
Reference in New Issue
Block a user