Mirror of https://github.com/k3s-io/kubernetes.git
Scheduler changes to assume volume and pod together, and then bind
volume and pod asynchronously afterwards. This will also make it easier to migrate to the scheduler framework.
Commit 01d83fa104 (parent 37d46a1e3f)
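In short, this change replaces the old queue-based bindVolumesWorker with an assumeVolumes/bindVolumes pair: scheduleOne assumes the pod's volumes and the pod together, and a single goroutine then binds any still-unbound volumes before binding the pod itself. The sketch below is a minimal, self-contained illustration of that ordering, not the scheduler's actual code; the volumeBinder interface and fakeBinder type are hypothetical stand-ins for the real VolumeBinder plumbing shown in the diff.

```go
package main

import (
	"fmt"
	"sync"
)

// volumeBinder is a hypothetical stand-in for the scheduler's volume binding
// interface (the real one is persistentvolume.SchedulerVolumeBinder).
type volumeBinder interface {
	// AssumePodVolumes updates the in-memory volume cache and reports whether
	// all of the pod's volumes are already bound.
	AssumePodVolumes(pod, node string) (allBound bool, err error)
	// BindPodVolumes makes the API updates and waits for binding to finish.
	BindPodVolumes(pod string) error
}

// fakeBinder pretends one volume still needs binding and that binding succeeds.
type fakeBinder struct{}

func (fakeBinder) AssumePodVolumes(pod, node string) (bool, error) { return false, nil }
func (fakeBinder) BindPodVolumes(pod string) error                 { return nil }

// scheduleOne sketches the ordering introduced by this commit: assume volumes,
// assume the pod, then bind volumes (only if needed) and the pod in one goroutine.
func scheduleOne(vb volumeBinder, pod, node string, wg *sync.WaitGroup) {
	allBound, err := vb.AssumePodVolumes(pod, node)
	if err != nil {
		return // the real scheduler records the error and retries the pod
	}

	// ... the pod itself is assumed into the scheduler cache here ...

	wg.Add(1)
	go func() {
		defer wg.Done()
		// Bind volumes first, before the pod, and only when some are unbound.
		if !allBound {
			if err := vb.BindPodVolumes(pod); err != nil {
				// The real code forgets the assumed pod so scheduling is retried.
				fmt.Printf("volume binding for %s failed: %v\n", pod, err)
				return
			}
		}
		fmt.Printf("binding pod %s to node %s\n", pod, node)
	}()
}

func main() {
	var wg sync.WaitGroup
	scheduleOne(fakeBinder{}, "default/foo", "machine1", &wg)
	wg.Wait()
}
```

Binding volumes in the same goroutine that binds the pod means a failed or timed-out volume bind simply fails that scheduling attempt (the assumed pod is forgotten and retried), instead of round-tripping the pod through a separate work queue.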
@@ -17,7 +17,6 @@ limitations under the License.
 package scheduler

 import (
-    "fmt"
     "time"

     "k8s.io/api/core/v1"
@@ -184,10 +183,6 @@ func (sched *Scheduler) Run() {
         return
     }

-    if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
-        go sched.config.VolumeBinder.Run(sched.bindVolumesWorker, sched.config.StopEverything)
-    }
-
     go wait.Until(sched.scheduleOne, 0, sched.config.StopEverything)
 }

@@ -265,17 +260,12 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
     return nodeName, err
 }

-// assumeAndBindVolumes will update the volume cache and then asynchronously bind volumes if required.
-//
-// If volume binding is required, then the bind volumes routine will update the pod to send it back through
-// the scheduler.
-//
-// Otherwise, return nil error and continue to assume the pod.
+// assumeVolumes will update the volume cache with the chosen bindings
 //
 // This function modifies assumed if volume binding is required.
-func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error {
+func (sched *Scheduler) assumeVolumes(assumed *v1.Pod, host string) (allBound bool, err error) {
     if utilfeature.DefaultFeatureGate.Enabled(features.VolumeScheduling) {
-        allBound, bindingRequired, err := sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
+        allBound, err = sched.config.VolumeBinder.Binder.AssumePodVolumes(assumed, host)
         if err != nil {
             sched.config.Error(assumed, err)
             sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "AssumePodVolumes failed: %v", err)
@@ -285,76 +275,38 @@ func (sched *Scheduler) assumeAndBindVolumes(assumed *v1.Pod, host string) error
                 Reason:  "SchedulerError",
                 Message: err.Error(),
             })
-            return err
         }
-        if !allBound {
-            err = fmt.Errorf("Volume binding started, waiting for completion")
-            if bindingRequired {
-                if sched.config.Ecache != nil {
-                    invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
-                    sched.config.Ecache.InvalidatePredicates(invalidPredicates)
-                }
-
-                // bindVolumesWorker() will update the Pod object to put it back in the scheduler queue
-                sched.config.VolumeBinder.BindQueue.Add(assumed)
-            } else {
-                // We are just waiting for PV controller to finish binding, put it back in the
-                // scheduler queue
-                sched.config.Error(assumed, err)
-                sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "FailedScheduling", "%v", err)
-                sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
-                    Type:   v1.PodScheduled,
-                    Status: v1.ConditionFalse,
-                    Reason: "VolumeBindingWaiting",
-                })
-            }
-            return err
-        }
+        // Invalidate ecache because assumed volumes could have affected the cached
+        // pvs for other pods
+        if sched.config.Ecache != nil {
+            invalidPredicates := sets.NewString(predicates.CheckVolumeBindingPred)
+            sched.config.Ecache.InvalidatePredicates(invalidPredicates)
+        }
     }
-    return nil
+    return
 }

-// bindVolumesWorker() processes pods queued in assumeAndBindVolumes() and tries to
-// make the API update for volume binding.
-// This function runs forever until the volume BindQueue is closed.
-func (sched *Scheduler) bindVolumesWorker() {
-    workFunc := func() bool {
-        keyObj, quit := sched.config.VolumeBinder.BindQueue.Get()
-        if quit {
-            return true
-        }
-        defer sched.config.VolumeBinder.BindQueue.Done(keyObj)
-
-        assumed, ok := keyObj.(*v1.Pod)
-        if !ok {
-            glog.V(4).Infof("Object is not a *v1.Pod")
-            return false
-        }
-
-        // TODO: add metrics
-        var reason string
-        var eventType string
-
-        glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
-
-        // The Pod is always sent back to the scheduler afterwards.
-        err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
-        if err != nil {
-            glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
-            reason = "VolumeBindingFailed"
-            eventType = v1.EventTypeWarning
-        } else {
-            glog.V(4).Infof("Successfully bound volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
-            reason = "VolumeBindingWaiting"
-            eventType = v1.EventTypeNormal
-            err = fmt.Errorf("Volume binding started, waiting for completion")
-        }
-
-        // Always fail scheduling regardless of binding success.
-        // The Pod needs to be sent back through the scheduler to:
-        // * Retry volume binding if it fails.
-        // * Retry volume binding if dynamic provisioning fails.
-        // * Bind the Pod to the Node once all volumes are bound.
-        sched.config.Error(assumed, err)
-        sched.config.Recorder.Eventf(assumed, eventType, "FailedScheduling", "%v", err)
-        sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
+// bindVolumes will make the API update with the assumed bindings and wait until
+// the PV controller has completely finished the binding operation.
+//
+// If binding errors, times out or gets undone, then an error will be returned to
+// retry scheduling.
+func (sched *Scheduler) bindVolumes(assumed *v1.Pod) error {
+    var reason string
+    var eventType string
+
+    glog.V(5).Infof("Trying to bind volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
+    err := sched.config.VolumeBinder.Binder.BindPodVolumes(assumed)
+    if err != nil {
+        glog.V(1).Infof("Failed to bind volumes for pod \"%v/%v\": %v", assumed.Namespace, assumed.Name, err)
+
+        // Unassume the Pod and retry scheduling
+        if forgetErr := sched.config.SchedulerCache.ForgetPod(assumed); forgetErr != nil {
+            glog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
+        }
+
+        reason = "VolumeBindingFailed"
+        eventType = v1.EventTypeWarning
+        sched.config.Error(assumed, err)
+        sched.config.Recorder.Eventf(assumed, eventType, "FailedScheduling", "%v", err)
+        sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
@@ -362,15 +314,11 @@ func (sched *Scheduler) bindVolumesWorker() {
             Status: v1.ConditionFalse,
             Reason: reason,
         })
-        return false
+        return err
     }

-    for {
-        if quit := workFunc(); quit {
-            glog.V(4).Infof("bindVolumesWorker shutting down")
-            break
-        }
-    }
+    glog.V(5).Infof("Success binding volumes for pod \"%v/%v\"", assumed.Namespace, assumed.Name)
+    return nil
 }

 // assume signals to the cache that a pod is already in the cache, so that binding can be asynchronous.
@@ -478,16 +426,12 @@ func (sched *Scheduler) scheduleOne() {

     // Assume volumes first before assuming the pod.
     //
-    // If no volumes need binding, then nil is returned, and continue to assume the pod.
+    // If all volumes are completely bound, then allBound is true and binding will be skipped.
     //
-    // Otherwise, error is returned and volume binding is started asynchronously for all of the pod's volumes.
-    // scheduleOne() returns immediately on error, so that it doesn't continue to assume the pod.
-    //
-    // After the asynchronous volume binding updates are made, it will send the pod back through the scheduler for
-    // subsequent passes until all volumes are fully bound.
+    // Otherwise, binding of volumes is started after the pod is assumed, but before pod binding.
     //
     // This function modifies 'assumedPod' if volume binding is required.
-    err = sched.assumeAndBindVolumes(assumedPod, suggestedHost)
+    allBound, err := sched.assumeVolumes(assumedPod, suggestedHost)
     if err != nil {
         return
     }
@@ -499,6 +443,14 @@ func (sched *Scheduler) scheduleOne() {
     }
     // bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
     go func() {
+        // Bind volumes first before Pod
+        if !allBound {
+            err = sched.bindVolumes(assumedPod)
+            if err != nil {
+                return
+            }
+        }
+
         err := sched.bind(assumedPod, &v1.Binding{
             ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
             Target: v1.ObjectReference{
@@ -707,7 +707,6 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
             },
             expectAssumeCalled: true,
             expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
-
             eventReason: "Scheduled",
         },
         {
@@ -739,28 +738,15 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
             expectError: makePredicateError("1 node(s) didn't find available persistent volumes to bind, 1 node(s) had volume node affinity conflict"),
         },
         {
-            name: "unbound/found matches",
+            name: "unbound/found matches/bind succeeds",
             volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
                 FindUnboundSatsified: true,
                 FindBoundSatsified: true,
-                AssumeBindingRequired: true,
             },
             expectAssumeCalled: true,
             expectBindCalled: true,
-            eventReason: "FailedScheduling",
-            expectError: fmt.Errorf("Volume binding started, waiting for completion"),
-        },
-        {
-            name: "unbound/found matches/already-bound",
-            volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
-                FindUnboundSatsified: true,
-                FindBoundSatsified: true,
-                AssumeBindingRequired: false,
-            },
-            expectAssumeCalled: true,
-            expectBindCalled: false,
-            eventReason: "FailedScheduling",
-            expectError: fmt.Errorf("Volume binding started, waiting for completion"),
+            expectPodBind: &v1.Binding{ObjectMeta: metav1.ObjectMeta{Name: "foo", UID: types.UID("foo")}, Target: v1.ObjectReference{Kind: "Node", Name: "machine1"}},
+            eventReason: "Scheduled",
         },
         {
             name: "predicate error",
@@ -786,7 +772,6 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
             volumeBinderConfig: &persistentvolume.FakeVolumeBinderConfig{
                 FindUnboundSatsified: true,
                 FindBoundSatsified: true,
-                AssumeBindingRequired: true,
                 BindErr: bindErr,
             },
             expectAssumeCalled: true,
@@ -814,8 +799,6 @@ func TestSchedulerWithVolumeBinding(t *testing.T) {
                 close(eventChan)
             })

-            go fakeVolumeBinder.Run(s.bindVolumesWorker, stop)
-
             s.scheduleOne()

             // Wait for pod to succeed or fail scheduling
@@ -8,11 +8,9 @@ go_library(
     deps = [
         "//pkg/controller/volume/persistentvolume:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
         "//staging/src/k8s.io/client-go/informers/storage/v1:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
-        "//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
     ],
 )

@@ -20,19 +20,15 @@ import (
     "time"

     "k8s.io/api/core/v1"
-    "k8s.io/apimachinery/pkg/util/wait"
     coreinformers "k8s.io/client-go/informers/core/v1"
     storageinformers "k8s.io/client-go/informers/storage/v1"
     clientset "k8s.io/client-go/kubernetes"
-    "k8s.io/client-go/util/workqueue"
     "k8s.io/kubernetes/pkg/controller/volume/persistentvolume"
 )

-// VolumeBinder sets up the volume binding library and manages
-// the volume binding operations with a queue.
+// VolumeBinder sets up the volume binding library
 type VolumeBinder struct {
     Binder persistentvolume.SchedulerVolumeBinder
-    BindQueue *workqueue.Type
 }

 // NewVolumeBinder sets up the volume binding library and binding queue
@@ -43,8 +39,8 @@ func NewVolumeBinder(
     storageClassInformer storageinformers.StorageClassInformer) *VolumeBinder {

     return &VolumeBinder{
-        Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer),
-        BindQueue: workqueue.NewNamed("podsToBind"),
+        // TODO: what is a good bind timeout value?
+        Binder: persistentvolume.NewVolumeBinder(client, pvcInformer, pvInformer, storageClassInformer, 10*time.Minute),
     }
 }

@@ -52,18 +48,9 @@ func NewVolumeBinder(
 func NewFakeVolumeBinder(config *persistentvolume.FakeVolumeBinderConfig) *VolumeBinder {
     return &VolumeBinder{
         Binder: persistentvolume.NewFakeVolumeBinder(config),
-        BindQueue: workqueue.NewNamed("podsToBind"),
     }
 }

-// Run starts a goroutine to handle the binding queue with the given function.
-func (b *VolumeBinder) Run(bindWorkFunc func(), stopCh <-chan struct{}) {
-    go wait.Until(bindWorkFunc, time.Second, stopCh)
-
-    <-stopCh
-    b.BindQueue.ShutDown()
-}
-
 // DeletePodBindings will delete the cached volume bindings for the given pod.
 func (b *VolumeBinder) DeletePodBindings(pod *v1.Pod) {
     cache := b.Binder.GetBindingsCache()