mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 06:27:05 +00:00
Merge pull request #46048 from jayunit100/metrics-definitions
Automatic merge from submit-queue Disambiguate schedule, assume, and bind in functions as well as in Addresses my comments in #45972 about how these metrics need to be disambiguated. - separates schedule, assume, and bind. - renames variables like `dest` to be explicit. - removes the logging statement to occur outside of the timed portion of the metric measurement. Generally makes `sheduleOne` a happy function to read :)
This commit is contained in:
commit
027c31e7dc
@ -34,6 +34,7 @@ import (
|
|||||||
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
|
||||||
"k8s.io/kubernetes/plugin/pkg/scheduler/util"
|
"k8s.io/kubernetes/plugin/pkg/scheduler/util"
|
||||||
|
|
||||||
|
"fmt"
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -158,17 +159,9 @@ func (sched *Scheduler) Config() *Config {
|
|||||||
return sched.config
|
return sched.config
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sched *Scheduler) scheduleOne() {
|
// schedule implements the scheduling algorithm and returns the suggested host.
|
||||||
pod := sched.config.NextPod()
|
func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
|
||||||
if pod.DeletionTimestamp != nil {
|
host, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
|
||||||
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
|
|
||||||
glog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
glog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
|
|
||||||
start := time.Now()
|
|
||||||
dest, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
|
glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
|
||||||
sched.config.Error(pod, err)
|
sched.config.Error(pod, err)
|
||||||
@ -179,16 +172,19 @@ func (sched *Scheduler) scheduleOne() {
|
|||||||
Reason: v1.PodReasonUnschedulable,
|
Reason: v1.PodReasonUnschedulable,
|
||||||
Message: err.Error(),
|
Message: err.Error(),
|
||||||
})
|
})
|
||||||
return
|
return "", err
|
||||||
}
|
}
|
||||||
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
|
return host, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
|
||||||
|
func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
|
||||||
// Optimistically assume that the binding will succeed and send it to apiserver
|
// Optimistically assume that the binding will succeed and send it to apiserver
|
||||||
// in the background.
|
// in the background.
|
||||||
// If the binding fails, scheduler will release resources allocated to assumed pod
|
// If the binding fails, scheduler will release resources allocated to assumed pod
|
||||||
// immediately.
|
// immediately.
|
||||||
assumed := *pod
|
assumed := *pod
|
||||||
assumed.Spec.NodeName = dest
|
assumed.Spec.NodeName = host
|
||||||
if err := sched.config.SchedulerCache.AssumePod(&assumed); err != nil {
|
if err := sched.config.SchedulerCache.AssumePod(&assumed); err != nil {
|
||||||
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
glog.Errorf("scheduler cache AssumePod failed: %v", err)
|
||||||
// TODO: This means that a given pod is already in cache (which means it
|
// TODO: This means that a given pod is already in cache (which means it
|
||||||
@ -197,51 +193,85 @@ func (sched *Scheduler) scheduleOne() {
|
|||||||
// fix the problem, but should reduce its impact), we simply return here,
|
// fix the problem, but should reduce its impact), we simply return here,
|
||||||
// as binding doesn't make sense anyway.
|
// as binding doesn't make sense anyway.
|
||||||
// This should be fixed properly though.
|
// This should be fixed properly though.
|
||||||
return
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optimistically assume that the binding will succeed, so we need to invalidate affected
|
// Optimistically assume that the binding will succeed, so we need to invalidate affected
|
||||||
// predicates in equivalence cache.
|
// predicates in equivalence cache.
|
||||||
// If the binding fails, these invalidated item will not break anything.
|
// If the binding fails, these invalidated item will not break anything.
|
||||||
if sched.config.Ecache != nil {
|
if sched.config.Ecache != nil {
|
||||||
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, dest)
|
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, host)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bind binds a pod to a given node defined in a binding object. We expect this to run asynchronously, so we
|
||||||
|
// handle binding metrics internally.
|
||||||
|
func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
|
||||||
|
bindingStart := time.Now()
|
||||||
|
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
|
||||||
|
// it's atomic with setting host.
|
||||||
|
err := sched.config.Binder.Bind(b)
|
||||||
|
if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
|
||||||
|
return fmt.Errorf("scheduler cache FinishBinding failed: %v", err)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
|
||||||
|
if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
|
||||||
|
return fmt.Errorf("scheduler cache ForgetPod failed: %v", err)
|
||||||
|
}
|
||||||
|
sched.config.Error(assumed, err)
|
||||||
|
sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
|
||||||
|
sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
|
||||||
|
Type: v1.PodScheduled,
|
||||||
|
Status: v1.ConditionFalse,
|
||||||
|
Reason: "BindingRejected",
|
||||||
|
})
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
|
||||||
|
sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// scheduleOne does the entire scheduling workflow for a single pod. It is serialized on the scheduling algorithm's host fitting.
|
||||||
|
func (sched *Scheduler) scheduleOne() {
|
||||||
|
pod := sched.config.NextPod()
|
||||||
|
if pod.DeletionTimestamp != nil {
|
||||||
|
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
|
||||||
|
glog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
go func() {
|
glog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
|
||||||
defer func() {
|
|
||||||
metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
|
|
||||||
}()
|
|
||||||
|
|
||||||
b := &v1.Binding{
|
// Synchronously attempt to find a fit for the pod.
|
||||||
|
start := time.Now()
|
||||||
|
suggestedHost, err := sched.schedule(pod)
|
||||||
|
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
|
||||||
|
// This allows us to keep scheduling without waiting on binding to occur.
|
||||||
|
err = sched.assume(pod, suggestedHost)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
|
||||||
|
go func() {
|
||||||
|
err := sched.bind(pod, &v1.Binding{
|
||||||
ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
|
ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
|
||||||
Target: v1.ObjectReference{
|
Target: v1.ObjectReference{
|
||||||
Kind: "Node",
|
Kind: "Node",
|
||||||
Name: dest,
|
Name: suggestedHost,
|
||||||
},
|
},
|
||||||
}
|
})
|
||||||
|
metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
|
||||||
bindingStart := time.Now()
|
|
||||||
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
|
|
||||||
// it's atomic with setting host.
|
|
||||||
err := sched.config.Binder.Bind(b)
|
|
||||||
if err := sched.config.SchedulerCache.FinishBinding(&assumed); err != nil {
|
|
||||||
glog.Errorf("scheduler cache FinishBinding failed: %v", err)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(1).Infof("Failed to bind pod: %v/%v", pod.Namespace, pod.Name)
|
glog.Errorf("Internal error binding pod: (%v)", err)
|
||||||
if err := sched.config.SchedulerCache.ForgetPod(&assumed); err != nil {
|
|
||||||
glog.Errorf("scheduler cache ForgetPod failed: %v", err)
|
|
||||||
}
|
|
||||||
sched.config.Error(pod, err)
|
|
||||||
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
|
|
||||||
sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
|
|
||||||
Type: v1.PodScheduled,
|
|
||||||
Status: v1.ConditionFalse,
|
|
||||||
Reason: "BindingRejected",
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
|
|
||||||
sched.config.Recorder.Eventf(pod, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", pod.Name, dest)
|
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user