Merge pull request #46048 from jayunit100/metrics-definitions

Automatic merge from submit-queue Disambiguate schedule, assume, and bind in functions as well as in Addresses my comments in #45972 about how these metrics need to be disambiguated. - separates schedule, assume, and bind. - renames variables like `dest` to be explicit. - removes the logging statement to occur outside of the timed portion of the metric measurement. Generally makes `sheduleOne` a happy function to read :)
2025-07-29 06:27:05 +00:00 · 2017-05-22 10:39:00 -07:00 · 2017-05-22 10:39:00 -07:00 · 027c31e7dc
commit 027c31e7dc
parent 16b5093feb ecb962e658
1 changed files with 75 additions and 45 deletions
--- a/plugin/pkg/scheduler/scheduler.go
+++ b/plugin/pkg/scheduler/scheduler.go
@ -34,6 +34,7 @@ import (
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/util"
 	"fmt"
 	"github.com/golang/glog"
 )
@ -158,17 +159,9 @@ func (sched *Scheduler) Config() *Config {
 	return sched.config
 }
-func (sched *Scheduler) scheduleOne() {
+// schedule implements the scheduling algorithm and returns the suggested host.
-	pod := sched.config.NextPod()
+func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
-	if pod.DeletionTimestamp != nil {
+	host, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
 		sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
 		glog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
 		return
 	}
 	glog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
 	start := time.Now()
 	dest, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
 	if err != nil {
 		glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
 		sched.config.Error(pod, err)
@ -179,16 +172,19 @@ func (sched *Scheduler) scheduleOne() {
 			Reason:  v1.PodReasonUnschedulable,
 			Message: err.Error(),
 		})
-		return
+		return "", err
 	}
-	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
+	return host, err
 }
 // assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
 func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
 	// Optimistically assume that the binding will succeed and send it to apiserver
 	// in the background.
 	// If the binding fails, scheduler will release resources allocated to assumed pod
 	// immediately.
 	assumed := *pod
-	assumed.Spec.NodeName = dest
+	assumed.Spec.NodeName = host
 	if err := sched.config.SchedulerCache.AssumePod(&assumed); err != nil {
 		glog.Errorf("scheduler cache AssumePod failed: %v", err)
 		// TODO: This means that a given pod is already in cache (which means it
@ -197,51 +193,85 @@ func (sched *Scheduler) scheduleOne() {
 		// fix the problem, but should reduce its impact), we simply return here,
 		// as binding doesn't make sense anyway.
 		// This should be fixed properly though.
-		return
+		return err
 	}
 	// Optimistically assume that the binding will succeed, so we need to invalidate affected
 	// predicates in equivalence cache.
 	// If the binding fails, these invalidated item will not break anything.
 	if sched.config.Ecache != nil {
-		sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, dest)
+		sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, host)
 	}
 	return nil
 }
 // bind binds a pod to a given node defined in a binding object.  We expect this to run asynchronously, so we
 // handle binding metrics internally.
 func (sched *Scheduler) bind(assumed *v1.Pod, b *v1.Binding) error {
 	bindingStart := time.Now()
 	// If binding succeeded then PodScheduled condition will be updated in apiserver so that
 	// it's atomic with setting host.
 	err := sched.config.Binder.Bind(b)
 	if err := sched.config.SchedulerCache.FinishBinding(assumed); err != nil {
 		return fmt.Errorf("scheduler cache FinishBinding failed: %v", err)
 	}
 	if err != nil {
 		glog.V(1).Infof("Failed to bind pod: %v/%v", assumed.Namespace, assumed.Name)
 		if err := sched.config.SchedulerCache.ForgetPod(assumed); err != nil {
 			return fmt.Errorf("scheduler cache ForgetPod failed: %v", err)
 		}
 		sched.config.Error(assumed, err)
 		sched.config.Recorder.Eventf(assumed, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
 		sched.config.PodConditionUpdater.Update(assumed, &v1.PodCondition{
 			Type:   v1.PodScheduled,
 			Status: v1.ConditionFalse,
 			Reason: "BindingRejected",
 		})
 		return err
 	}
 	metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
 	sched.config.Recorder.Eventf(assumed, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", assumed.Name, b.Target.Name)
 	return nil
 }
 // scheduleOne does the entire scheduling workflow for a single pod.  It is serialized on the scheduling algorithm's host fitting.
 func (sched *Scheduler) scheduleOne() {
 	pod := sched.config.NextPod()
 	if pod.DeletionTimestamp != nil {
 		sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
 		glog.V(3).Infof("Skip schedule deleting pod: %v/%v", pod.Namespace, pod.Name)
 		return
 	}
-	go func() {
+	glog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
 		defer func() {
 			metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
 		}()
-		b := &v1.Binding{
+	// Synchronously attempt to find a fit for the pod.
 	start := time.Now()
 	suggestedHost, err := sched.schedule(pod)
 	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
 	if err != nil {
 		return
 	}
 	// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
 	// This allows us to keep scheduling without waiting on binding to occur.
 	err = sched.assume(pod, suggestedHost)
 	if err != nil {
 		return
 	}
 	// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
 	go func() {
 		err := sched.bind(pod, &v1.Binding{
 			ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
 			Target: v1.ObjectReference{
 				Kind: "Node",
-				Name: dest,
+				Name: suggestedHost,
 			},
-		}
+		})
-
+		metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
 		bindingStart := time.Now()
 		// If binding succeeded then PodScheduled condition will be updated in apiserver so that
 		// it's atomic with setting host.
 		err := sched.config.Binder.Bind(b)
 		if err := sched.config.SchedulerCache.FinishBinding(&assumed); err != nil {
 			glog.Errorf("scheduler cache FinishBinding failed: %v", err)
 		}
 		if err != nil {
-			glog.V(1).Infof("Failed to bind pod: %v/%v", pod.Namespace, pod.Name)
+			glog.Errorf("Internal error binding pod: (%v)", err)
 			if err := sched.config.SchedulerCache.ForgetPod(&assumed); err != nil {
 				glog.Errorf("scheduler cache ForgetPod failed: %v", err)
 			}
 			sched.config.Error(pod, err)
 			sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
 			sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
 				Type:   v1.PodScheduled,
 				Status: v1.ConditionFalse,
 				Reason: "BindingRejected",
 			})
 			return
 		}
 		metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
 		sched.config.Recorder.Eventf(pod, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", pod.Name, dest)
 	}()
 }