From 902c711fb49f9f4951074aa61bb012b68fa0fe4f Mon Sep 17 00:00:00 2001 From: Heba Elayoty Date: Mon, 15 May 2023 12:20:44 -0700 Subject: [PATCH] Unset gated pod info timestamp in addToActiveQ Signed-off-by: Heba Elayoty --- pkg/scheduler/framework/types.go | 2 +- .../internal/queue/scheduling_queue.go | 6 +++- .../internal/queue/scheduling_queue_test.go | 36 +++++++++++++++++-- pkg/scheduler/schedule_one.go | 5 +-- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go index 6f9e9b295da..87a5cf6dd82 100644 --- a/pkg/scheduler/framework/types.go +++ b/pkg/scheduler/framework/types.go @@ -106,7 +106,7 @@ type QueuedPodInfo struct { // back to the queue multiple times before it's successfully scheduled. // It shouldn't be updated once initialized. It's used to record the e2e scheduling // latency for a pod. - InitialAttemptTimestamp time.Time + InitialAttemptTimestamp *time.Time // If a Pod failed in a scheduling cycle, record the plugin names it failed by. UnschedulablePlugins sets.Set[string] // Whether the Pod is scheduling gated (by PreEnqueuePlugins) or not. diff --git a/pkg/scheduler/internal/queue/scheduling_queue.go b/pkg/scheduler/internal/queue/scheduling_queue.go index 15eb581c15f..b864a464249 100644 --- a/pkg/scheduler/internal/queue/scheduling_queue.go +++ b/pkg/scheduler/internal/queue/scheduling_queue.go @@ -388,6 +388,10 @@ func (p *PriorityQueue) addToActiveQ(logger klog.Logger, pInfo *framework.Queued p.unschedulablePods.addOrUpdate(pInfo) return false, nil } + if pInfo.InitialAttemptTimestamp == nil { + now := p.clock.Now() + pInfo.InitialAttemptTimestamp = &now + } if err := p.activeQ.Add(pInfo); err != nil { logger.Error(err, "Error adding pod to the active queue", "pod", klog.KObj(pInfo.Pod)) return false, err @@ -903,7 +907,7 @@ func (p *PriorityQueue) newQueuedPodInfo(pod *v1.Pod, plugins ...string) *framew return &framework.QueuedPodInfo{ PodInfo: podInfo, Timestamp: now, - InitialAttemptTimestamp: now, + InitialAttemptTimestamp: nil, UnschedulablePlugins: sets.New(plugins...), } } diff --git a/pkg/scheduler/internal/queue/scheduling_queue_test.go b/pkg/scheduler/internal/queue/scheduling_queue_test.go index 08f482b1646..6f3a6883ddc 100644 --- a/pkg/scheduler/internal/queue/scheduling_queue_test.go +++ b/pkg/scheduler/internal/queue/scheduling_queue_test.go @@ -1896,6 +1896,38 @@ func TestPerPodSchedulingMetrics(t *testing.T) { t.Fatalf("Failed to pop a pod %v", err) } checkPerPodSchedulingMetrics("Attempt twice with update", t, pInfo, 2, timestamp) + + // Case 4: A gated pod is created and scheduled after lifting gate. The queue operations are + // Add gated pod -> check unschedulablePods -> lift gate & update pod -> Pop. + c = testingclock.NewFakeClock(timestamp) + // Create a queue with PreEnqueuePlugin + m := map[string][]framework.PreEnqueuePlugin{"": {&preEnqueuePlugin{allowlists: []string{"foo"}}}} + queue = NewTestQueue(ctx, newDefaultQueueSort(), WithClock(c), WithPreEnqueuePluginMap(m), WithPluginMetricsSamplePercent(0)) + + // Create a pod without PreEnqueuePlugin label. + gatedPod := st.MakePod().Name("gated-test-pod").Namespace("test-ns").UID("test-uid").Obj() + err = queue.Add(logger, gatedPod) + if err != nil { + t.Fatalf("Failed to add a pod %v", err) + } + // Check pod is added to the unschedulablePods queue. + if getUnschedulablePod(queue, gatedPod) != gatedPod { + t.Errorf("Pod %v was not found in the unschedulablePods.", gatedPod.Name) + } + // Override clock to get different InitialAttemptTimestamp + c.Step(1 * time.Minute) + + // Update pod with the required label to get it out of unschedulablePods queue. + updateGatedPod := gatedPod.DeepCopy() + updateGatedPod.Labels = map[string]string{"foo": ""} + queue.Update(logger, gatedPod, updateGatedPod) + + pInfo, err = queue.Pop() + if err != nil { + t.Fatalf("Failed to pop a pod %v", err) + } + + checkPerPodSchedulingMetrics("Attempt once/gated", t, pInfo, 1, timestamp.Add(1*time.Minute)) } func TestIncomingPodsMetrics(t *testing.T) { @@ -1992,8 +2024,8 @@ func checkPerPodSchedulingMetrics(name string, t *testing.T, pInfo *framework.Qu if pInfo.Attempts != wantAttempts { t.Errorf("[%s] Pod schedule attempt unexpected, got %v, want %v", name, pInfo.Attempts, wantAttempts) } - if pInfo.InitialAttemptTimestamp != wantInitialAttemptTs { - t.Errorf("[%s] Pod initial schedule attempt timestamp unexpected, got %v, want %v", name, pInfo.InitialAttemptTimestamp, wantInitialAttemptTs) + if *pInfo.InitialAttemptTimestamp != wantInitialAttemptTs { + t.Errorf("[%s] Pod initial schedule attempt timestamp unexpected, got %v, want %v", name, *pInfo.InitialAttemptTimestamp, wantInitialAttemptTs) } } diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go index 6c0b69b789e..9b594a2c927 100644 --- a/pkg/scheduler/schedule_one.go +++ b/pkg/scheduler/schedule_one.go @@ -258,8 +258,9 @@ func (sched *Scheduler) bindingCycle( logger.V(2).Info("Successfully bound pod to node", "pod", klog.KObj(assumedPod), "node", scheduleResult.SuggestedHost, "evaluatedNodes", scheduleResult.EvaluatedNodes, "feasibleNodes", scheduleResult.FeasibleNodes) metrics.PodScheduled(fwk.ProfileName(), metrics.SinceInSeconds(start)) metrics.PodSchedulingAttempts.Observe(float64(assumedPodInfo.Attempts)) - metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(assumedPodInfo.InitialAttemptTimestamp)) - + if assumedPodInfo.InitialAttemptTimestamp != nil { + metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp)) + } // Run "postbind" plugins. fwk.RunPostBindPlugins(ctx, state, assumedPod, scheduleResult.SuggestedHost)