From c74d045c4b7825acd277e3178802cfe4d8a6be82 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 1 Sep 2023 08:30:13 +0200 Subject: [PATCH] scheduler_perf: show name of one pending pod in error message If pods get stuck, then giving the name of one makes it possible to search for it in the log output. Without the name it's hard to figure out which pods got stuck. --- .../scheduler_perf/scheduler_perf_test.go | 17 +++++++++++++-- test/integration/scheduler_perf/util.go | 21 ++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/test/integration/scheduler_perf/scheduler_perf_test.go b/test/integration/scheduler_perf/scheduler_perf_test.go index 58b671b9c63..6332a77e2a3 100644 --- a/test/integration/scheduler_perf/scheduler_perf_test.go +++ b/test/integration/scheduler_perf/scheduler_perf_test.go @@ -50,6 +50,7 @@ import ( "k8s.io/component-base/featuregate" featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/apis/config" "k8s.io/kubernetes/pkg/scheduler/apis/config/scheme" "k8s.io/kubernetes/pkg/scheduler/apis/config/validation" @@ -1312,13 +1313,15 @@ func createPods(ctx context.Context, tb testing.TB, namespace string, cpo *creat // namespace are scheduled. Times out after 10 minutes because even at the // lowest observed QPS of ~10 pods/sec, a 5000-node test should complete. func waitUntilPodsScheduledInNamespace(ctx context.Context, tb testing.TB, podInformer coreinformers.PodInformer, namespace string, wantCount int) error { - return wait.PollUntilContextTimeout(ctx, 1*time.Second, 10*time.Minute, true, func(ctx context.Context) (bool, error) { + var pendingPod *v1.Pod + + err := wait.PollUntilContextTimeout(ctx, 1*time.Second, 10*time.Minute, true, func(ctx context.Context) (bool, error) { select { case <-ctx.Done(): return true, ctx.Err() default: } - scheduled, err := getScheduledPods(podInformer, namespace) + scheduled, unscheduled, err := getScheduledPods(podInformer, namespace) if err != nil { return false, err } @@ -1327,8 +1330,18 @@ func waitUntilPodsScheduledInNamespace(ctx context.Context, tb testing.TB, podIn return true, nil } tb.Logf("namespace: %s, pods: want %d, got %d", namespace, wantCount, len(scheduled)) + if len(unscheduled) > 0 { + pendingPod = unscheduled[0] + } else { + pendingPod = nil + } return false, nil }) + + if err != nil && pendingPod != nil { + err = fmt.Errorf("at least pod %s is not scheduled: %v", klog.KObj(pendingPod), err) + } + return err } // waitUntilPodsScheduled blocks until the all pods in the given namespaces are diff --git a/test/integration/scheduler_perf/util.go b/test/integration/scheduler_perf/util.go index 327f1dd4310..0199dadd134 100644 --- a/test/integration/scheduler_perf/util.go +++ b/test/integration/scheduler_perf/util.go @@ -147,23 +147,28 @@ func mustSetupCluster(ctx context.Context, tb testing.TB, config *config.KubeSch return informerFactory, client, dynClient } -// Returns the list of scheduled pods in the specified namespaces. +// Returns the list of scheduled and unscheduled pods in the specified namespaces. // Note that no namespaces specified matches all namespaces. -func getScheduledPods(podInformer coreinformers.PodInformer, namespaces ...string) ([]*v1.Pod, error) { +func getScheduledPods(podInformer coreinformers.PodInformer, namespaces ...string) ([]*v1.Pod, []*v1.Pod, error) { pods, err := podInformer.Lister().List(labels.Everything()) if err != nil { - return nil, err + return nil, nil, err } s := sets.New(namespaces...) scheduled := make([]*v1.Pod, 0, len(pods)) + unscheduled := make([]*v1.Pod, 0, len(pods)) for i := range pods { pod := pods[i] - if len(pod.Spec.NodeName) > 0 && (len(s) == 0 || s.Has(pod.Namespace)) { - scheduled = append(scheduled, pod) + if len(s) == 0 || s.Has(pod.Namespace) { + if len(pod.Spec.NodeName) > 0 { + scheduled = append(scheduled, pod) + } else { + unscheduled = append(unscheduled, pod) + } } } - return scheduled, nil + return scheduled, unscheduled, nil } // DataItem is the data point. @@ -355,7 +360,7 @@ func newThroughputCollector(tb testing.TB, podInformer coreinformers.PodInformer } func (tc *throughputCollector) run(ctx context.Context) { - podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...) + podsScheduled, _, err := getScheduledPods(tc.podInformer, tc.namespaces...) if err != nil { klog.Fatalf("%v", err) } @@ -372,7 +377,7 @@ func (tc *throughputCollector) run(ctx context.Context) { return case <-ticker.C: now := time.Now() - podsScheduled, err := getScheduledPods(tc.podInformer, tc.namespaces...) + podsScheduled, _, err := getScheduledPods(tc.podInformer, tc.namespaces...) if err != nil { klog.Fatalf("%v", err) }