From 32c14da90253507ec266a3992a4758d8e5c3d125 Mon Sep 17 00:00:00 2001 From: Jian Zeng Date: Thu, 13 May 2021 23:44:26 +0800 Subject: [PATCH 1/2] fix(metrics-proxy): wait for enough component pods to show up first Signed-off-by: Jian Zeng --- test/e2e/framework/metrics/metrics_proxy.go | 56 +++++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/test/e2e/framework/metrics/metrics_proxy.go b/test/e2e/framework/metrics/metrics_proxy.go index f096238cf8f..ae46fff4ea3 100644 --- a/test/e2e/framework/metrics/metrics_proxy.go +++ b/test/e2e/framework/metrics/metrics_proxy.go @@ -34,38 +34,50 @@ import ( ) type componentInfo struct { + Name string Port int IP string } // SetupMetricsProxy creates a nginx Pod to expose metrics from the secure port of kube-scheduler and kube-controller-manager in tests. func SetupMetricsProxy(c clientset.Interface) error { - podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{}) - if err != nil { - return err - } var infos []componentInfo - for _, pod := range podList.Items { - switch { - case strings.HasPrefix(pod.Name, "kube-scheduler-"): - infos = append(infos, componentInfo{ - Port: kubeSchedulerPort, - IP: pod.Status.PodIP, - }) - case strings.HasPrefix(pod.Name, "kube-controller-manager-"): - infos = append(infos, componentInfo{ - Port: kubeControllerManagerPort, - IP: pod.Status.PodIP, - }) + // The component pods might take some time to show up. + err := wait.PollImmediate(time.Second*5, time.Minute*5, func() (bool, error) { + podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + return false, fmt.Errorf("list pods in ns %s: %w", metav1.NamespaceSystem, err) } - if len(infos) == 2 { - break + var foundComponents []componentInfo + for _, pod := range podList.Items { + switch { + case strings.HasPrefix(pod.Name, "kube-scheduler-"): + foundComponents = append(foundComponents, componentInfo{ + Name: pod.Name, + Port: kubeSchedulerPort, + IP: pod.Status.PodIP, + }) + case strings.HasPrefix(pod.Name, "kube-controller-manager-"): + foundComponents = append(foundComponents, componentInfo{ + Name: pod.Name, + Port: kubeControllerManagerPort, + IP: pod.Status.PodIP, + }) + } } + if len(foundComponents) != 2 { + klog.Infof("Only %d components found. Will retry.", len(foundComponents)) + klog.Infof("Found components: %v", foundComponents) + return false, nil + } + infos = foundComponents + return true, nil + }) + if err != nil { + return fmt.Errorf("missing component pods: %w", err) } - if len(infos) == 0 { - klog.Warningf("Can't find any pods in namespace %s to grab metrics from", metav1.NamespaceSystem) - return nil - } + + klog.Infof("Found components: %v", infos) const name = metricsProxyPod _, err = c.CoreV1().ServiceAccounts(metav1.NamespaceSystem).Create(context.TODO(), &v1.ServiceAccount{ From 781c65a40c2c8fa9625f66feab3328fde0f0bd1d Mon Sep 17 00:00:00 2001 From: Jian Zeng Date: Thu, 27 May 2021 16:06:34 +0800 Subject: [PATCH 2/2] fix: skip pods with empty ip --- test/e2e/framework/metrics/metrics_proxy.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/e2e/framework/metrics/metrics_proxy.go b/test/e2e/framework/metrics/metrics_proxy.go index ae46fff4ea3..afd04db61ab 100644 --- a/test/e2e/framework/metrics/metrics_proxy.go +++ b/test/e2e/framework/metrics/metrics_proxy.go @@ -50,6 +50,9 @@ func SetupMetricsProxy(c clientset.Interface) error { } var foundComponents []componentInfo for _, pod := range podList.Items { + if len(pod.Status.PodIP) == 0 { + continue + } switch { case strings.HasPrefix(pod.Name, "kube-scheduler-"): foundComponents = append(foundComponents, componentInfo{