From f5d915b594771f416227373af509729e358cd951 Mon Sep 17 00:00:00 2001 From: Swati Sehgal Date: Wed, 20 Sep 2023 13:11:13 +0100 Subject: [PATCH] topology-mgr: metrics: Deflake Topology Manager metrics e2e tests When the Topology Manager metrics tests are executed locally, the pass rate is 100%. Yet the same tests are consistently failing in upstream CI: https://testgrid.k8s.io/sig-node-presubmits#pr-kubelet-serial-gce-e2e-topology-manager. The logs show that these failures are caused by timeouts, so to deflake the tests we are increasing the timeout (from 1 minute to 2 minutes) and polling the Kubelet metrics more frequently (every 10 seconds instead of every 15 seconds). We have noticed a similar flake in the CPU Manager metrics tests as well: https://prow.k8s.io/view/gs/kubernetes-jenkins/pr-logs/directory/pull-kubernetes-node-kubelet-serial-cpu-manager/1701615009836044288. Once it is confirmed that the issue is resolved for the Topology Manager tests, we will fix the CPU Manager tests in a follow-up PR. 
Signed-off-by: Swati Sehgal --- test/e2e_node/topology_manager_metrics_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/e2e_node/topology_manager_metrics_test.go b/test/e2e_node/topology_manager_metrics_test.go index ab1c71d697d..8e6480ec123 100644 --- a/test/e2e_node/topology_manager_metrics_test.go +++ b/test/e2e_node/topology_manager_metrics_test.go @@ -92,9 +92,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial] [Feature:TopologyManager] }) ginkgo.By("Giving the Kubelet time to start up and produce metrics") - gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics) ginkgo.By("Ensuring the metrics match the expectations a few more times") - gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics) }) ginkgo.It("should report admission failures when the topology manager alignment is known to fail", func(ctx context.Context) { @@ -118,9 +118,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial] [Feature:TopologyManager] }) ginkgo.By("Giving the Kubelet time to start up and produce metrics") - gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics) ginkgo.By("Ensuring the metrics match the expectations a few more times") - gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics) }) ginkgo.It("should not report any admission failures when the topology manager alignment is expected to succeed", func(ctx 
context.Context) { @@ -144,9 +144,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial] [Feature:TopologyManager] }) ginkgo.By("Giving the Kubelet time to start up and produce metrics") - gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics) ginkgo.By("Ensuring the metrics match the expectations a few more times") - gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics) + gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics) }) }) })