topology-mgr: metrics: Deflake Topology Manager metrics e2e tests

On local execution of Topology Manager metrics tests, the tests pass rate was 100%.
Yet, we can see that the Topology Manager metrics tests are failing in upstream
CI consistently: https://testgrid.k8s.io/sig-node-presubmits#pr-kubelet-serial-gce-e2e-topology-manager.

From the logs, it was identified that these failures are because of timeouts,
so we are increasing the default timeout as well as polling interval frequency
of obtaining KubeletMetrics to deflake this test.

We have noticed a similar flake in case of CPU manager metrics tests as well:
https://prow.k8s.io/view/gs/kubernetes-jenkins/pr-logs/directory/pull-kubernetes-node-kubelet-serial-cpu-manager/1701615009836044288.
Once it is confirmed that the issue is resolved for Topology Manager test,
we will be fix this for CPU Manager as well in a follow-up PR.

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
Swati Sehgal 2023-09-20 13:11:13 +01:00
parent 09200e9c92
commit f5d915b594

View File

@ -92,9 +92,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial] [Feature:TopologyManager]
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
})
ginkgo.It("should report admission failures when the topology manager alignment is known to fail", func(ctx context.Context) {
@ -118,9 +118,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial] [Feature:TopologyManager]
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
})
ginkgo.It("should not report any admission failures when the topology manager alignment is expected to succeed", func(ctx context.Context) {
@ -144,9 +144,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial] [Feature:TopologyManager]
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
gomega.Eventually(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
})
})
})