From 83717f5286df8aa718bd0dd4d1ffed6e0f7238da Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Sun, 22 Oct 2017 09:49:50 -0400 Subject: [PATCH] Fix detach metric flake by not using exact equals Also poll for detach value increase. --- test/e2e/storage/volume_metrics.go | 76 +++++++++++++++++++----------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/test/e2e/storage/volume_metrics.go b/test/e2e/storage/volume_metrics.go index 3a152f806ec..44c4fb34704 100644 --- a/test/e2e/storage/volume_metrics.go +++ b/test/e2e/storage/volume_metrics.go @@ -97,33 +97,9 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() { framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name) framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod)) - backoff := wait.Backoff{ - Duration: 10 * time.Second, - Factor: 1.2, - Steps: 21, - } + updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber) - updatedStorageMetrics := make(map[string]int64) - - waitErr := wait.ExponentialBackoff(backoff, func() (bool, error) { - updatedMetrics, err := metricsGrabber.GrabFromControllerManager() - - if err != nil { - framework.Logf("Error fetching controller-manager metrics") - return false, err - } - updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics) - metricCount := len(updatedStorageMetrics) - // Usually a pod deletion does not mean immediate volume detach - // we will have to retry to verify volume_detach metrics - _, detachMetricFound := updatedStorageMetrics["volume_detach"] - if metricCount < 3 || !detachMetricFound { - framework.Logf("Volume metrics not collected yet, going to retry") - return false, nil - } - return true, nil - }) - Expect(waitErr).NotTo(HaveOccurred(), "Error fetching storage c-m metrics : %v", waitErr) + Expect(len(updatedStorageMetrics)).ToNot(Equal(0), "Error fetching c-m updated storage metrics") volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"} @@ -190,6 +166,48 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() { }) }) +func waitForDetachAndGrabMetrics(oldMetrics map[string]int64, metricsGrabber *metrics.MetricsGrabber) map[string]int64 { + backoff := wait.Backoff{ + Duration: 10 * time.Second, + Factor: 1.2, + Steps: 21, + } + + updatedStorageMetrics := make(map[string]int64) + oldDetachCount, ok := oldMetrics["volume_detach"] + if !ok { + oldDetachCount = 0 + } + + verifyMetricFunc := func() (bool, error) { + updatedMetrics, err := metricsGrabber.GrabFromControllerManager() + + if err != nil { + framework.Logf("Error fetching controller-manager metrics") + return false, err + } + + updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics) + newDetachCount, ok := updatedStorageMetrics["volume_detach"] + + // if detach metrics are not yet there, we need to retry + if !ok { + return false, nil + } + + // if old Detach count is more or equal to new detach count, that means detach + // event has not been observed yet. + if oldDetachCount >= newDetachCount { + return false, nil + } + return true, nil + } + + waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc) + Expect(waitErr).NotTo(HaveOccurred(), "Timeout error fetching storage c-m metrics : %v", waitErr) + return updatedStorageMetrics +} + func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, metricName string) { oldCount, ok := oldMetrics[metricName] // if metric does not exist in oldMap, it probably hasn't been emitted yet. @@ -199,8 +217,10 @@ func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, newCount, ok := newMetrics[metricName] Expect(ok).To(BeTrue(), "Error getting updated metrics for %s", metricName) - - Expect(oldCount + 1).To(Equal(newCount)) + // It appears that in a busy cluster some spurious detaches are unavoidable + // even if the test is run serially. We really just verify if new count + // is greater than old count + Expect(newCount).To(BeNumerically(">", oldCount), "New count %d should be more than old count %d for action %s", newCount, oldCount, metricName) } func getControllerStorageMetrics(ms metrics.ControllerManagerMetrics) map[string]int64 {