Merge pull request #54377 from gnufied/fix-detach-metric-flake

Automatic merge from submit-queue (batch tested with PRs 54107, 54184, 54377, 54094, 54111). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Fix detach metric flake by not using exact equals

Also poll for detach value increase.

Fixes https://github.com/kubernetes/kubernetes/issues/52871 

I have run these tests for more than 3 hours in a tight loop and did not see them flake. The changes here include dropping the exact equality test and making sure we poll for an increase in the detach metric count.


```release-note
None
```
This commit is contained in:
Kubernetes Submit Queue 2017-10-24 15:59:09 -07:00 committed by GitHub
commit 47b4f0ed8a

View File

@ -97,33 +97,9 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name)
framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod))
backoff := wait.Backoff{
Duration: 10 * time.Second,
Factor: 1.2,
Steps: 21,
}
updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber)
updatedStorageMetrics := make(map[string]int64)
waitErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
updatedMetrics, err := metricsGrabber.GrabFromControllerManager()
if err != nil {
framework.Logf("Error fetching controller-manager metrics")
return false, err
}
updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
metricCount := len(updatedStorageMetrics)
// Usually a pod deletion does not mean immediate volume detach
// we will have to retry to verify volume_detach metrics
_, detachMetricFound := updatedStorageMetrics["volume_detach"]
if metricCount < 3 || !detachMetricFound {
framework.Logf("Volume metrics not collected yet, going to retry")
return false, nil
}
return true, nil
})
Expect(waitErr).NotTo(HaveOccurred(), "Error fetching storage c-m metrics : %v", waitErr)
Expect(len(updatedStorageMetrics)).ToNot(Equal(0), "Error fetching c-m updated storage metrics")
volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"}
@ -190,6 +166,48 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
})
})
// waitForDetachAndGrabMetrics polls the controller-manager metrics until the
// "volume_detach" count is strictly greater than the count recorded in
// oldMetrics, and returns the storage metrics from the most recent grab.
//
// A missing "volume_detach" entry in oldMetrics is treated as a count of zero.
// Polling uses an exponential backoff (10s initial delay, factor 1.2, 21
// steps); the backoff result is asserted with Expect, so an error from the
// grabber or an exhausted backoff is reported through that assertion.
func waitForDetachAndGrabMetrics(oldMetrics map[string]int64, metricsGrabber *metrics.MetricsGrabber) map[string]int64 {
	// Indexing a map with an absent key yields the zero value, which is
	// exactly the baseline wanted when no detach has been recorded yet.
	previousDetaches := oldMetrics["volume_detach"]

	latest := make(map[string]int64)
	detachObserved := func() (bool, error) {
		grabbed, err := metricsGrabber.GrabFromControllerManager()
		if err != nil {
			framework.Logf("Error fetching controller-manager metrics")
			return false, err
		}
		latest = getControllerStorageMetrics(grabbed)

		// Keep retrying while the detach metric is absent or has not grown
		// past the old count: either way the detach has not been observed.
		currentDetaches, present := latest["volume_detach"]
		return present && currentDetaches > previousDetaches, nil
	}

	pollErr := wait.ExponentialBackoff(wait.Backoff{
		Duration: 10 * time.Second,
		Factor:   1.2,
		Steps:    21,
	}, detachObserved)
	Expect(pollErr).NotTo(HaveOccurred(), "Timeout error fetching storage c-m metrics : %v", pollErr)
	return latest
}
func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, metricName string) {
oldCount, ok := oldMetrics[metricName]
// if metric does not exist in oldMap, it probably hasn't been emitted yet.
@ -199,8 +217,10 @@ func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64,
newCount, ok := newMetrics[metricName]
Expect(ok).To(BeTrue(), "Error getting updated metrics for %s", metricName)
Expect(oldCount + 1).To(Equal(newCount))
// It appears that in a busy cluster some spurious detaches are unavoidable
// even if the test is run serially. We really just verify if new count
// is greater than old count
Expect(newCount).To(BeNumerically(">", oldCount), "New count %d should be more than old count %d for action %s", newCount, oldCount, metricName)
}
func getControllerStorageMetrics(ms metrics.ControllerManagerMetrics) map[string]int64 {