Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-30 15:05:27 +00:00)
Merge pull request #54377 from gnufied/fix-detach-metric-flake
Automatic merge from submit-queue (batch tested with PRs 54107, 54184, 54377, 54094, 54111). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Fix detach metric flake by not using exact equals

Also poll for the detach value to increase. Fixes https://github.com/kubernetes/kubernetes/issues/52871

I have run these tests for more than 3 hours in a tight loop and did not see them flake. The changes here drop the exact-equality check and poll for an increase in the detach metric count.

```release-note
None
```
This commit is contained in: commit 47b4f0ed8a
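The idea behind the fix, distilled outside the e2e framework: remember the previously observed `volume_detach` count and poll with exponential backoff until the counter has strictly increased, instead of asserting an exact "+1" value. The sketch below is illustrative only; it uses the same `k8s.io/apimachinery/pkg/util/wait` package and backoff shape as the test, but `waitForCounterIncrease` and `fetch` are hypothetical names, not helpers from this PR.

```go
// Minimal sketch (assumptions noted above): poll a counter until it moves
// past its old value, rather than asserting an exact increment.
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// waitForCounterIncrease retries fetch with exponential backoff until the
// returned value is strictly greater than oldCount. It uses the same backoff
// parameters as the e2e test (10s base, factor 1.2, 21 steps) and returns
// wait.ErrWaitTimeout if the counter never increases.
func waitForCounterIncrease(oldCount int64, fetch func() (int64, error)) error {
	backoff := wait.Backoff{
		Duration: 10 * time.Second,
		Factor:   1.2,
		Steps:    21,
	}
	return wait.ExponentialBackoff(backoff, func() (bool, error) {
		newCount, err := fetch()
		if err != nil {
			// A scrape error aborts the poll; the caller decides how to report it.
			return false, err
		}
		// Not done yet if the counter has not moved past its old value.
		return newCount > oldCount, nil
	})
}

func main() {
	// Fake metric source that increments on every scrape, so the poll
	// succeeds on the first attempt.
	var value int64
	fetch := func() (int64, error) {
		value++
		return value, nil
	}
	fmt.Println("poll result:", waitForCounterIncrease(0, fetch))
}
```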
```diff
@@ -97,33 +97,9 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
 		framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name)
 		framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod))
 
-		backoff := wait.Backoff{
-			Duration: 10 * time.Second,
-			Factor:   1.2,
-			Steps:    21,
-		}
+		updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber)
 
-		updatedStorageMetrics := make(map[string]int64)
-
-		waitErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
-			updatedMetrics, err := metricsGrabber.GrabFromControllerManager()
-
-			if err != nil {
-				framework.Logf("Error fetching controller-manager metrics")
-				return false, err
-			}
-			updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
-			metricCount := len(updatedStorageMetrics)
-			// Usually a pod deletion does not mean immediate volume detach
-			// we will have to retry to verify volume_detach metrics
-			_, detachMetricFound := updatedStorageMetrics["volume_detach"]
-			if metricCount < 3 || !detachMetricFound {
-				framework.Logf("Volume metrics not collected yet, going to retry")
-				return false, nil
-			}
-			return true, nil
-		})
-		Expect(waitErr).NotTo(HaveOccurred(), "Error fetching storage c-m metrics : %v", waitErr)
 		Expect(len(updatedStorageMetrics)).ToNot(Equal(0), "Error fetching c-m updated storage metrics")
 
 		volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"}
```
```diff
@@ -190,6 +166,48 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
 	})
 })
 
+func waitForDetachAndGrabMetrics(oldMetrics map[string]int64, metricsGrabber *metrics.MetricsGrabber) map[string]int64 {
+	backoff := wait.Backoff{
+		Duration: 10 * time.Second,
+		Factor:   1.2,
+		Steps:    21,
+	}
+
+	updatedStorageMetrics := make(map[string]int64)
+	oldDetachCount, ok := oldMetrics["volume_detach"]
+	if !ok {
+		oldDetachCount = 0
+	}
+
+	verifyMetricFunc := func() (bool, error) {
+		updatedMetrics, err := metricsGrabber.GrabFromControllerManager()
+
+		if err != nil {
+			framework.Logf("Error fetching controller-manager metrics")
+			return false, err
+		}
+
+		updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
+		newDetachCount, ok := updatedStorageMetrics["volume_detach"]
+
+		// if detach metrics are not yet there, we need to retry
+		if !ok {
+			return false, nil
+		}
+
+		// if old Detach count is more or equal to new detach count, that means detach
+		// event has not been observed yet.
+		if oldDetachCount >= newDetachCount {
+			return false, nil
+		}
+		return true, nil
+	}
+
+	waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
+	Expect(waitErr).NotTo(HaveOccurred(), "Timeout error fetching storage c-m metrics : %v", waitErr)
+	return updatedStorageMetrics
+}
+
 func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, metricName string) {
 	oldCount, ok := oldMetrics[metricName]
 	// if metric does not exist in oldMap, it probably hasn't been emitted yet.
```
```diff
@@ -199,8 +217,10 @@ func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, metricName string) {
 
 	newCount, ok := newMetrics[metricName]
 	Expect(ok).To(BeTrue(), "Error getting updated metrics for %s", metricName)
-
-	Expect(oldCount + 1).To(Equal(newCount))
+	// It appears that in a busy cluster some spurious detaches are unavoidable
+	// even if the test is run serially. We really just verify if new count
+	// is greater than old count
+	Expect(newCount).To(BeNumerically(">", oldCount), "New count %d should be more than old count %d for action %s", newCount, oldCount, metricName)
 }
 
 func getControllerStorageMetrics(ms metrics.ControllerManagerMetrics) map[string]int64 {
```