Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-01 07:47:56 +00:00
Merge pull request #54377 from gnufied/fix-detach-metric-flake
Automatic merge from submit-queue (batch tested with PRs 54107, 54184, 54377, 54094, 54111). If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md

Fix detach metric flake by not using exact equals; also poll for an increase in the detach value.

Fixes https://github.com/kubernetes/kubernetes/issues/52871

I have run these tests in a tight loop for more than 3 hours and did not see them flake. The changes drop the exact-equality check and poll until the detach metric count increases.

```release-note
None
```
Commit: 47b4f0ed8a
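Before the diff, a minimal standalone sketch of the pattern this change adopts may help: poll the controller-manager's volume_detach counter until it rises above its previous value, then assert "greater than" instead of an exact +1. This is illustrative only; `getDetachCount` and `waitForDetachIncrease` are hypothetical stand-ins, not names from this PR (the real test uses `GrabFromControllerManager` and `getControllerStorageMetrics`).

```go
// Sketch of the polling pattern, assuming Gomega and k8s.io/apimachinery's wait package.
package metricsflake

import (
	"time"

	. "github.com/onsi/gomega"
	"k8s.io/apimachinery/pkg/util/wait"
)

// getDetachCount is a hypothetical stand-in; the real test grabs
// controller-manager metrics and reads the "volume_detach" counter.
func getDetachCount() (int64, bool) {
	return 0, false
}

// waitForDetachIncrease polls until the detach counter exceeds oldCount.
func waitForDetachIncrease(oldCount int64) int64 {
	var newCount int64
	backoff := wait.Backoff{Duration: 10 * time.Second, Factor: 1.2, Steps: 21}

	waitErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
		count, ok := getDetachCount()
		if !ok {
			return false, nil // metric not emitted yet; keep retrying
		}
		newCount = count
		// A busy cluster may record extra detaches, so only require an increase.
		return newCount > oldCount, nil
	})
	Expect(waitErr).NotTo(HaveOccurred(), "timed out waiting for volume_detach to increase")
	Expect(newCount).To(BeNumerically(">", oldCount))
	return newCount
}
```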
```diff
@@ -97,33 +97,9 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
 		framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name)
 		framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod))
 
-		backoff := wait.Backoff{
-			Duration: 10 * time.Second,
-			Factor:   1.2,
-			Steps:    21,
-		}
-
-		updatedStorageMetrics := make(map[string]int64)
-
-		waitErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
-			updatedMetrics, err := metricsGrabber.GrabFromControllerManager()
-
-			if err != nil {
-				framework.Logf("Error fetching controller-manager metrics")
-				return false, err
-			}
-			updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
-			metricCount := len(updatedStorageMetrics)
-			// Usually a pod deletion does not mean immediate volume detach
-			// we will have to retry to verify volume_detach metrics
-			_, detachMetricFound := updatedStorageMetrics["volume_detach"]
-			if metricCount < 3 || !detachMetricFound {
-				framework.Logf("Volume metrics not collected yet, going to retry")
-				return false, nil
-			}
-			return true, nil
-		})
-		Expect(waitErr).NotTo(HaveOccurred(), "Error fetching storage c-m metrics : %v", waitErr)
+		updatedStorageMetrics := waitForDetachAndGrabMetrics(storageOpMetrics, metricsGrabber)
+
+		Expect(len(updatedStorageMetrics)).ToNot(Equal(0), "Error fetching c-m updated storage metrics")
 
 		volumeOperations := []string{"volume_provision", "volume_detach", "volume_attach"}
```
```diff
@@ -190,6 +166,48 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() {
 	})
 })
 
+func waitForDetachAndGrabMetrics(oldMetrics map[string]int64, metricsGrabber *metrics.MetricsGrabber) map[string]int64 {
+	backoff := wait.Backoff{
+		Duration: 10 * time.Second,
+		Factor:   1.2,
+		Steps:    21,
+	}
+
+	updatedStorageMetrics := make(map[string]int64)
+	oldDetachCount, ok := oldMetrics["volume_detach"]
+	if !ok {
+		oldDetachCount = 0
+	}
+
+	verifyMetricFunc := func() (bool, error) {
+		updatedMetrics, err := metricsGrabber.GrabFromControllerManager()
+
+		if err != nil {
+			framework.Logf("Error fetching controller-manager metrics")
+			return false, err
+		}
+
+		updatedStorageMetrics = getControllerStorageMetrics(updatedMetrics)
+		newDetachCount, ok := updatedStorageMetrics["volume_detach"]
+
+		// if detach metrics are not yet there, we need to retry
+		if !ok {
+			return false, nil
+		}
+
+		// if old Detach count is more or equal to new detach count, that means detach
+		// event has not been observed yet.
+		if oldDetachCount >= newDetachCount {
+			return false, nil
+		}
+		return true, nil
+	}
+
+	waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
+	Expect(waitErr).NotTo(HaveOccurred(), "Timeout error fetching storage c-m metrics : %v", waitErr)
+	return updatedStorageMetrics
+}
+
 func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64, metricName string) {
 	oldCount, ok := oldMetrics[metricName]
 	// if metric does not exist in oldMap, it probably hasn't been emitted yet.
```
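For context on the `wait.Backoff` values used above (Duration 10s, Factor 1.2, Steps 21): assuming `ExponentialBackoff` sleeps between attempts and grows the delay by Factor each time, the worst-case polling budget comes to roughly half an hour. A small sketch of that arithmetic (not part of the PR):

```go
// Rough worst-case wait for wait.Backoff{Duration: 10s, Factor: 1.2, Steps: 21},
// assuming one growing sleep between each pair of attempts.
package main

import (
	"fmt"
	"time"
)

func main() {
	delay := 10 * time.Second
	var total time.Duration
	for i := 0; i < 20; i++ { // 21 attempts => up to 20 sleeps in between
		total += delay
		delay = time.Duration(float64(delay) * 1.2)
	}
	fmt.Println(total.Round(time.Second)) // roughly 31 minutes
}
```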
```diff
@@ -199,8 +217,10 @@ func verifyMetricCount(oldMetrics map[string]int64, newMetrics map[string]int64,
 
 	newCount, ok := newMetrics[metricName]
 	Expect(ok).To(BeTrue(), "Error getting updated metrics for %s", metricName)
-	Expect(oldCount + 1).To(Equal(newCount))
+	// It appears that in a busy cluster some spurious detaches are unavoidable
+	// even if the test is run serially. We really just verify if new count
+	// is greater than old count
+	Expect(newCount).To(BeNumerically(">", oldCount), "New count %d should be more than old count %d for action %s", newCount, oldCount, metricName)
 }
 
 func getControllerStorageMetrics(ms metrics.ControllerManagerMetrics) map[string]int64 {
```