From c0d9bdaf5e2890697428deb8129f937cbc649909 Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Wed, 27 Sep 2017 17:49:05 -0700 Subject: [PATCH 1/2] Improve PVC ref volume metric test robustness This test has been flaking. The current working theory is that volume stats collection didn't run in time to grab the metrics from the newly created pod. Made the following changes: - Added more logs to help debug future failures - Poll metrics a few additional times before failing the test --- test/e2e/storage/volume_metrics.go | 43 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/test/e2e/storage/volume_metrics.go b/test/e2e/storage/volume_metrics.go index a2e3f37a80a..1d6bc0cc882 100644 --- a/test/e2e/storage/volume_metrics.go +++ b/test/e2e/storage/volume_metrics.go @@ -145,16 +145,6 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() { pod, err = c.CoreV1().Pods(ns).Get(pod.Name, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred()) - // Wait for `VolumeStatsAggPeriod' to grab metrics - time.Sleep(1 * time.Minute) - - // Grab kubelet metrics from the node the pod was scheduled on - kubeMetrics, err := metricsGrabber.GrabFromKubelet(pod.Spec.NodeName) - Expect(err).NotTo(HaveOccurred(), "Error getting kubelet metrics : %v", err) - - framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name) - framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod)) - // Verify volume stat metrics were collected for the referenced PVC volumeStatKeys := []string{ kubeletmetrics.VolumeStatsUsedBytesKey, @@ -165,10 +155,26 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() { kubeletmetrics.VolumeStatsInodesUsedKey, } - for _, key := range volumeStatKeys { - kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key) - verifyVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) - } + waitErr := wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) { + // Grab kubelet metrics from the node the pod was scheduled on + kubeMetrics, err := metricsGrabber.GrabFromKubelet(pod.Spec.NodeName) + if err != nil { + framework.Logf("Error fetching kubelet metrics") + return false, err + } + + for _, key := range volumeStatKeys { + kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key) + if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) { + return false, nil + } + } + return true, nil + }) + Expect(waitErr).NotTo(HaveOccurred(), "Error finding volume metrics : %v", waitErr) + + framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name) + framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod)) }) }) @@ -202,12 +208,15 @@ func getControllerStorageMetrics(ms metrics.ControllerManagerMetrics) map[string return result } -// Verifies the specified metrics are in `kubeletMetrics` -func verifyVolumeStatMetric(metricKeyName string, namespace string, pvcName string, kubeletMetrics metrics.KubeletMetrics) { +// Finds the sample in the specified metric from `KubeletMetrics` tagged with +// the specified namespace and pvc name +func findVolumeStatMetric(metricKeyName string, namespace string, pvcName string, kubeletMetrics metrics.KubeletMetrics) bool { found := false errCount := 0 + framework.Logf("Looking for sample in metric `%s` tagged with namespace `%s`, PVC `%s`", metricKeyName, namespace, pvcName) if samples, ok := kubeletMetrics[metricKeyName]; ok { for _, sample := range samples { + framework.Logf("Found sample %s", sample.String()) samplePVC, ok := sample.Metric["persistentvolumeclaim"] if !ok { framework.Logf("Error getting pvc for metric %s, sample %s", metricKeyName, sample.String()) @@ -226,5 +235,5 @@ func verifyVolumeStatMetric(metricKeyName string, namespace string, pvcName stri } } Expect(errCount).To(Equal(0), "Found invalid samples") - Expect(found).To(BeTrue(), "PVC %s, Namespace %s not found for %s", pvcName, namespace, metricKeyName) + return found } From efdae2060f761a542b02b8312b4e5ae3bd2e4871 Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Wed, 27 Sep 2017 19:36:04 -0700 Subject: [PATCH 2/2] Address review comments --- test/e2e/storage/volume_metrics.go | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/test/e2e/storage/volume_metrics.go b/test/e2e/storage/volume_metrics.go index 1d6bc0cc882..e6620296042 100644 --- a/test/e2e/storage/volume_metrics.go +++ b/test/e2e/storage/volume_metrics.go @@ -154,25 +154,33 @@ var _ = SIGDescribe("[Serial] Volume metrics", func() { kubeletmetrics.VolumeStatsInodesFreeKey, kubeletmetrics.VolumeStatsInodesUsedKey, } - + // Poll kubelet metrics waiting for the volume to be picked up + // by the volume stats collector + var kubeMetrics metrics.KubeletMetrics waitErr := wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) { + framework.Logf("Grabbing Kubelet metrics") // Grab kubelet metrics from the node the pod was scheduled on - kubeMetrics, err := metricsGrabber.GrabFromKubelet(pod.Spec.NodeName) + var err error + kubeMetrics, err = metricsGrabber.GrabFromKubelet(pod.Spec.NodeName) if err != nil { framework.Logf("Error fetching kubelet metrics") return false, err } - - for _, key := range volumeStatKeys { - kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key) - if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) { - return false, nil - } + key := volumeStatKeys[0] + kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key) + if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) { + return false, nil } return true, nil }) Expect(waitErr).NotTo(HaveOccurred(), "Error finding volume metrics : %v", waitErr) + for _, key := range volumeStatKeys { + kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key) + found := findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) + Expect(found).To(BeTrue(), "PVC %s, Namespace %s not found for %s", pvc.Name, pvc.Namespace, kubeletKeyName) + } + framework.Logf("Deleting pod %q/%q", pod.Namespace, pod.Name) framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod)) })