From 99a015705ba516802fcde6c6f5252e767c05db79 Mon Sep 17 00:00:00 2001
From: Jonathan Basseri
Date: Fri, 10 Jan 2020 19:08:46 -0800
Subject: [PATCH] storage e2e: Add context to timeout errors

Many times an e2e test fails with an unexpected error,
"timed out waiting for the condition".

Useful information may be in the test logs, but debugging e2e test
failures will be much faster if we add context to errors when they
happen.

This change makes sure we add context to all errors returned from
helpers like wait.Poll().
---
 test/e2e/storage/csi_mock_volume.go           | 10 ++++-
 test/e2e/storage/detach_mounted.go            | 26 ++++++++---
 test/e2e/storage/drivers/csi.go               |  6 ++-
 test/e2e/storage/mounted_volume_resize.go     | 11 ++++-
 test/e2e/storage/pd.go                        |  6 +--
 test/e2e/storage/persistent_volumes-local.go  | 31 ++++++++-----
 test/e2e/storage/regional_pd.go               | 11 +++--
 test/e2e/storage/testsuites/subpath.go        |  6 ++-
 test/e2e/storage/testsuites/volume_expand.go  | 45 ++++++++++++++-----
 test/e2e/storage/testsuites/volumelimits.go   | 17 ++++---
 test/e2e/storage/utils/utils.go               |  2 +-
 test/e2e/storage/volume_metrics.go            | 13 +++---
 test/e2e/storage/volume_provisioning.go       | 25 ++++++-----
 test/e2e/storage/vsphere/vsphere_utils.go     | 36 +++++++--------
 .../vsphere/vsphere_volume_node_poweroff.go   | 19 ++++----
 15 files changed, 164 insertions(+), 100 deletions(-)

diff --git a/test/e2e/storage/csi_mock_volume.go b/test/e2e/storage/csi_mock_volume.go
index 5c28bdc4612..82a23372e37 100644
--- a/test/e2e/storage/csi_mock_volume.go
+++ b/test/e2e/storage/csi_mock_volume.go
@@ -610,7 +610,10 @@ func waitForMaxVolumeCondition(pod *v1.Pod, cs clientset.Interface) error {
         }
         return false, nil
     })
-    return waitErr
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for pod %s/%s to have max volume condition: %v", pod.Namespace, pod.Name, waitErr)
+    }
+    return nil
 }
 
 func checkCSINodeForLimits(nodeName string, driverName string, cs clientset.Interface) (int32, error) {
@@ -627,7 +630,10 @@ func checkCSINodeForLimits(nodeName string, driverName string, cs clientset.Inte
         }
         return false, nil
     })
-    return attachLimit, waitErr
+    if waitErr != nil {
+        return 0, fmt.Errorf("error waiting for non-zero volume limit of driver %s on node %s: %v", driverName, nodeName, waitErr)
+    }
+    return attachLimit, nil
 }
 
 func startPausePod(cs clientset.Interface, t testsuites.StorageClassTest, node e2epod.NodeSelection, ns string) (*storagev1.StorageClass, *v1.PersistentVolumeClaim, *v1.Pod) {
diff --git a/test/e2e/storage/detach_mounted.go b/test/e2e/storage/detach_mounted.go
index 7a797f7e241..72aa5e7e17e 100644
--- a/test/e2e/storage/detach_mounted.go
+++ b/test/e2e/storage/detach_mounted.go
@@ -130,23 +130,27 @@ func getUniqueVolumeName(pod *v1.Pod, driverName string) string {
 }
 
 func waitForVolumesNotInUse(client clientset.Interface, nodeName, volumeName string) error {
-    return wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
+    waitErr := wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
         node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
         if err != nil {
             return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
         }
-        volumeInUSe := node.Status.VolumesInUse
-        for _, volume := range volumeInUSe {
+        volumeInUse := node.Status.VolumesInUse
+        for _, volume := range volumeInUse {
             if string(volume) == volumeName {
                 return false, nil
             }
         }
         return true, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for volumes to not be in use: %v", waitErr)
+    }
+    return nil
 }
 
 func waitForVolumesAttached(client clientset.Interface, nodeName, volumeName string) error {
-    return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) {
+    waitErr := wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) {
         node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
         if err != nil {
             return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
         }
@@ -159,22 +163,30 @@ func waitForVolumesAttached(client clientset.Interface, nodeName, volumeName str
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for volume %v to attach to node %v: %v", volumeName, nodeName, waitErr)
+    }
+    return nil
 }
 
 func waitForVolumesInUse(client clientset.Interface, nodeName, volumeName string) error {
-    return wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
+    waitErr := wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
         node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
         if err != nil {
             return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
         }
-        volumeInUSe := node.Status.VolumesInUse
-        for _, volume := range volumeInUSe {
+        volumeInUse := node.Status.VolumesInUse
+        for _, volume := range volumeInUse {
             if string(volume) == volumeName {
                 return true, nil
             }
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for volume %v to be in use on node %v: %v", volumeName, nodeName, waitErr)
+    }
+    return nil
 }
 
 func getFlexVolumePod(volumeSource v1.VolumeSource, nodeName string) *v1.Pod {
diff --git a/test/e2e/storage/drivers/csi.go b/test/e2e/storage/drivers/csi.go
index 0a24a73b251..6a6fa3c82e5 100644
--- a/test/e2e/storage/drivers/csi.go
+++ b/test/e2e/storage/drivers/csi.go
@@ -490,7 +490,7 @@ func waitForCSIDriverRegistrationOnAllNodes(driverName string, cs clientset.Inte
 func waitForCSIDriverRegistrationOnNode(nodeName string, driverName string, cs clientset.Interface) error {
     const csiNodeRegisterTimeout = 1 * time.Minute
 
-    return wait.PollImmediate(10*time.Second, csiNodeRegisterTimeout, func() (bool, error) {
+    waitErr := wait.PollImmediate(10*time.Second, csiNodeRegisterTimeout, func() (bool, error) {
         csiNode, err := cs.StorageV1().CSINodes().Get(nodeName, metav1.GetOptions{})
         if err != nil && !apierrors.IsNotFound(err) {
             return false, err
@@ -502,4 +502,8 @@ func waitForCSIDriverRegistrationOnNode(nodeName string, driverName string, cs c
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for CSI driver %s registration on node %s: %v", driverName, nodeName, waitErr)
+    }
+    return nil
 }
diff --git a/test/e2e/storage/mounted_volume_resize.go b/test/e2e/storage/mounted_volume_resize.go
index 3008575aa6f..e4344ffbf48 100644
--- a/test/e2e/storage/mounted_volume_resize.go
+++ b/test/e2e/storage/mounted_volume_resize.go
@@ -17,6 +17,7 @@ limitations under the License.
 package storage
 
 import (
+    "fmt"
     "time"
 
     "github.com/onsi/ginkgo"
@@ -170,6 +171,9 @@ func waitForDeploymentToRecreatePod(client clientset.Interface, deployment *apps
     var runningPod v1.Pod
     waitErr := wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) {
         podList, err := e2edeploy.GetPodsForDeployment(client, deployment)
+        if err != nil {
+            return false, fmt.Errorf("failed to get pods for deployment: %v", err)
+        }
         for _, pod := range podList.Items {
             switch pod.Status.Phase {
             case v1.PodRunning:
@@ -179,7 +183,10 @@ func waitForDeploymentToRecreatePod(client clientset.Interface, deployment *apps
                 return false, conditions.ErrPodCompleted
             }
         }
-        return false, err
+        return false, nil
     })
-    return runningPod, waitErr
+    if waitErr != nil {
+        return runningPod, fmt.Errorf("error waiting for recreated pod: %v", waitErr)
+    }
+    return runningPod, nil
 }
diff --git a/test/e2e/storage/pd.go b/test/e2e/storage/pd.go
index 36b7c584e16..58f2500900e 100644
--- a/test/e2e/storage/pd.go
+++ b/test/e2e/storage/pd.go
@@ -428,13 +428,13 @@ var _ = utils.SIGDescribe("Pod Disks", func() {
             }
             ginkgo.By("evicting host0Pod")
             err = wait.PollImmediate(framework.Poll, podEvictTimeout, func() (bool, error) {
-                err = cs.CoreV1().Pods(ns).Evict(evictTarget)
-                if err != nil {
+                if err := cs.CoreV1().Pods(ns).Evict(evictTarget); err != nil {
+                    framework.Logf("Failed to evict host0Pod, ignoring error: %v", err)
                     return false, nil
                 }
                 return true, nil
             })
-            framework.ExpectNoError(err, fmt.Sprintf("failed to evict host0Pod after %v", podEvictTimeout))
+            framework.ExpectNoError(err, "failed to evict host0Pod after %v", podEvictTimeout)
         }
 
         ginkgo.By("waiting for pd to detach from host0")
diff --git a/test/e2e/storage/persistent_volumes-local.go b/test/e2e/storage/persistent_volumes-local.go
index 77f4f57b266..960e9b74693 100644
--- a/test/e2e/storage/persistent_volumes-local.go
+++ b/test/e2e/storage/persistent_volumes-local.go
@@ -570,7 +570,8 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
             }()
 
             ginkgo.By("Waiting for all pods to complete successfully")
-            err := wait.PollImmediate(time.Second, 5*time.Minute, func() (done bool, err error) {
+            const completeTimeout = 5 * time.Minute
+            waitErr := wait.PollImmediate(time.Second, completeTimeout, func() (done bool, err error) {
                 podsList, err := config.client.CoreV1().Pods(config.ns).List(metav1.ListOptions{})
                 if err != nil {
                     return false, err
@@ -589,15 +590,15 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
                         delete(pods, pod.Name)
                         numFinished++
                         framework.Logf("%v/%v pods finished", numFinished, totalPods)
-                    case v1.PodFailed:
                     case v1.PodUnknown:
                         return false, fmt.Errorf("pod %v is in %v phase", pod.Name, pod.Status.Phase)
+                    case v1.PodFailed:
                     }
                 }
 
                 return numFinished == totalPods, nil
             })
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(waitErr, "some pods failed to complete within %v", completeTimeout)
         })
     })
 
@@ -648,7 +649,8 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
                 pods[pod.Name] = pod
             }
             ginkgo.By("Wait for all pods are running")
-            err = wait.PollImmediate(time.Second, 5*time.Minute, func() (done bool, err error) {
+            const runningTimeout = 5 * time.Minute
+            waitErr := wait.PollImmediate(time.Second, runningTimeout, func() (done bool, err error) {
                 podsList, err := config.client.CoreV1().Pods(config.ns).List(metav1.ListOptions{})
                 if err != nil {
                     return false, err
@@ -662,7 +664,7 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
                 }
                 return runningPods == count, nil
             })
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(waitErr, "Some pods are not running within %v", runningTimeout)
         })
     })
 })
@@ -918,17 +920,26 @@ func createLocalPVCsPVs(config *localTestConfig, volumes []*localTestVolume, mod
             verifyLocalVolume(config, volume)
         }
     } else {
-        // Verify PVCs are not bound
+        // Verify PVCs are not bound by waiting for phase==bound with a timeout and asserting that we hit the timeout.
         // There isn't really a great way to verify this without making the test be slow...
-        err = wait.PollImmediate(time.Second, 10*time.Second, func() (done bool, err error) {
+        const bindTimeout = 10 * time.Second
+        waitErr := wait.PollImmediate(time.Second, bindTimeout, func() (done bool, err error) {
             for _, volume := range volumes {
                 pvc, err := config.client.CoreV1().PersistentVolumeClaims(volume.pvc.Namespace).Get(volume.pvc.Name, metav1.GetOptions{})
-                framework.ExpectNoError(err)
-                framework.ExpectEqual(pvc.Status.Phase, v1.ClaimPending)
+                if err != nil {
+                    return false, fmt.Errorf("failed to get PVC %s/%s: %v", volume.pvc.Namespace, volume.pvc.Name, err)
+                }
+                if pvc.Status.Phase != v1.ClaimPending {
+                    return true, nil
+                }
             }
             return false, nil
         })
-        framework.ExpectError(err)
+        if waitErr == wait.ErrWaitTimeout {
+            framework.Logf("PVCs were not bound within %v (that's good)", bindTimeout)
+            waitErr = nil
+        }
+        framework.ExpectNoError(waitErr, "Error making sure PVCs are not bound")
     }
 }
diff --git a/test/e2e/storage/regional_pd.go b/test/e2e/storage/regional_pd.go
index 4265ba560bb..e79d8040a21 100644
--- a/test/e2e/storage/regional_pd.go
+++ b/test/e2e/storage/regional_pd.go
@@ -252,18 +252,17 @@ func testZonalFailover(c clientset.Interface, ns string) {
     } else {
         otherZone = cloudZones[0]
     }
-    err = wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
-        framework.Logf("checking whether new pod is scheduled in zone %q", otherZone)
-        pod = getPod(c, ns, regionalPDLabels)
-        nodeName = pod.Spec.NodeName
-        node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+    waitErr := wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
+        framework.Logf("Checking whether new pod is scheduled in zone %q", otherZone)
+        pod := getPod(c, ns, regionalPDLabels)
+        node, err := c.CoreV1().Nodes().Get(pod.Spec.NodeName, metav1.GetOptions{})
         if err != nil {
             return false, nil
         }
         newPodZone := node.Labels[v1.LabelZoneFailureDomain]
         return newPodZone == otherZone, nil
     })
-    framework.ExpectNoError(err, "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
+    framework.ExpectNoError(waitErr, "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
 
     err = waitForStatefulSetReplicasReady(statefulSet.Name, ns, c, 3*time.Second, framework.RestartPodReadyAgainTimeout)
     if err != nil {
diff --git a/test/e2e/storage/testsuites/subpath.go b/test/e2e/storage/testsuites/subpath.go
index 79fe65d7278..5069a86c358 100644
--- a/test/e2e/storage/testsuites/subpath.go
+++ b/test/e2e/storage/testsuites/subpath.go
@@ -759,7 +759,7 @@ func waitForPodSubpathError(f *framework.Framework, pod *v1.Pod, allowContainerT
         return fmt.Errorf("failed to find container that uses subpath")
     }
 
-    return wait.PollImmediate(framework.Poll, framework.PodStartTimeout, func() (bool, error) {
+    waitErr := wait.PollImmediate(framework.Poll, framework.PodStartTimeout, func() (bool, error) {
         pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{})
         if err != nil {
             return false, err
         }
@@ -786,6 +786,10 @@ func waitForPodSubpathError(f *framework.Framework, pod *v1.Pod, allowContainerT
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for pod subpath error to occur: %v", waitErr)
+    }
+    return nil
 }
 
 // Tests that the existing subpath mount is detected when a container restarts
diff --git a/test/e2e/storage/testsuites/volume_expand.go b/test/e2e/storage/testsuites/volume_expand.go
index c982f87b1dd..b0e06f19455 100644
--- a/test/e2e/storage/testsuites/volume_expand.go
+++ b/test/e2e/storage/testsuites/volume_expand.go
@@ -265,22 +265,32 @@ func ExpandPVCSize(origPVC *v1.PersistentVolumeClaim, size resource.Quantity, c
     pvcName := origPVC.Name
     updatedPVC := origPVC.DeepCopy()
 
+    // Retry the update on error, until we hit a timeout.
+    // TODO: Determine whether "retry with timeout" is appropriate here. Maybe we should only retry on version conflict.
+    var lastUpdateError error
     waitErr := wait.PollImmediate(resizePollInterval, 30*time.Second, func() (bool, error) {
         var err error
         updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(pvcName, metav1.GetOptions{})
         if err != nil {
-            return false, fmt.Errorf("error fetching pvc %q for resizing with %v", pvcName, err)
+            return false, fmt.Errorf("error fetching pvc %q for resizing: %v", pvcName, err)
         }
 
         updatedPVC.Spec.Resources.Requests[v1.ResourceStorage] = size
         updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(updatedPVC)
-        if err == nil {
-            return true, nil
+        if err != nil {
+            framework.Logf("Error updating pvc %s: %v", pvcName, err)
+            lastUpdateError = err
+            return false, nil
         }
-        framework.Logf("Error updating pvc %s with %v", pvcName, err)
-        return false, nil
+        return true, nil
     })
-    return updatedPVC, waitErr
+    if waitErr == wait.ErrWaitTimeout {
+        return nil, fmt.Errorf("timed out attempting to update PVC size. last update error: %v", lastUpdateError)
+    }
+    if waitErr != nil {
+        return nil, fmt.Errorf("failed to expand PVC size (check logs for error): %v", waitErr)
+    }
+    return updatedPVC, nil
 }
 
 // WaitForResizingCondition waits for the pvc condition to be PersistentVolumeClaimResizing
@@ -290,7 +300,7 @@ func WaitForResizingCondition(pvc *v1.PersistentVolumeClaim, c clientset.Interfa
         updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pvc.Name, metav1.GetOptions{})
 
         if err != nil {
-            return false, fmt.Errorf("error fetching pvc %q for checking for resize status : %v", pvc.Name, err)
+            return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err)
         }
 
         pvcConditions := updatedPVC.Status.Conditions
@@ -301,13 +311,16 @@ func WaitForResizingCondition(pvc *v1.PersistentVolumeClaim, c clientset.Interfa
         }
         return false, nil
     })
-    return waitErr
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for pvc %q to have resize status: %v", pvc.Name, waitErr)
+    }
+    return nil
 }
 
 // WaitForControllerVolumeResize waits for the controller resize to be finished
 func WaitForControllerVolumeResize(pvc *v1.PersistentVolumeClaim, c clientset.Interface, duration time.Duration) error {
     pvName := pvc.Spec.VolumeName
-    return wait.PollImmediate(resizePollInterval, duration, func() (bool, error) {
+    waitErr := wait.PollImmediate(resizePollInterval, duration, func() (bool, error) {
         pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage]
 
         pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
@@ -323,6 +336,10 @@ func WaitForControllerVolumeResize(pvc *v1.PersistentVolumeClaim, c clientset.In
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error while waiting for controller resize to finish: %v", waitErr)
+    }
+    return nil
 }
 
 // WaitForPendingFSResizeCondition waits for pvc to have resize condition
@@ -347,7 +364,10 @@ func WaitForPendingFSResizeCondition(pvc *v1.PersistentVolumeClaim, c clientset.
         }
         return false, nil
     })
-    return updatedPVC, waitErr
+    if waitErr != nil {
+        return nil, fmt.Errorf("error waiting for pvc %q to have filesystem resize status: %v", pvc.Name, waitErr)
+    }
+    return updatedPVC, nil
 }
 
 // WaitForFSResize waits for the filesystem in the pv to be resized
@@ -370,5 +390,8 @@ func WaitForFSResize(pvc *v1.PersistentVolumeClaim, c clientset.Interface) (*v1.
         }
         return false, nil
     })
-    return updatedPVC, waitErr
+    if waitErr != nil {
+        return nil, fmt.Errorf("error waiting for pvc %q filesystem resize to finish: %v", pvc.Name, waitErr)
+    }
+    return updatedPVC, nil
 }
diff --git a/test/e2e/storage/testsuites/volumelimits.go b/test/e2e/storage/testsuites/volumelimits.go
index 4405335fbf3..58bf8c3f844 100644
--- a/test/e2e/storage/testsuites/volumelimits.go
+++ b/test/e2e/storage/testsuites/volumelimits.go
@@ -178,7 +178,7 @@ func (t *volumeLimitsTestSuite) DefineTests(driver TestDriver, pattern testpatte
         framework.ExpectNoError(err)
 
         ginkgo.By("Waiting for all PVCs to get Bound")
-        l.pvNames, err = waitForAllPVCsPhase(l.cs, testSlowMultiplier*e2epv.PVBindingTimeout, l.pvcs)
+        l.pvNames, err = waitForAllPVCsBound(l.cs, testSlowMultiplier*e2epv.PVBindingTimeout, l.pvcs)
         framework.ExpectNoError(err)
 
         ginkgo.By("Waiting for the pod Running")
@@ -268,7 +268,8 @@ func cleanupTest(cs clientset.Interface, ns string, runningPodName, unschedulabl
     return nil
 }
 
-func waitForAllPVCsPhase(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
+// waitForAllPVCsBound waits until the given PVCs are all bound. It then returns the bound PVC names as a set.
+func waitForAllPVCsBound(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
     pvNames := sets.NewString()
     err := wait.Poll(5*time.Second, timeout, func() (bool, error) {
         unbound := 0
@@ -289,7 +290,10 @@ func waitForAllPVCsPhase(cs clientset.Interface, timeout time.Duration, pvcs []*
         }
         return true, nil
     })
-    return pvNames, err
+    if err != nil {
+        return nil, fmt.Errorf("error waiting for all PVCs to be bound: %v", err)
+    }
+    return pvNames, nil
 }
 
 func getNodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
@@ -327,7 +331,7 @@ func getInTreeNodeLimits(cs clientset.Interface, nodeName string, driverInfo *Dr
 }
 
 func getCSINodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
-    // Wait in a loop, the driver might just have been installed and kubelet takes a while to publish everything.
+    // Retry with a timeout, the driver might just have been installed and kubelet takes a while to publish everything.
     var limit int
     err := wait.PollImmediate(2*time.Second, csiNodeInfoTimeout, func() (bool, error) {
         csiNode, err := cs.StorageV1().CSINodes().Get(nodeName, metav1.GetOptions{})
@@ -355,5 +359,8 @@ func getCSINodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName st
         limit = int(*csiDriver.Allocatable.Count)
         return true, nil
     })
-    return limit, err
+    if err != nil {
+        return 0, fmt.Errorf("could not get CSINode limit for driver %s: %v", driverInfo.Name, err)
+    }
+    return limit, nil
 }
diff --git a/test/e2e/storage/utils/utils.go b/test/e2e/storage/utils/utils.go
index 7b7c69e5ba5..90bace32667 100644
--- a/test/e2e/storage/utils/utils.go
+++ b/test/e2e/storage/utils/utils.go
@@ -583,7 +583,7 @@ func PrivilegedTestPSPClusterRoleBinding(client clientset.Interface,
             _, err := roleBindingClient.Get(binding.GetName(), metav1.GetOptions{})
             return apierrors.IsNotFound(err), nil
         })
-        framework.ExpectNoError(err, "Timed out waiting for deletion: %v", err)
+        framework.ExpectNoError(err, "Timed out waiting for RBAC binding %s deletion: %v", binding.GetName(), err)
 
         if teardown {
             continue
diff --git a/test/e2e/storage/volume_metrics.go b/test/e2e/storage/volume_metrics.go
index aa602c42b92..b6505737890 100644
--- a/test/e2e/storage/volume_metrics.go
+++ b/test/e2e/storage/volume_metrics.go
@@ -226,6 +226,8 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
             kubeletmetrics.VolumeStatsInodesFreeKey,
             kubeletmetrics.VolumeStatsInodesUsedKey,
         }
+        key := volumeStatKeys[0]
+        kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
         // Poll kubelet metrics waiting for the volume to be picked up
         // by the volume stats collector
         var kubeMetrics metrics.KubeletMetrics
@@ -238,14 +240,12 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
                 framework.Logf("Error fetching kubelet metrics")
                 return false, err
             }
-            key := volumeStatKeys[0]
-            kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
             if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) {
                 return false, nil
             }
             return true, nil
         })
-        framework.ExpectNoError(waitErr, "Error finding volume metrics : %v", waitErr)
+        framework.ExpectNoError(waitErr, "Unable to find metric %s for PVC %s/%s", kubeletKeyName, pvc.Namespace, pvc.Name)
 
         for _, key := range volumeStatKeys {
             kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
@@ -566,7 +566,7 @@ func waitForDetachAndGrabMetrics(oldMetrics *storageControllerMetrics, metricsGr
     }
 
     waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
-    framework.ExpectNoError(waitErr, "Timeout error fetching storage c-m metrics : %v", waitErr)
+    framework.ExpectNoError(waitErr, "Unable to get updated metrics for plugin %s", pluginName)
     return updatedStorageMetrics
 }
 
@@ -697,8 +697,7 @@ func waitForPVControllerSync(metricsGrabber *metrics.Grabber, metricName, dimens
         return len(getPVControllerMetrics(updatedMetrics, metricName, dimension)) > 0, nil
     }
     waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
-    framework.ExpectNoError(waitErr,
-        "Timeout error fetching pv controller metrics : %v", waitErr)
+    framework.ExpectNoError(waitErr, "Unable to get pv controller metrics")
 }
 
 func getPVControllerMetrics(ms metrics.ControllerManagerMetrics, metricName, dimension string) map[string]int64 {
@@ -787,5 +786,5 @@ func waitForADControllerStatesMetrics(metricsGrabber *metrics.Grabber, metricNam
         return true, nil
     }
     waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
-    framework.ExpectNoError(waitErr, "Timeout error fetching A/D controller metrics : %v", waitErr)
+    framework.ExpectNoError(waitErr, "Unable to get A/D controller metrics")
 }
diff --git a/test/e2e/storage/volume_provisioning.go b/test/e2e/storage/volume_provisioning.go
index 29f7ec8e325..52b7686986a 100644
--- a/test/e2e/storage/volume_provisioning.go
+++ b/test/e2e/storage/volume_provisioning.go
@@ -513,18 +513,10 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
 
             ginkgo.By(fmt.Sprintf("Checking for residual PersistentVolumes associated with StorageClass %s", class.Name))
             residualPVs, err = waitForProvisionedVolumesDeleted(c, class.Name)
-            framework.ExpectNoError(err)
             // Cleanup the test resources before breaking
             defer deleteProvisionedVolumesAndDisks(c, residualPVs)
+            framework.ExpectNoError(err, "PersistentVolumes were not deleted as expected. %d remain", len(residualPVs))
 
-            // Report indicators of regression
-            if len(residualPVs) > 0 {
-                framework.Logf("Remaining PersistentVolumes:")
-                for i, pv := range residualPVs {
-                    framework.Logf("\t%d) %s", i+1, pv.Name)
-                }
-                framework.Failf("Expected 0 PersistentVolumes remaining. Found %d", len(residualPVs))
-            }
             framework.Logf("0 PersistentVolumes remain.")
         })
 
@@ -821,7 +813,9 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
             // ClaimProvisionTimeout in the very same loop.
             err = wait.Poll(time.Second, framework.ClaimProvisionTimeout, func() (bool, error) {
                 events, err := c.CoreV1().Events(claim.Namespace).List(metav1.ListOptions{})
-                framework.ExpectNoError(err)
+                if err != nil {
+                    return false, fmt.Errorf("could not list PVC events in %s: %v", claim.Namespace, err)
+                }
                 for _, event := range events.Items {
                     if strings.Contains(event.Message, "failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key") {
                         return true, nil
@@ -843,7 +837,7 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
                 framework.Logf("The test missed event about failed provisioning, but checked that no volume was provisioned for %v", framework.ClaimProvisionTimeout)
                 err = nil
             }
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(err, "Error waiting for PVC to fail provisioning: %v", err)
         })
     })
 })
@@ -1033,7 +1027,10 @@ func waitForProvisionedVolumesDeleted(c clientset.Interface, scName string) ([]*
         }
         return true, nil // No PVs remain
     })
-    return remainingPVs, err
+    if err != nil {
+        return remainingPVs, fmt.Errorf("Error waiting for PVs to be deleted: %v", err)
+    }
+    return nil, nil
 }
 
 // deleteStorageClass deletes the passed in StorageClass and catches errors other than "Not Found"
@@ -1046,6 +1043,10 @@ func deleteStorageClass(c clientset.Interface, className string) {
 
 // deleteProvisionedVolumes [gce||gke only] iteratively deletes persistent volumes and attached GCE PDs.
 func deleteProvisionedVolumesAndDisks(c clientset.Interface, pvs []*v1.PersistentVolume) {
+    framework.Logf("Remaining PersistentVolumes:")
+    for i, pv := range pvs {
+        framework.Logf("\t%d) %s", i+1, pv.Name)
+    }
     for _, pv := range pvs {
         framework.ExpectNoError(e2epv.DeletePDWithRetry(pv.Spec.PersistentVolumeSource.GCEPersistentDisk.PDName))
         framework.ExpectNoError(e2epv.DeletePersistentVolume(c, pv.Name))
diff --git a/test/e2e/storage/vsphere/vsphere_utils.go b/test/e2e/storage/vsphere/vsphere_utils.go
index be70abc5ac9..f550b41c8d8 100644
--- a/test/e2e/storage/vsphere/vsphere_utils.go
+++ b/test/e2e/storage/vsphere/vsphere_utils.go
@@ -69,12 +69,10 @@ const (
 // Wait until vsphere volumes are detached from the list of nodes or time out after 5 minutes
 func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
     var (
-        err            error
-        disksAttached  = true
         detachTimeout  = 5 * time.Minute
         detachPollTime = 10 * time.Second
     )
-    err = wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {
+    waitErr := wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {
         attachedResult, err := disksAreAttached(nodeVolumes)
         if err != nil {
             return false, err
@@ -82,20 +80,19 @@ func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
         for nodeName, nodeVolumes := range attachedResult {
             for volumePath, attached := range nodeVolumes {
                 if attached {
-                    framework.Logf("Waiting for volumes %q to detach from %q.", volumePath, string(nodeName))
+                    framework.Logf("Volume %q is still attached to %q.", volumePath, string(nodeName))
                     return false, nil
                 }
             }
         }
-        disksAttached = false
         framework.Logf("Volume are successfully detached from all the nodes: %+v", nodeVolumes)
         return true, nil
     })
-    if err != nil {
-        return err
-    }
-    if disksAttached {
-        return fmt.Errorf("Gave up waiting for volumes to detach after %v", detachTimeout)
+    if waitErr != nil {
+        if waitErr == wait.ErrWaitTimeout {
+            return fmt.Errorf("volumes have not detached after %v: %v", detachTimeout, waitErr)
+        }
+        return fmt.Errorf("error waiting for volumes to detach: %v", waitErr)
     }
     return nil
 }
@@ -103,8 +100,6 @@ func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
 // Wait until vsphere vmdk moves to expected state on the given node, or time out after 6 minutes
 func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState volumeState) error {
     var (
-        err          error
-        diskAttached bool
         currentState volumeState
         timeout      = 6 * time.Minute
         pollTime     = 10 * time.Second
@@ -120,8 +115,8 @@ func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState
         volumeStateDetached: "detached from",
     }
 
-    err = wait.Poll(pollTime, timeout, func() (bool, error) {
-        diskAttached, err = diskIsAttached(volumePath, nodeName)
+    waitErr := wait.Poll(pollTime, timeout, func() (bool, error) {
+        diskAttached, err := diskIsAttached(volumePath, nodeName)
         if err != nil {
             return true, err
         }
@@ -134,14 +129,13 @@ func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState
         framework.Logf("Waiting for Volume %q to be %s %q.", volumePath, attachedStateMsg[expectedState], nodeName)
         return false, nil
     })
-    if err != nil {
-        return err
+    if waitErr != nil {
+        if waitErr == wait.ErrWaitTimeout {
+            return fmt.Errorf("volume %q is not %s %q after %v: %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout, waitErr)
+        }
+        return fmt.Errorf("error waiting for volume %q to be %s %q: %v", volumePath, attachedStateMsg[expectedState], nodeName, waitErr)
     }
-
-    if currentState != expectedState {
-        err = fmt.Errorf("Gave up waiting for Volume %q to be %s %q after %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout)
-    }
-    return err
+    return nil
 }
 
 // Wait until vsphere vmdk is attached from the given node or time out after 6 minutes
diff --git a/test/e2e/storage/vsphere/vsphere_volume_node_poweroff.go b/test/e2e/storage/vsphere/vsphere_volume_node_poweroff.go
index 5a477fb06dc..b69617210d5 100644
--- a/test/e2e/storage/vsphere/vsphere_volume_node_poweroff.go
+++ b/test/e2e/storage/vsphere/vsphere_volume_node_poweroff.go
@@ -147,20 +147,18 @@ var _ = utils.SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]",
 // Wait until the pod failed over to a different node, or time out after 3 minutes
 func waitForPodToFailover(client clientset.Interface, deployment *appsv1.Deployment, oldNode string) (string, error) {
     var (
-        err      error
-        newNode  string
         timeout  = 3 * time.Minute
         pollTime = 10 * time.Second
     )
 
-    err = wait.Poll(pollTime, timeout, func() (bool, error) {
-        newNode, err = getNodeForDeployment(client, deployment)
+    waitErr := wait.Poll(pollTime, timeout, func() (bool, error) {
+        currentNode, err := getNodeForDeployment(client, deployment)
         if err != nil {
             return true, err
         }
 
-        if newNode != oldNode {
-            framework.Logf("The pod has been failed over from %q to %q", oldNode, newNode)
+        if currentNode != oldNode {
+            framework.Logf("The pod has been failed over from %q to %q", oldNode, currentNode)
             return true, nil
         }
 
@@ -168,12 +166,11 @@ func waitForPodToFailover(client clientset.Interface, deployment *appsv1.Deploym
         return false, nil
     })
 
-    if err != nil {
-        if err == wait.ErrWaitTimeout {
-            framework.Logf("Time out after waiting for %v", timeout)
+    if waitErr != nil {
+        if waitErr == wait.ErrWaitTimeout {
+            return "", fmt.Errorf("pod has not failed over after %v: %v", timeout, waitErr)
         }
-        framework.Logf("Pod did not fail over from %q with error: %v", oldNode, err)
-        return "", err
+        return "", fmt.Errorf("pod did not fail over from %q: %v", oldNode, waitErr)
     }
 
     return getNodeForDeployment(client, deployment)
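
The pattern applied throughout the patch, shown in isolation as a minimal sketch: the bare error returned by wait.PollImmediate() is the generic "timed out waiting for the condition", so each helper wraps it with fmt.Errorf() to say what it was waiting for. The waitForThingReady helper below and its arguments are hypothetical, for illustration only, and are not part of the patch.

package storage

import (
    "fmt"
    "time"

    "k8s.io/apimachinery/pkg/util/wait"
)

// waitForThingReady is a hypothetical helper illustrating the error-wrapping
// pattern: poll until the condition reports done, then convert a bare
// timeout into an error that names what was being waited for.
func waitForThingReady(name string, condition wait.ConditionFunc) error {
    waitErr := wait.PollImmediate(2*time.Second, time.Minute, condition)
    if waitErr != nil {
        // waitErr is wait.ErrWaitTimeout on timeout; keep it in the message
        // so callers still see the original cause.
        return fmt.Errorf("error waiting for %s to be ready: %v", name, waitErr)
    }
    return nil
}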