storage e2e: Add context to timeout errors

Too often, an e2e test fails with only the generic error
"timed out waiting for the condition".

The useful details may be buried in the test logs, but debugging e2e
test failures is much faster when the error itself says what the test
was waiting for.

This change makes sure we add context to all errors returned from
helpers like wait.Poll().
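
For example, a minimal sketch of the pattern (waitForPodRunning is a
hypothetical helper, not one of the functions touched here):

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
)

// waitForPodRunning shows the shape of this change: wrap the poll
// error with what we were waiting for instead of returning it bare.
func waitForPodRunning(cs clientset.Interface, ns, name string) error {
	waitErr := wait.PollImmediate(2*time.Second, time.Minute, func() (bool, error) {
		pod, err := cs.CoreV1().Pods(ns).Get(name, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		return pod.Status.Phase == v1.PodRunning, nil
	})
	// Before: return waitErr, which surfaces only
	// "timed out waiting for the condition".
	if waitErr != nil {
		return fmt.Errorf("error waiting for pod %s/%s to be running: %v", ns, name, waitErr)
	}
	return nil
}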
Jonathan Basseri 2020-01-10 19:08:46 -08:00
parent ace98e1382
commit 99a015705b
15 changed files with 164 additions and 100 deletions


@@ -610,7 +610,10 @@ func waitForMaxVolumeCondition(pod *v1.Pod, cs clientset.Interface) error {
}
return false, nil
})
return waitErr
if waitErr != nil {
return fmt.Errorf("error waiting for pod %s/%s to have max volume condition: %v", pod.Namespace, pod.Name, waitErr)
}
return nil
}
func checkCSINodeForLimits(nodeName string, driverName string, cs clientset.Interface) (int32, error) {
@@ -627,7 +630,10 @@ func checkCSINodeForLimits(nodeName string, driverName string, cs clientset.Inte
}
return false, nil
})
return attachLimit, waitErr
if waitErr != nil {
return 0, fmt.Errorf("error waiting for non-zero volume limit of driver %s on node %s: %v", driverName, nodeName, waitErr)
}
return attachLimit, nil
}
func startPausePod(cs clientset.Interface, t testsuites.StorageClassTest, node e2epod.NodeSelection, ns string) (*storagev1.StorageClass, *v1.PersistentVolumeClaim, *v1.Pod) {


@@ -130,23 +130,27 @@ func getUniqueVolumeName(pod *v1.Pod, driverName string) string {
}
func waitForVolumesNotInUse(client clientset.Interface, nodeName, volumeName string) error {
return wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
waitErr := wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
}
volumeInUSe := node.Status.VolumesInUse
for _, volume := range volumeInUSe {
volumeInUse := node.Status.VolumesInUse
for _, volume := range volumeInUse {
if string(volume) == volumeName {
return false, nil
}
}
return true, nil
})
if waitErr != nil {
return fmt.Errorf("error waiting for volumes to not be in use: %v", waitErr)
}
return nil
}
func waitForVolumesAttached(client clientset.Interface, nodeName, volumeName string) error {
return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) {
waitErr := wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) {
node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
@@ -159,22 +163,30 @@ func waitForVolumesAttached(client clientset.Interface, nodeName, volumeName str
}
return false, nil
})
if waitErr != nil {
return fmt.Errorf("error waiting for volume %v to attach to node %v: %v", volumeName, nodeName, waitErr)
}
return nil
}
func waitForVolumesInUse(client clientset.Interface, nodeName, volumeName string) error {
return wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
waitErr := wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
}
volumeInUSe := node.Status.VolumesInUse
for _, volume := range volumeInUSe {
volumeInUse := node.Status.VolumesInUse
for _, volume := range volumeInUse {
if string(volume) == volumeName {
return true, nil
}
}
return false, nil
})
if waitErr != nil {
return fmt.Errorf("error waiting for volume %v to be in use on node %v: %v", volumeName, nodeName, waitErr)
}
return nil
}
func getFlexVolumePod(volumeSource v1.VolumeSource, nodeName string) *v1.Pod {


@@ -490,7 +490,7 @@ func waitForCSIDriverRegistrationOnAllNodes(driverName string, cs clientset.Inte
func waitForCSIDriverRegistrationOnNode(nodeName string, driverName string, cs clientset.Interface) error {
const csiNodeRegisterTimeout = 1 * time.Minute
return wait.PollImmediate(10*time.Second, csiNodeRegisterTimeout, func() (bool, error) {
waitErr := wait.PollImmediate(10*time.Second, csiNodeRegisterTimeout, func() (bool, error) {
csiNode, err := cs.StorageV1().CSINodes().Get(nodeName, metav1.GetOptions{})
if err != nil && !apierrors.IsNotFound(err) {
return false, err
@@ -502,4 +502,8 @@ func waitForCSIDriverRegistrationOnNode(nodeName string, driverName string, cs c
}
return false, nil
})
if waitErr != nil {
return fmt.Errorf("error waiting for CSI driver %s registration on node %s: %v", driverName, nodeName, waitErr)
}
return nil
}


@@ -17,6 +17,7 @@ limitations under the License.
package storage
import (
"fmt"
"time"
"github.com/onsi/ginkgo"
@@ -170,6 +171,9 @@ func waitForDeploymentToRecreatePod(client clientset.Interface, deployment *apps
var runningPod v1.Pod
waitErr := wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) {
podList, err := e2edeploy.GetPodsForDeployment(client, deployment)
if err != nil {
return false, fmt.Errorf("failed to get pods for deployment: %v", err)
}
for _, pod := range podList.Items {
switch pod.Status.Phase {
case v1.PodRunning:
@@ -179,7 +183,10 @@ func waitForDeploymentToRecreatePod(client clientset.Interface, deployment *apps
return false, conditions.ErrPodCompleted
}
}
return false, err
return false, nil
})
return runningPod, waitErr
if waitErr != nil {
return runningPod, fmt.Errorf("error waiting for recreated pod: %v", waitErr)
}
return runningPod, nil
}


@@ -428,13 +428,13 @@ var _ = utils.SIGDescribe("Pod Disks", func() {
}
ginkgo.By("evicting host0Pod")
err = wait.PollImmediate(framework.Poll, podEvictTimeout, func() (bool, error) {
err = cs.CoreV1().Pods(ns).Evict(evictTarget)
if err != nil {
if err := cs.CoreV1().Pods(ns).Evict(evictTarget); err != nil {
framework.Logf("Failed to evict host0Pod, ignoring error: %v", err)
return false, nil
}
return true, nil
})
framework.ExpectNoError(err, fmt.Sprintf("failed to evict host0Pod after %v", podEvictTimeout))
framework.ExpectNoError(err, "failed to evict host0Pod after %v", podEvictTimeout)
}
ginkgo.By("waiting for pd to detach from host0")


@@ -570,7 +570,8 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
}()
ginkgo.By("Waiting for all pods to complete successfully")
err := wait.PollImmediate(time.Second, 5*time.Minute, func() (done bool, err error) {
const completeTimeout = 5 * time.Minute
waitErr := wait.PollImmediate(time.Second, completeTimeout, func() (done bool, err error) {
podsList, err := config.client.CoreV1().Pods(config.ns).List(metav1.ListOptions{})
if err != nil {
return false, err
@@ -589,15 +590,15 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
delete(pods, pod.Name)
numFinished++
framework.Logf("%v/%v pods finished", numFinished, totalPods)
case v1.PodFailed:
case v1.PodUnknown:
return false, fmt.Errorf("pod %v is in %v phase", pod.Name, pod.Status.Phase)
case v1.PodFailed:
}
}
return numFinished == totalPods, nil
})
framework.ExpectNoError(err)
framework.ExpectNoError(waitErr, "some pods failed to complete within %v", completeTimeout)
})
})
@@ -648,7 +649,8 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
pods[pod.Name] = pod
}
ginkgo.By("Wait for all pods are running")
err = wait.PollImmediate(time.Second, 5*time.Minute, func() (done bool, err error) {
const runningTimeout = 5 * time.Minute
waitErr := wait.PollImmediate(time.Second, runningTimeout, func() (done bool, err error) {
podsList, err := config.client.CoreV1().Pods(config.ns).List(metav1.ListOptions{})
if err != nil {
return false, err
@@ -662,7 +664,7 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
}
return runningPods == count, nil
})
framework.ExpectNoError(err)
framework.ExpectNoError(waitErr, "Some pods are not running within %v", runningTimeout)
})
})
})
@@ -918,17 +920,26 @@ func createLocalPVCsPVs(config *localTestConfig, volumes []*localTestVolume, mod
verifyLocalVolume(config, volume)
}
} else {
// Verify PVCs are not bound
// Verify PVCs are not bound by waiting for phase==bound with a timeout and asserting that we hit the timeout.
// There isn't really a great way to verify this without making the test be slow...
err = wait.PollImmediate(time.Second, 10*time.Second, func() (done bool, err error) {
const bindTimeout = 10 * time.Second
waitErr := wait.PollImmediate(time.Second, bindTimeout, func() (done bool, err error) {
for _, volume := range volumes {
pvc, err := config.client.CoreV1().PersistentVolumeClaims(volume.pvc.Namespace).Get(volume.pvc.Name, metav1.GetOptions{})
framework.ExpectNoError(err)
framework.ExpectEqual(pvc.Status.Phase, v1.ClaimPending)
if err != nil {
return false, fmt.Errorf("failed to get PVC %s/%s: %v", volume.pvc.Namespace, volume.pvc.Name, err)
}
if pvc.Status.Phase != v1.ClaimPending {
return true, nil
}
}
return false, nil
})
framework.ExpectError(err)
if waitErr == wait.ErrWaitTimeout {
framework.Logf("PVCs were not bound within %v (that's good)", bindTimeout)
waitErr = nil
}
framework.ExpectNoError(waitErr, "Error making sure PVCs are not bound")
}
}
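
A minimal sketch of the negative check above, assuming a hypothetical
sawUnwantedCondition wait.ConditionFunc that returns (true, nil) once
the unwanted state is observed: hitting the timeout is the desired
outcome, and only other poll errors fail the test.

waitErr := wait.PollImmediate(time.Second, bindTimeout, sawUnwantedCondition)
if waitErr == wait.ErrWaitTimeout {
	// The condition was never observed within bindTimeout; that is success.
	waitErr = nil
}
framework.ExpectNoError(waitErr, "error while verifying the condition never occurred")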


@@ -252,18 +252,17 @@ func testZonalFailover(c clientset.Interface, ns string) {
} else {
otherZone = cloudZones[0]
}
err = wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
framework.Logf("checking whether new pod is scheduled in zone %q", otherZone)
pod = getPod(c, ns, regionalPDLabels)
nodeName = pod.Spec.NodeName
node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
waitErr := wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
framework.Logf("Checking whether new pod is scheduled in zone %q", otherZone)
pod := getPod(c, ns, regionalPDLabels)
node, err := c.CoreV1().Nodes().Get(pod.Spec.NodeName, metav1.GetOptions{})
if err != nil {
return false, nil
}
newPodZone := node.Labels[v1.LabelZoneFailureDomain]
return newPodZone == otherZone, nil
})
framework.ExpectNoError(err, "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
framework.ExpectNoError(waitErr, "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
err = waitForStatefulSetReplicasReady(statefulSet.Name, ns, c, 3*time.Second, framework.RestartPodReadyAgainTimeout)
if err != nil {


@@ -759,7 +759,7 @@ func waitForPodSubpathError(f *framework.Framework, pod *v1.Pod, allowContainerT
return fmt.Errorf("failed to find container that uses subpath")
}
return wait.PollImmediate(framework.Poll, framework.PodStartTimeout, func() (bool, error) {
waitErr := wait.PollImmediate(framework.Poll, framework.PodStartTimeout, func() (bool, error) {
pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{})
if err != nil {
return false, err
@@ -786,6 +786,10 @@ func waitForPodSubpathError(f *framework.Framework, pod *v1.Pod, allowContainerT
}
return false, nil
})
if waitErr != nil {
return fmt.Errorf("error waiting for pod subpath error to occur: %v", waitErr)
}
return nil
}
// Tests that the existing subpath mount is detected when a container restarts


@@ -265,22 +265,32 @@ func ExpandPVCSize(origPVC *v1.PersistentVolumeClaim, size resource.Quantity, c
pvcName := origPVC.Name
updatedPVC := origPVC.DeepCopy()
// Retry the update on error, until we hit a timeout.
// TODO: Determine whether "retry with timeout" is appropriate here. Maybe we should only retry on version conflict.
var lastUpdateError error
waitErr := wait.PollImmediate(resizePollInterval, 30*time.Second, func() (bool, error) {
var err error
updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(pvcName, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error fetching pvc %q for resizing with %v", pvcName, err)
return false, fmt.Errorf("error fetching pvc %q for resizing: %v", pvcName, err)
}
updatedPVC.Spec.Resources.Requests[v1.ResourceStorage] = size
updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(updatedPVC)
if err == nil {
return true, nil
if err != nil {
framework.Logf("Error updating pvc %s: %v", pvcName, err)
lastUpdateError = err
return false, nil
}
framework.Logf("Error updating pvc %s with %v", pvcName, err)
return false, nil
return true, nil
})
return updatedPVC, waitErr
if waitErr == wait.ErrWaitTimeout {
return nil, fmt.Errorf("timed out attempting to update PVC size. last update error: %v", lastUpdateError)
}
if waitErr != nil {
return nil, fmt.Errorf("failed to expand PVC size (check logs for error): %v", waitErr)
}
return updatedPVC, nil
}
// WaitForResizingCondition waits for the pvc condition to be PersistentVolumeClaimResizing
@@ -290,7 +300,7 @@ func WaitForResizingCondition(pvc *v1.PersistentVolumeClaim, c clientset.Interfa
updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pvc.Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error fetching pvc %q for checking for resize status : %v", pvc.Name, err)
return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err)
}
pvcConditions := updatedPVC.Status.Conditions
@@ -301,13 +311,16 @@ func WaitForResizingCondition(pvc *v1.PersistentVolumeClaim, c clientset.Interfa
}
return false, nil
})
return waitErr
if waitErr != nil {
return fmt.Errorf("error waiting for pvc %q to have resize status: %v", pvc.Name, waitErr)
}
return nil
}
// WaitForControllerVolumeResize waits for the controller resize to be finished
func WaitForControllerVolumeResize(pvc *v1.PersistentVolumeClaim, c clientset.Interface, duration time.Duration) error {
pvName := pvc.Spec.VolumeName
return wait.PollImmediate(resizePollInterval, duration, func() (bool, error) {
waitErr := wait.PollImmediate(resizePollInterval, duration, func() (bool, error) {
pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage]
pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
@@ -323,6 +336,10 @@ func WaitForControllerVolumeResize(pvc *v1.PersistentVolumeClaim, c clientset.In
}
return false, nil
})
if waitErr != nil {
return fmt.Errorf("error while waiting for controller resize to finish: %v", waitErr)
}
return nil
}
// WaitForPendingFSResizeCondition waits for pvc to have resize condition
@@ -347,7 +364,10 @@ func WaitForPendingFSResizeCondition(pvc *v1.PersistentVolumeClaim, c clientset.
}
return false, nil
})
return updatedPVC, waitErr
if waitErr != nil {
return nil, fmt.Errorf("error waiting for pvc %q to have filesystem resize status: %v", pvc.Name, waitErr)
}
return updatedPVC, nil
}
// WaitForFSResize waits for the filesystem in the pv to be resized
@@ -370,5 +390,8 @@ func WaitForFSResize(pvc *v1.PersistentVolumeClaim, c clientset.Interface) (*v1.
}
return false, nil
})
return updatedPVC, waitErr
if waitErr != nil {
return nil, fmt.Errorf("error waiting for pvc %q filesystem resize to finish: %v", pvc.Name, waitErr)
}
return updatedPVC, nil
}
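
If the TODO in ExpandPVCSize above were resolved in favor of retrying
only on version conflicts, client-go's retry helper would be the
natural fit. A sketch under that assumption (expandPVCSizeOnConflict
is hypothetical and not part of this commit):

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/util/retry"
)

// expandPVCSizeOnConflict retries the PVC update only when the write
// fails with a resource-version conflict, rather than polling until a
// fixed timeout.
func expandPVCSizeOnConflict(origPVC *v1.PersistentVolumeClaim, size resource.Quantity, c clientset.Interface) (*v1.PersistentVolumeClaim, error) {
	var updatedPVC *v1.PersistentVolumeClaim
	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
		// Re-fetch on every attempt so a retry sees the latest resourceVersion.
		pvc, err := c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(origPVC.Name, metav1.GetOptions{})
		if err != nil {
			return err
		}
		pvc.Spec.Resources.Requests[v1.ResourceStorage] = size
		updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(pvc)
		return err
	})
	if err != nil {
		return nil, fmt.Errorf("failed to expand PVC %q: %v", origPVC.Name, err)
	}
	return updatedPVC, nil
}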


@@ -178,7 +178,7 @@ func (t *volumeLimitsTestSuite) DefineTests(driver TestDriver, pattern testpatte
framework.ExpectNoError(err)
ginkgo.By("Waiting for all PVCs to get Bound")
l.pvNames, err = waitForAllPVCsPhase(l.cs, testSlowMultiplier*e2epv.PVBindingTimeout, l.pvcs)
l.pvNames, err = waitForAllPVCsBound(l.cs, testSlowMultiplier*e2epv.PVBindingTimeout, l.pvcs)
framework.ExpectNoError(err)
ginkgo.By("Waiting for the pod Running")
@@ -268,7 +268,8 @@ func cleanupTest(cs clientset.Interface, ns string, runningPodName, unschedulabl
return nil
}
func waitForAllPVCsPhase(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
// waitForAllPVCsBound waits until the given PVCs are all bound. It then returns the bound PVC names as a set.
func waitForAllPVCsBound(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
pvNames := sets.NewString()
err := wait.Poll(5*time.Second, timeout, func() (bool, error) {
unbound := 0
@@ -289,7 +290,10 @@ func waitForAllPVCsPhase(cs clientset.Interface, timeout time.Duration, pvcs []*
}
return true, nil
})
return pvNames, err
if err != nil {
return nil, fmt.Errorf("error waiting for all PVCs to be bound: %v", err)
}
return pvNames, nil
}
func getNodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
@@ -327,7 +331,7 @@ func getInTreeNodeLimits(cs clientset.Interface, nodeName string, driverInfo *Dr
}
func getCSINodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
// Wait in a loop, the driver might just have been installed and kubelet takes a while to publish everything.
// Retry with a timeout, the driver might just have been installed and kubelet takes a while to publish everything.
var limit int
err := wait.PollImmediate(2*time.Second, csiNodeInfoTimeout, func() (bool, error) {
csiNode, err := cs.StorageV1().CSINodes().Get(nodeName, metav1.GetOptions{})
@@ -355,5 +359,8 @@ func getCSINodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName st
limit = int(*csiDriver.Allocatable.Count)
return true, nil
})
return limit, err
if err != nil {
return 0, fmt.Errorf("could not get CSINode limit for driver %s: %v", driverInfo.Name, err)
}
return limit, nil
}


@@ -583,7 +583,7 @@ func PrivilegedTestPSPClusterRoleBinding(client clientset.Interface,
_, err := roleBindingClient.Get(binding.GetName(), metav1.GetOptions{})
return apierrors.IsNotFound(err), nil
})
framework.ExpectNoError(err, "Timed out waiting for deletion: %v", err)
framework.ExpectNoError(err, "Timed out waiting for RBAC binding %s deletion: %v", binding.GetName(), err)
if teardown {
continue


@@ -226,6 +226,8 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
kubeletmetrics.VolumeStatsInodesFreeKey,
kubeletmetrics.VolumeStatsInodesUsedKey,
}
key := volumeStatKeys[0]
kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
// Poll kubelet metrics waiting for the volume to be picked up
// by the volume stats collector
var kubeMetrics metrics.KubeletMetrics
@@ -238,14 +240,12 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
framework.Logf("Error fetching kubelet metrics")
return false, err
}
key := volumeStatKeys[0]
kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) {
return false, nil
}
return true, nil
})
framework.ExpectNoError(waitErr, "Error finding volume metrics : %v", waitErr)
framework.ExpectNoError(waitErr, "Unable to find metric %s for PVC %s/%s", kubeletKeyName, pvc.Namespace, pvc.Name)
for _, key := range volumeStatKeys {
kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
@@ -566,7 +566,7 @@ func waitForDetachAndGrabMetrics(oldMetrics *storageControllerMetrics, metricsGr
}
waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
framework.ExpectNoError(waitErr, "Timeout error fetching storage c-m metrics : %v", waitErr)
framework.ExpectNoError(waitErr, "Unable to get updated metrics for plugin %s", pluginName)
return updatedStorageMetrics
}
@@ -697,8 +697,7 @@ func waitForPVControllerSync(metricsGrabber *metrics.Grabber, metricName, dimens
return len(getPVControllerMetrics(updatedMetrics, metricName, dimension)) > 0, nil
}
waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
framework.ExpectNoError(waitErr,
"Timeout error fetching pv controller metrics : %v", waitErr)
framework.ExpectNoError(waitErr, "Unable to get pv controller metrics")
}
func getPVControllerMetrics(ms metrics.ControllerManagerMetrics, metricName, dimension string) map[string]int64 {
@@ -787,5 +786,5 @@ func waitForADControllerStatesMetrics(metricsGrabber *metrics.Grabber, metricNam
return true, nil
}
waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
framework.ExpectNoError(waitErr, "Timeout error fetching A/D controller metrics : %v", waitErr)
framework.ExpectNoError(waitErr, "Unable to get A/D controller metrics")
}


@@ -513,18 +513,10 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
ginkgo.By(fmt.Sprintf("Checking for residual PersistentVolumes associated with StorageClass %s", class.Name))
residualPVs, err = waitForProvisionedVolumesDeleted(c, class.Name)
framework.ExpectNoError(err)
// Cleanup the test resources before breaking
defer deleteProvisionedVolumesAndDisks(c, residualPVs)
framework.ExpectNoError(err, "PersistentVolumes were not deleted as expected. %d remain", len(residualPVs))
// Report indicators of regression
if len(residualPVs) > 0 {
framework.Logf("Remaining PersistentVolumes:")
for i, pv := range residualPVs {
framework.Logf("\t%d) %s", i+1, pv.Name)
}
framework.Failf("Expected 0 PersistentVolumes remaining. Found %d", len(residualPVs))
}
framework.Logf("0 PersistentVolumes remain.")
})
@@ -821,7 +813,9 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
// ClaimProvisionTimeout in the very same loop.
err = wait.Poll(time.Second, framework.ClaimProvisionTimeout, func() (bool, error) {
events, err := c.CoreV1().Events(claim.Namespace).List(metav1.ListOptions{})
framework.ExpectNoError(err)
if err != nil {
return false, fmt.Errorf("could not list PVC events in %s: %v", claim.Namespace, err)
}
for _, event := range events.Items {
if strings.Contains(event.Message, "failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key") {
return true, nil
@@ -843,7 +837,7 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
framework.Logf("The test missed event about failed provisioning, but checked that no volume was provisioned for %v", framework.ClaimProvisionTimeout)
err = nil
}
framework.ExpectNoError(err)
framework.ExpectNoError(err, "Error waiting for PVC to fail provisioning: %v", err)
})
})
})
@@ -1033,7 +1027,10 @@ func waitForProvisionedVolumesDeleted(c clientset.Interface, scName string) ([]*
}
return true, nil // No PVs remain
})
return remainingPVs, err
if err != nil {
return remainingPVs, fmt.Errorf("Error waiting for PVs to be deleted: %v", err)
}
return nil, nil
}
// deleteStorageClass deletes the passed in StorageClass and catches errors other than "Not Found"
@@ -1046,6 +1043,10 @@ func deleteStorageClass(c clientset.Interface, className string) {
// deleteProvisionedVolumes [gce||gke only] iteratively deletes persistent volumes and attached GCE PDs.
func deleteProvisionedVolumesAndDisks(c clientset.Interface, pvs []*v1.PersistentVolume) {
framework.Logf("Remaining PersistentVolumes:")
for i, pv := range pvs {
framework.Logf("\t%d) %s", i+1, pv.Name)
}
for _, pv := range pvs {
framework.ExpectNoError(e2epv.DeletePDWithRetry(pv.Spec.PersistentVolumeSource.GCEPersistentDisk.PDName))
framework.ExpectNoError(e2epv.DeletePersistentVolume(c, pv.Name))


@@ -69,12 +69,10 @@ const (
// Wait until vsphere volumes are detached from the list of nodes or time out after 5 minutes
func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
var (
err error
disksAttached = true
detachTimeout = 5 * time.Minute
detachPollTime = 10 * time.Second
)
err = wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {
waitErr := wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {
attachedResult, err := disksAreAttached(nodeVolumes)
if err != nil {
return false, err
@@ -82,20 +80,19 @@ func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
for nodeName, nodeVolumes := range attachedResult {
for volumePath, attached := range nodeVolumes {
if attached {
framework.Logf("Waiting for volumes %q to detach from %q.", volumePath, string(nodeName))
framework.Logf("Volume %q is still attached to %q.", volumePath, string(nodeName))
return false, nil
}
}
}
disksAttached = false
framework.Logf("Volume are successfully detached from all the nodes: %+v", nodeVolumes)
return true, nil
})
if err != nil {
return err
}
if disksAttached {
return fmt.Errorf("Gave up waiting for volumes to detach after %v", detachTimeout)
if waitErr != nil {
if waitErr == wait.ErrWaitTimeout {
return fmt.Errorf("volumes have not detached after %v: %v", detachTimeout, waitErr)
}
return fmt.Errorf("error waiting for volumes to detach: %v", waitErr)
}
return nil
}
@@ -103,8 +100,6 @@ func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
// Wait until vsphere vmdk moves to expected state on the given node, or time out after 6 minutes
func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState volumeState) error {
var (
err error
diskAttached bool
currentState volumeState
timeout = 6 * time.Minute
pollTime = 10 * time.Second
@@ -120,8 +115,8 @@ func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState
volumeStateDetached: "detached from",
}
err = wait.Poll(pollTime, timeout, func() (bool, error) {
diskAttached, err = diskIsAttached(volumePath, nodeName)
waitErr := wait.Poll(pollTime, timeout, func() (bool, error) {
diskAttached, err := diskIsAttached(volumePath, nodeName)
if err != nil {
return true, err
}
@@ -134,14 +129,13 @@ func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState
framework.Logf("Waiting for Volume %q to be %s %q.", volumePath, attachedStateMsg[expectedState], nodeName)
return false, nil
})
if err != nil {
return err
if waitErr != nil {
if waitErr == wait.ErrWaitTimeout {
return fmt.Errorf("volume %q is not %s %q after %v: %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout, waitErr)
}
return fmt.Errorf("error waiting for volume %q to be %s %q: %v", volumePath, attachedStateMsg[expectedState], nodeName, waitErr)
}
if currentState != expectedState {
err = fmt.Errorf("Gave up waiting for Volume %q to be %s %q after %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout)
}
return err
return nil
}
// Wait until vsphere vmdk is attached from the given node or time out after 6 minutes


@@ -147,20 +147,18 @@ var _ = utils.SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]",
// Wait until the pod failed over to a different node, or time out after 3 minutes
func waitForPodToFailover(client clientset.Interface, deployment *appsv1.Deployment, oldNode string) (string, error) {
var (
err error
newNode string
timeout = 3 * time.Minute
pollTime = 10 * time.Second
)
err = wait.Poll(pollTime, timeout, func() (bool, error) {
newNode, err = getNodeForDeployment(client, deployment)
waitErr := wait.Poll(pollTime, timeout, func() (bool, error) {
currentNode, err := getNodeForDeployment(client, deployment)
if err != nil {
return true, err
}
if newNode != oldNode {
framework.Logf("The pod has been failed over from %q to %q", oldNode, newNode)
if currentNode != oldNode {
framework.Logf("The pod has been failed over from %q to %q", oldNode, currentNode)
return true, nil
}
@@ -168,12 +166,11 @@ func waitForPodToFailover(client clientset.Interface, deployment *appsv1.Deploym
return false, nil
})
if err != nil {
if err == wait.ErrWaitTimeout {
framework.Logf("Time out after waiting for %v", timeout)
if waitErr != nil {
if waitErr == wait.ErrWaitTimeout {
return "", fmt.Errorf("pod has not failed over after %v: %v", timeout, waitErr)
}
framework.Logf("Pod did not fail over from %q with error: %v", oldNode, err)
return "", err
return "", fmt.Errorf("pod did not fail over from %q: %v", oldNode, waitErr)
}
return getNodeForDeployment(client, deployment)