storage e2e: Add context to timeout errors

E2e tests often fail with only the generic error
"timed out waiting for the condition".

Useful information may be in the test logs, but debugging e2e test
failures will be much faster if we add context to errors when they
happen.

This change makes sure we add context to all errors returned from
helpers like wait.Poll().
Jonathan Basseri 2020-01-10 19:08:46 -08:00
parent ace98e1382
commit 99a015705b
15 changed files with 164 additions and 100 deletions
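
The change is a mechanical pattern repeated across the files below: instead of returning the bare error from wait.Poll()/wait.PollImmediate(), each helper captures it as waitErr and wraps it with fmt.Errorf so the failure message names the condition (and object) that timed out. A rough sketch of that before/after pattern is shown here; waitForThingOld, waitForThingNew, and checkSomething are hypothetical stand-ins for illustration, not code from this commit.

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// checkSomething stands in for whatever per-iteration check a helper performs.
// It never succeeds here, so the poll below always times out.
func checkSomething() (bool, error) { return false, nil }

// Before: the caller only ever sees "timed out waiting for the condition".
func waitForThingOld(name string) error {
	return wait.PollImmediate(time.Second, 3*time.Second, func() (bool, error) {
		return checkSomething()
	})
}

// After: capture the poll error and wrap it with enough context to debug the failure.
func waitForThingNew(name string) error {
	waitErr := wait.PollImmediate(time.Second, 3*time.Second, func() (bool, error) {
		return checkSomething()
	})
	if waitErr != nil {
		return fmt.Errorf("error waiting for thing %q to be ready: %v", name, waitErr)
	}
	return nil
}

func main() {
	// Prints: error waiting for thing "example" to be ready: timed out waiting for the condition
	fmt.Println(waitForThingNew("example"))
}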

View File

@@ -610,7 +610,10 @@ func waitForMaxVolumeCondition(pod *v1.Pod, cs clientset.Interface) error {
         }
         return false, nil
     })
-    return waitErr
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for pod %s/%s to have max volume condition: %v", pod.Namespace, pod.Name, waitErr)
+    }
+    return nil
 }
 func checkCSINodeForLimits(nodeName string, driverName string, cs clientset.Interface) (int32, error) {
@@ -627,7 +630,10 @@ func checkCSINodeForLimits(nodeName string, driverName string, cs clientset.Interface) (int32, error) {
         }
         return false, nil
     })
-    return attachLimit, waitErr
+    if waitErr != nil {
+        return 0, fmt.Errorf("error waiting for non-zero volume limit of driver %s on node %s: %v", driverName, nodeName, waitErr)
+    }
+    return attachLimit, nil
 }
 func startPausePod(cs clientset.Interface, t testsuites.StorageClassTest, node e2epod.NodeSelection, ns string) (*storagev1.StorageClass, *v1.PersistentVolumeClaim, *v1.Pod) {

View File

@@ -130,23 +130,27 @@ func getUniqueVolumeName(pod *v1.Pod, driverName string) string {
 }
 func waitForVolumesNotInUse(client clientset.Interface, nodeName, volumeName string) error {
-    return wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
+    waitErr := wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
         node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
         if err != nil {
             return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
         }
-        volumeInUSe := node.Status.VolumesInUse
-        for _, volume := range volumeInUSe {
+        volumeInUse := node.Status.VolumesInUse
+        for _, volume := range volumeInUse {
             if string(volume) == volumeName {
                 return false, nil
             }
         }
         return true, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for volumes to not be in use: %v", waitErr)
+    }
+    return nil
 }
 func waitForVolumesAttached(client clientset.Interface, nodeName, volumeName string) error {
-    return wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) {
+    waitErr := wait.PollImmediate(2*time.Second, 2*time.Minute, func() (bool, error) {
         node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
         if err != nil {
             return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
@@ -159,22 +163,30 @@ func waitForVolumesAttached(client clientset.Interface, nodeName, volumeName string) error {
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for volume %v to attach to node %v: %v", volumeName, nodeName, waitErr)
+    }
+    return nil
 }
 func waitForVolumesInUse(client clientset.Interface, nodeName, volumeName string) error {
-    return wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
+    waitErr := wait.PollImmediate(10*time.Second, 60*time.Second, func() (bool, error) {
         node, err := client.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
         if err != nil {
             return false, fmt.Errorf("error fetching node %s with %v", nodeName, err)
         }
-        volumeInUSe := node.Status.VolumesInUse
-        for _, volume := range volumeInUSe {
+        volumeInUse := node.Status.VolumesInUse
+        for _, volume := range volumeInUse {
             if string(volume) == volumeName {
                 return true, nil
             }
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for volume %v to be in use on node %v: %v", volumeName, nodeName, waitErr)
+    }
+    return nil
 }
 func getFlexVolumePod(volumeSource v1.VolumeSource, nodeName string) *v1.Pod {

View File

@@ -490,7 +490,7 @@ func waitForCSIDriverRegistrationOnAllNodes(driverName string, cs clientset.Inte
 func waitForCSIDriverRegistrationOnNode(nodeName string, driverName string, cs clientset.Interface) error {
     const csiNodeRegisterTimeout = 1 * time.Minute
-    return wait.PollImmediate(10*time.Second, csiNodeRegisterTimeout, func() (bool, error) {
+    waitErr := wait.PollImmediate(10*time.Second, csiNodeRegisterTimeout, func() (bool, error) {
         csiNode, err := cs.StorageV1().CSINodes().Get(nodeName, metav1.GetOptions{})
         if err != nil && !apierrors.IsNotFound(err) {
             return false, err
@@ -502,4 +502,8 @@ func waitForCSIDriverRegistrationOnNode(nodeName string, driverName string, cs clientset.Interface) error {
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for CSI driver %s registration on node %s: %v", driverName, nodeName, waitErr)
+    }
+    return nil
 }

View File

@@ -17,6 +17,7 @@ limitations under the License.
 package storage
 import (
+    "fmt"
     "time"
     "github.com/onsi/ginkgo"
@@ -170,6 +171,9 @@ func waitForDeploymentToRecreatePod(client clientset.Interface, deployment *apps
     var runningPod v1.Pod
     waitErr := wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) {
         podList, err := e2edeploy.GetPodsForDeployment(client, deployment)
+        if err != nil {
+            return false, fmt.Errorf("failed to get pods for deployment: %v", err)
+        }
         for _, pod := range podList.Items {
             switch pod.Status.Phase {
             case v1.PodRunning:
@@ -179,7 +183,10 @@ func waitForDeploymentToRecreatePod(client clientset.Interface, deployment *apps
                 return false, conditions.ErrPodCompleted
             }
         }
-        return false, err
+        return false, nil
     })
-    return runningPod, waitErr
+    if waitErr != nil {
+        return runningPod, fmt.Errorf("error waiting for recreated pod: %v", waitErr)
+    }
+    return runningPod, nil
 }

View File

@@ -428,13 +428,13 @@ var _ = utils.SIGDescribe("Pod Disks", func() {
             }
             ginkgo.By("evicting host0Pod")
             err = wait.PollImmediate(framework.Poll, podEvictTimeout, func() (bool, error) {
-                err = cs.CoreV1().Pods(ns).Evict(evictTarget)
-                if err != nil {
+                if err := cs.CoreV1().Pods(ns).Evict(evictTarget); err != nil {
+                    framework.Logf("Failed to evict host0Pod, ignoring error: %v", err)
                     return false, nil
                 }
                 return true, nil
             })
-            framework.ExpectNoError(err, fmt.Sprintf("failed to evict host0Pod after %v", podEvictTimeout))
+            framework.ExpectNoError(err, "failed to evict host0Pod after %v", podEvictTimeout)
         }
         ginkgo.By("waiting for pd to detach from host0")

View File

@@ -570,7 +570,8 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
         }()
         ginkgo.By("Waiting for all pods to complete successfully")
-        err := wait.PollImmediate(time.Second, 5*time.Minute, func() (done bool, err error) {
+        const completeTimeout = 5 * time.Minute
+        waitErr := wait.PollImmediate(time.Second, completeTimeout, func() (done bool, err error) {
             podsList, err := config.client.CoreV1().Pods(config.ns).List(metav1.ListOptions{})
             if err != nil {
                 return false, err
@@ -589,15 +590,15 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
                     delete(pods, pod.Name)
                     numFinished++
                     framework.Logf("%v/%v pods finished", numFinished, totalPods)
-                case v1.PodFailed:
                 case v1.PodUnknown:
                     return false, fmt.Errorf("pod %v is in %v phase", pod.Name, pod.Status.Phase)
+                case v1.PodFailed:
                 }
             }
             return numFinished == totalPods, nil
         })
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(waitErr, "some pods failed to complete within %v", completeTimeout)
     })
 })
@@ -648,7 +649,8 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
             pods[pod.Name] = pod
         }
         ginkgo.By("Wait for all pods are running")
-        err = wait.PollImmediate(time.Second, 5*time.Minute, func() (done bool, err error) {
+        const runningTimeout = 5 * time.Minute
+        waitErr := wait.PollImmediate(time.Second, runningTimeout, func() (done bool, err error) {
             podsList, err := config.client.CoreV1().Pods(config.ns).List(metav1.ListOptions{})
             if err != nil {
                 return false, err
@@ -662,7 +664,7 @@ var _ = utils.SIGDescribe("PersistentVolumes-local ", func() {
             }
             return runningPods == count, nil
         })
-        framework.ExpectNoError(err)
+        framework.ExpectNoError(waitErr, "Some pods are not running within %v", runningTimeout)
     })
 })
 })
@@ -918,17 +920,26 @@ func createLocalPVCsPVs(config *localTestConfig, volumes []*localTestVolume, mod
             verifyLocalVolume(config, volume)
         }
     } else {
-        // Verify PVCs are not bound
+        // Verify PVCs are not bound by waiting for phase==bound with a timeout and asserting that we hit the timeout.
         // There isn't really a great way to verify this without making the test be slow...
-        err = wait.PollImmediate(time.Second, 10*time.Second, func() (done bool, err error) {
+        const bindTimeout = 10 * time.Second
+        waitErr := wait.PollImmediate(time.Second, bindTimeout, func() (done bool, err error) {
            for _, volume := range volumes {
                 pvc, err := config.client.CoreV1().PersistentVolumeClaims(volume.pvc.Namespace).Get(volume.pvc.Name, metav1.GetOptions{})
-                framework.ExpectNoError(err)
-                framework.ExpectEqual(pvc.Status.Phase, v1.ClaimPending)
+                if err != nil {
+                    return false, fmt.Errorf("failed to get PVC %s/%s: %v", volume.pvc.Namespace, volume.pvc.Name, err)
+                }
+                if pvc.Status.Phase != v1.ClaimPending {
+                    return true, nil
+                }
             }
             return false, nil
         })
-        framework.ExpectError(err)
+        if waitErr == wait.ErrWaitTimeout {
+            framework.Logf("PVCs were not bound within %v (that's good)", bindTimeout)
+            waitErr = nil
+        }
+        framework.ExpectNoError(waitErr, "Error making sure PVCs are not bound")
     }
 }

View File

@@ -252,18 +252,17 @@ func testZonalFailover(c clientset.Interface, ns string) {
     } else {
         otherZone = cloudZones[0]
     }
-    err = wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
-        framework.Logf("checking whether new pod is scheduled in zone %q", otherZone)
-        pod = getPod(c, ns, regionalPDLabels)
-        nodeName = pod.Spec.NodeName
-        node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+    waitErr := wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
+        framework.Logf("Checking whether new pod is scheduled in zone %q", otherZone)
+        pod := getPod(c, ns, regionalPDLabels)
+        node, err := c.CoreV1().Nodes().Get(pod.Spec.NodeName, metav1.GetOptions{})
         if err != nil {
             return false, nil
         }
         newPodZone := node.Labels[v1.LabelZoneFailureDomain]
         return newPodZone == otherZone, nil
     })
-    framework.ExpectNoError(err, "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
+    framework.ExpectNoError(waitErr, "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
     err = waitForStatefulSetReplicasReady(statefulSet.Name, ns, c, 3*time.Second, framework.RestartPodReadyAgainTimeout)
     if err != nil {

View File

@@ -759,7 +759,7 @@ func waitForPodSubpathError(f *framework.Framework, pod *v1.Pod, allowContainerT
         return fmt.Errorf("failed to find container that uses subpath")
     }
-    return wait.PollImmediate(framework.Poll, framework.PodStartTimeout, func() (bool, error) {
+    waitErr := wait.PollImmediate(framework.Poll, framework.PodStartTimeout, func() (bool, error) {
         pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{})
         if err != nil {
             return false, err
@@ -786,6 +786,10 @@ func waitForPodSubpathError(f *framework.Framework, pod *v1.Pod, allowContainerT
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for pod subpath error to occur: %v", waitErr)
+    }
+    return nil
 }
 // Tests that the existing subpath mount is detected when a container restarts

View File

@@ -265,22 +265,32 @@ func ExpandPVCSize(origPVC *v1.PersistentVolumeClaim, size resource.Quantity, c
     pvcName := origPVC.Name
     updatedPVC := origPVC.DeepCopy()
+    // Retry the update on error, until we hit a timeout.
+    // TODO: Determine whether "retry with timeout" is appropriate here. Maybe we should only retry on version conflict.
+    var lastUpdateError error
     waitErr := wait.PollImmediate(resizePollInterval, 30*time.Second, func() (bool, error) {
         var err error
         updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Get(pvcName, metav1.GetOptions{})
         if err != nil {
-            return false, fmt.Errorf("error fetching pvc %q for resizing with %v", pvcName, err)
+            return false, fmt.Errorf("error fetching pvc %q for resizing: %v", pvcName, err)
         }
         updatedPVC.Spec.Resources.Requests[v1.ResourceStorage] = size
         updatedPVC, err = c.CoreV1().PersistentVolumeClaims(origPVC.Namespace).Update(updatedPVC)
-        if err == nil {
-            return true, nil
+        if err != nil {
+            framework.Logf("Error updating pvc %s: %v", pvcName, err)
+            lastUpdateError = err
+            return false, nil
         }
-        framework.Logf("Error updating pvc %s with %v", pvcName, err)
-        return false, nil
+        return true, nil
     })
-    return updatedPVC, waitErr
+    if waitErr == wait.ErrWaitTimeout {
+        return nil, fmt.Errorf("timed out attempting to update PVC size. last update error: %v", lastUpdateError)
+    }
+    if waitErr != nil {
+        return nil, fmt.Errorf("failed to expand PVC size (check logs for error): %v", waitErr)
+    }
+    return updatedPVC, nil
 }
 // WaitForResizingCondition waits for the pvc condition to be PersistentVolumeClaimResizing
@@ -290,7 +300,7 @@ func WaitForResizingCondition(pvc *v1.PersistentVolumeClaim, c clientset.Interfa
         updatedPVC, err := c.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(pvc.Name, metav1.GetOptions{})
         if err != nil {
-            return false, fmt.Errorf("error fetching pvc %q for checking for resize status : %v", pvc.Name, err)
+            return false, fmt.Errorf("error fetching pvc %q for checking for resize status: %v", pvc.Name, err)
         }
         pvcConditions := updatedPVC.Status.Conditions
@@ -301,13 +311,16 @@ func WaitForResizingCondition(pvc *v1.PersistentVolumeClaim, c clientset.Interfa
         }
         return false, nil
     })
-    return waitErr
+    if waitErr != nil {
+        return fmt.Errorf("error waiting for pvc %q to have resize status: %v", pvc.Name, waitErr)
+    }
+    return nil
 }
 // WaitForControllerVolumeResize waits for the controller resize to be finished
 func WaitForControllerVolumeResize(pvc *v1.PersistentVolumeClaim, c clientset.Interface, duration time.Duration) error {
     pvName := pvc.Spec.VolumeName
-    return wait.PollImmediate(resizePollInterval, duration, func() (bool, error) {
+    waitErr := wait.PollImmediate(resizePollInterval, duration, func() (bool, error) {
         pvcSize := pvc.Spec.Resources.Requests[v1.ResourceStorage]
         pv, err := c.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
@@ -323,6 +336,10 @@ func WaitForControllerVolumeResize(pvc *v1.PersistentVolumeClaim, c clientset.In
         }
         return false, nil
     })
+    if waitErr != nil {
+        return fmt.Errorf("error while waiting for controller resize to finish: %v", waitErr)
+    }
+    return nil
 }
 // WaitForPendingFSResizeCondition waits for pvc to have resize condition
@@ -347,7 +364,10 @@ func WaitForPendingFSResizeCondition(pvc *v1.PersistentVolumeClaim, c clientset.
         }
         return false, nil
     })
-    return updatedPVC, waitErr
+    if waitErr != nil {
+        return nil, fmt.Errorf("error waiting for pvc %q to have filesystem resize status: %v", pvc.Name, waitErr)
+    }
+    return updatedPVC, nil
 }
 // WaitForFSResize waits for the filesystem in the pv to be resized
@@ -370,5 +390,8 @@ func WaitForFSResize(pvc *v1.PersistentVolumeClaim, c clientset.Interface) (*v1.
         }
         return false, nil
     })
-    return updatedPVC, waitErr
+    if waitErr != nil {
+        return nil, fmt.Errorf("error waiting for pvc %q filesystem resize to finish: %v", pvc.Name, waitErr)
+    }
+    return updatedPVC, nil
 }

View File

@@ -178,7 +178,7 @@ func (t *volumeLimitsTestSuite) DefineTests(driver TestDriver, pattern testpatte
         framework.ExpectNoError(err)
         ginkgo.By("Waiting for all PVCs to get Bound")
-        l.pvNames, err = waitForAllPVCsPhase(l.cs, testSlowMultiplier*e2epv.PVBindingTimeout, l.pvcs)
+        l.pvNames, err = waitForAllPVCsBound(l.cs, testSlowMultiplier*e2epv.PVBindingTimeout, l.pvcs)
         framework.ExpectNoError(err)
         ginkgo.By("Waiting for the pod Running")
@@ -268,7 +268,8 @@ func cleanupTest(cs clientset.Interface, ns string, runningPodName, unschedulabl
     return nil
 }
-func waitForAllPVCsPhase(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
+// waitForAllPVCsBound waits until the given PVCs are all bound. It then returns the bound PVC names as a set.
+func waitForAllPVCsBound(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
     pvNames := sets.NewString()
     err := wait.Poll(5*time.Second, timeout, func() (bool, error) {
         unbound := 0
@@ -289,7 +290,10 @@ func waitForAllPVCsPhase(cs clientset.Interface, timeout time.Duration, pvcs []*v1.PersistentVolumeClaim) (sets.String, error) {
         }
         return true, nil
     })
-    return pvNames, err
+    if err != nil {
+        return nil, fmt.Errorf("error waiting for all PVCs to be bound: %v", err)
+    }
+    return pvNames, nil
 }
 func getNodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
@@ -327,7 +331,7 @@ func getInTreeNodeLimits(cs clientset.Interface, nodeName string, driverInfo *Dr
 }
 func getCSINodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
-    // Wait in a loop, the driver might just have been installed and kubelet takes a while to publish everything.
+    // Retry with a timeout, the driver might just have been installed and kubelet takes a while to publish everything.
     var limit int
     err := wait.PollImmediate(2*time.Second, csiNodeInfoTimeout, func() (bool, error) {
         csiNode, err := cs.StorageV1().CSINodes().Get(nodeName, metav1.GetOptions{})
@@ -355,5 +359,8 @@ func getCSINodeLimits(cs clientset.Interface, config *PerTestConfig, nodeName string, driverInfo *DriverInfo) (int, error) {
         limit = int(*csiDriver.Allocatable.Count)
         return true, nil
     })
-    return limit, err
+    if err != nil {
+        return 0, fmt.Errorf("could not get CSINode limit for driver %s: %v", driverInfo.Name, err)
+    }
+    return limit, nil
 }

View File

@@ -583,7 +583,7 @@ func PrivilegedTestPSPClusterRoleBinding(client clientset.Interface,
         _, err := roleBindingClient.Get(binding.GetName(), metav1.GetOptions{})
         return apierrors.IsNotFound(err), nil
     })
-    framework.ExpectNoError(err, "Timed out waiting for deletion: %v", err)
+    framework.ExpectNoError(err, "Timed out waiting for RBAC binding %s deletion: %v", binding.GetName(), err)
     if teardown {
         continue

View File

@@ -226,6 +226,8 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
             kubeletmetrics.VolumeStatsInodesFreeKey,
             kubeletmetrics.VolumeStatsInodesUsedKey,
         }
+        key := volumeStatKeys[0]
+        kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
         // Poll kubelet metrics waiting for the volume to be picked up
         // by the volume stats collector
         var kubeMetrics metrics.KubeletMetrics
@@ -238,14 +240,12 @@ var _ = utils.SIGDescribe("[Serial] Volume metrics", func() {
                 framework.Logf("Error fetching kubelet metrics")
                 return false, err
             }
-            key := volumeStatKeys[0]
-            kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
             if !findVolumeStatMetric(kubeletKeyName, pvc.Namespace, pvc.Name, kubeMetrics) {
                 return false, nil
             }
             return true, nil
         })
-        framework.ExpectNoError(waitErr, "Error finding volume metrics : %v", waitErr)
+        framework.ExpectNoError(waitErr, "Unable to find metric %s for PVC %s/%s", kubeletKeyName, pvc.Namespace, pvc.Name)
         for _, key := range volumeStatKeys {
             kubeletKeyName := fmt.Sprintf("%s_%s", kubeletmetrics.KubeletSubsystem, key)
@@ -566,7 +566,7 @@ func waitForDetachAndGrabMetrics(oldMetrics *storageControllerMetrics, metricsGr
     }
     waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
-    framework.ExpectNoError(waitErr, "Timeout error fetching storage c-m metrics : %v", waitErr)
+    framework.ExpectNoError(waitErr, "Unable to get updated metrics for plugin %s", pluginName)
     return updatedStorageMetrics
 }
@@ -697,8 +697,7 @@ func waitForPVControllerSync(metricsGrabber *metrics.Grabber, metricName, dimens
         return len(getPVControllerMetrics(updatedMetrics, metricName, dimension)) > 0, nil
     }
     waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
-    framework.ExpectNoError(waitErr,
-        "Timeout error fetching pv controller metrics : %v", waitErr)
+    framework.ExpectNoError(waitErr, "Unable to get pv controller metrics")
 }
 func getPVControllerMetrics(ms metrics.ControllerManagerMetrics, metricName, dimension string) map[string]int64 {
@@ -787,5 +786,5 @@ func waitForADControllerStatesMetrics(metricsGrabber *metrics.Grabber, metricNam
         return true, nil
     }
     waitErr := wait.ExponentialBackoff(backoff, verifyMetricFunc)
-    framework.ExpectNoError(waitErr, "Timeout error fetching A/D controller metrics : %v", waitErr)
+    framework.ExpectNoError(waitErr, "Unable to get A/D controller metrics")
 }

View File

@@ -513,18 +513,10 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
             ginkgo.By(fmt.Sprintf("Checking for residual PersistentVolumes associated with StorageClass %s", class.Name))
             residualPVs, err = waitForProvisionedVolumesDeleted(c, class.Name)
-            framework.ExpectNoError(err)
             // Cleanup the test resources before breaking
             defer deleteProvisionedVolumesAndDisks(c, residualPVs)
+            framework.ExpectNoError(err, "PersistentVolumes were not deleted as expected. %d remain", len(residualPVs))
-            // Report indicators of regression
-            if len(residualPVs) > 0 {
-                framework.Logf("Remaining PersistentVolumes:")
-                for i, pv := range residualPVs {
-                    framework.Logf("\t%d) %s", i+1, pv.Name)
-                }
-                framework.Failf("Expected 0 PersistentVolumes remaining. Found %d", len(residualPVs))
-            }
             framework.Logf("0 PersistentVolumes remain.")
         })
@@ -821,7 +813,9 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
             // ClaimProvisionTimeout in the very same loop.
             err = wait.Poll(time.Second, framework.ClaimProvisionTimeout, func() (bool, error) {
                 events, err := c.CoreV1().Events(claim.Namespace).List(metav1.ListOptions{})
-                framework.ExpectNoError(err)
+                if err != nil {
+                    return false, fmt.Errorf("could not list PVC events in %s: %v", claim.Namespace, err)
+                }
                 for _, event := range events.Items {
                     if strings.Contains(event.Message, "failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key") {
                         return true, nil
@@ -843,7 +837,7 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
                 framework.Logf("The test missed event about failed provisioning, but checked that no volume was provisioned for %v", framework.ClaimProvisionTimeout)
                 err = nil
             }
-            framework.ExpectNoError(err)
+            framework.ExpectNoError(err, "Error waiting for PVC to fail provisioning: %v", err)
         })
     })
 })
@@ -1033,7 +1027,10 @@ func waitForProvisionedVolumesDeleted(c clientset.Interface, scName string) ([]*
         }
         return true, nil // No PVs remain
     })
-    return remainingPVs, err
+    if err != nil {
+        return remainingPVs, fmt.Errorf("Error waiting for PVs to be deleted: %v", err)
+    }
+    return nil, nil
 }
 // deleteStorageClass deletes the passed in StorageClass and catches errors other than "Not Found"
@@ -1046,6 +1043,10 @@ func deleteStorageClass(c clientset.Interface, className string) {
 // deleteProvisionedVolumes [gce||gke only] iteratively deletes persistent volumes and attached GCE PDs.
 func deleteProvisionedVolumesAndDisks(c clientset.Interface, pvs []*v1.PersistentVolume) {
+    framework.Logf("Remaining PersistentVolumes:")
+    for i, pv := range pvs {
+        framework.Logf("\t%d) %s", i+1, pv.Name)
+    }
     for _, pv := range pvs {
         framework.ExpectNoError(e2epv.DeletePDWithRetry(pv.Spec.PersistentVolumeSource.GCEPersistentDisk.PDName))
         framework.ExpectNoError(e2epv.DeletePersistentVolume(c, pv.Name))

View File

@@ -69,12 +69,10 @@ const (
 // Wait until vsphere volumes are detached from the list of nodes or time out after 5 minutes
 func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
     var (
-        err error
-        disksAttached = true
         detachTimeout = 5 * time.Minute
         detachPollTime = 10 * time.Second
     )
-    err = wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {
+    waitErr := wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {
         attachedResult, err := disksAreAttached(nodeVolumes)
         if err != nil {
             return false, err
@@ -82,20 +80,19 @@ func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
         for nodeName, nodeVolumes := range attachedResult {
             for volumePath, attached := range nodeVolumes {
                 if attached {
-                    framework.Logf("Waiting for volumes %q to detach from %q.", volumePath, string(nodeName))
+                    framework.Logf("Volume %q is still attached to %q.", volumePath, string(nodeName))
                     return false, nil
                 }
             }
         }
-        disksAttached = false
         framework.Logf("Volume are successfully detached from all the nodes: %+v", nodeVolumes)
         return true, nil
     })
-    if err != nil {
-        return err
-    }
-    if disksAttached {
-        return fmt.Errorf("Gave up waiting for volumes to detach after %v", detachTimeout)
+    if waitErr != nil {
+        if waitErr == wait.ErrWaitTimeout {
+            return fmt.Errorf("volumes have not detached after %v: %v", detachTimeout, waitErr)
+        }
+        return fmt.Errorf("error waiting for volumes to detach: %v", waitErr)
     }
     return nil
 }
@@ -103,8 +100,6 @@ func waitForVSphereDisksToDetach(nodeVolumes map[string][]string) error {
 // Wait until vsphere vmdk moves to expected state on the given node, or time out after 6 minutes
 func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState volumeState) error {
     var (
-        err error
-        diskAttached bool
         currentState volumeState
         timeout = 6 * time.Minute
         pollTime = 10 * time.Second
@@ -120,8 +115,8 @@ func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState volumeState) error {
         volumeStateDetached: "detached from",
     }
-    err = wait.Poll(pollTime, timeout, func() (bool, error) {
-        diskAttached, err = diskIsAttached(volumePath, nodeName)
+    waitErr := wait.Poll(pollTime, timeout, func() (bool, error) {
+        diskAttached, err := diskIsAttached(volumePath, nodeName)
         if err != nil {
             return true, err
         }
@@ -134,14 +129,13 @@ func waitForVSphereDiskStatus(volumePath string, nodeName string, expectedState volumeState) error {
         framework.Logf("Waiting for Volume %q to be %s %q.", volumePath, attachedStateMsg[expectedState], nodeName)
         return false, nil
     })
-    if err != nil {
-        return err
-    }
-    if currentState != expectedState {
-        err = fmt.Errorf("Gave up waiting for Volume %q to be %s %q after %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout)
-    }
-    return err
+    if waitErr != nil {
+        if waitErr == wait.ErrWaitTimeout {
+            return fmt.Errorf("volume %q is not %s %q after %v: %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout, waitErr)
+        }
+        return fmt.Errorf("error waiting for volume %q to be %s %q: %v", volumePath, attachedStateMsg[expectedState], nodeName, waitErr)
+    }
+    return nil
 }
 // Wait until vsphere vmdk is attached from the given node or time out after 6 minutes

View File

@@ -147,20 +147,18 @@ var _ = utils.SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]",
 // Wait until the pod failed over to a different node, or time out after 3 minutes
 func waitForPodToFailover(client clientset.Interface, deployment *appsv1.Deployment, oldNode string) (string, error) {
     var (
-        err error
-        newNode string
         timeout = 3 * time.Minute
        pollTime = 10 * time.Second
     )
-    err = wait.Poll(pollTime, timeout, func() (bool, error) {
-        newNode, err = getNodeForDeployment(client, deployment)
+    waitErr := wait.Poll(pollTime, timeout, func() (bool, error) {
+        currentNode, err := getNodeForDeployment(client, deployment)
         if err != nil {
             return true, err
         }
-        if newNode != oldNode {
-            framework.Logf("The pod has been failed over from %q to %q", oldNode, newNode)
+        if currentNode != oldNode {
+            framework.Logf("The pod has been failed over from %q to %q", oldNode, currentNode)
             return true, nil
         }
@@ -168,12 +166,11 @@ func waitForPodToFailover(client clientset.Interface, deployment *appsv1.Deployment, oldNode string) (string, error) {
         return false, nil
     })
-    if err != nil {
-        if err == wait.ErrWaitTimeout {
-            framework.Logf("Time out after waiting for %v", timeout)
+    if waitErr != nil {
+        if waitErr == wait.ErrWaitTimeout {
+            return "", fmt.Errorf("pod has not failed over after %v: %v", timeout, waitErr)
         }
-        framework.Logf("Pod did not fail over from %q with error: %v", oldNode, err)
-        return "", err
+        return "", fmt.Errorf("pod did not fail over from %q: %v", oldNode, waitErr)
     }
     return getNodeForDeployment(client, deployment)