From 004c1b644164601c99f51ce5a80b9fe9f5fd3534 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 16 Sep 2022 09:00:08 +0200 Subject: [PATCH] e2e framework: abort polling for running pod early When the pod state is failed or completed, the pod will never again enter the running state and polling can stop. --- test/e2e/framework/pod/resource.go | 5 ++-- test/e2e/framework/pod/wait.go | 39 +++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/test/e2e/framework/pod/resource.go b/test/e2e/framework/pod/resource.go index 0adc985f3ca..e99b4e12da7 100644 --- a/test/e2e/framework/pod/resource.go +++ b/test/e2e/framework/pod/resource.go @@ -18,6 +18,7 @@ package pod import ( "context" + "errors" "fmt" "os" "path/filepath" @@ -41,11 +42,11 @@ import ( // errPodCompleted is returned by PodRunning or PodContainerRunning to indicate that // the pod has already reached completed state. -var errPodCompleted = fmt.Errorf("pod ran to completion successfully") +var errPodCompleted = FinalError(errors.New("pod ran to completion successfully")) // errPodFailed is returned by PodRunning or PodContainerRunning to indicate that // the pod has already reached a permanent failue state. -var errPodFailed = fmt.Errorf("pod failed permanently") +var errPodFailed = FinalError(errors.New("pod failed permanently")) // LabelLogOnPodFailure can be used to mark which Pods will have their logs logged in the case of // a test failure. By default, if there are no Pods with this label, only the first 5 Pods will diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 5f97f7c0e61..8ffa969e0a2 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -81,6 +81,39 @@ func TimeoutError(msg string, observedObjects ...interface{}) *timeoutError { } } +// FinalError constructs an error that indicates to a poll function that +// polling can be stopped immediately because some permanent error has been +// encountered that is not going to go away. +// +// TODO (@pohly): move this into framework once the refactoring from +// https://github.com/kubernetes/kubernetes/pull/112043 allows it. Right now it +// leads to circular dependencies. +func FinalError(err error) error { + return &FinalErr{Err: err} +} + +type FinalErr struct { + Err error +} + +func (err *FinalErr) Error() string { + if err.Err != nil { + return fmt.Sprintf("final error: %s", err.Err.Error()) + } + return "final error, exact problem unknown" +} + +func (err *FinalErr) Unwrap() error { + return err.Err +} + +// IsFinal checks whether the error was marked as final by wrapping some error +// with FinalError. +func IsFinal(err error) bool { + var finalErr *FinalErr + return errors.As(err, &finalErr) +} + // maybeTimeoutError returns a TimeoutError if err is a timeout. Otherwise, wrap err. // taskFormat and taskArgs should be the task being performed when the error occurred, // e.g. "waiting for pod to be running". @@ -244,6 +277,8 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN } // WaitForPodCondition waits a pods to be matched to the given condition. +// If the condition callback returns an error that matches FinalErr (checked with IsFinal), +// then polling aborts early. func WaitForPodCondition(c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error { e2elog.Logf("Waiting up to %v for pod %q in namespace %q to be %q", timeout, podName, ns, conditionDesc) var ( @@ -268,8 +303,10 @@ func WaitForPodCondition(c clientset.Interface, ns, podName, conditionDesc strin } return true, err } else if err != nil { - // TODO(#109732): stop polling and return the error in this case. e2elog.Logf("Error evaluating pod condition %s: %v", conditionDesc, err) + if IsFinal(err) { + return false, err + } } return false, nil })