e2e framework: implement pod polling with gomega.Eventually

gomega.Eventually provides better progress reports: instead of filling up the
log with rather useless one-line messages that are not enough to to understand
the current state, it integrates with Gingko's progress reporting (SIGUSR1,
--poll-progress-after) and then dumps the same complete failure message as
after a timeout. That makes it possible to understand why progress isn't
getting made without having to wait for the timeout.

The other advantage is that the failure message for some unexpected pod state
becomes more readable: instead of encapsulating it as "observed object" inside
an error, it directly gets rendered by gomega.
This commit is contained in:
Patrick Ohly 2022-10-24 08:24:55 +02:00
parent 71dc81ec89
commit 005a9da0cc
3 changed files with 230 additions and 167 deletions

View File

@ -122,6 +122,9 @@ var timePrefix = regexp.MustCompile(`(?m)^[[:alpha:]]{3} +[[:digit:]]{1,2} +[[:d
// elapsedSuffix matches "Elapsed: 16.189µs" // elapsedSuffix matches "Elapsed: 16.189µs"
var elapsedSuffix = regexp.MustCompile(`Elapsed: [[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m)`) var elapsedSuffix = regexp.MustCompile(`Elapsed: [[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m)`)
// afterSuffix matches "after 5.001s."
var afterSuffix = regexp.MustCompile(`after [[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m).`)
// timeSuffix matches "@ 09/06/22 15:36:43.44 (5.001s)" as printed by Ginkgo v2 for log output, with the duration being optional. // timeSuffix matches "@ 09/06/22 15:36:43.44 (5.001s)" as printed by Ginkgo v2 for log output, with the duration being optional.
var timeSuffix = regexp.MustCompile(`(?m)@[[:space:]][[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{2} [[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2}(\.[[:digit:]]{1,3})?( \([[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m)\))?$`) var timeSuffix = regexp.MustCompile(`(?m)@[[:space:]][[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{2} [[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2}(\.[[:digit:]]{1,3})?( \([[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m)\))?$`)
@ -129,6 +132,7 @@ func stripTimes(in string) string {
out := timePrefix.ReplaceAllString(in, "") out := timePrefix.ReplaceAllString(in, "")
out = elapsedSuffix.ReplaceAllString(out, "Elapsed: <elapsed>") out = elapsedSuffix.ReplaceAllString(out, "Elapsed: <elapsed>")
out = timeSuffix.ReplaceAllString(out, "<time>") out = timeSuffix.ReplaceAllString(out, "<time>")
out = afterSuffix.ReplaceAllString(out, "after <after>.")
return out return out
} }

View File

@ -550,18 +550,12 @@ func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface
} }
// WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running. // WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running.
// It does not need to exist yet when this function gets called and the pod is not expected to be recreated
// when it succeeds or fails.
func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
return WaitForPodCondition(ctx, c, namespace, podName, "running", timeout, func(pod *v1.Pod) (bool, error) { return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))).
switch pod.Status.Phase { WithTimeout(timeout).
case v1.PodRunning: Should(BeRunningNoRetries())
return true, nil
case v1.PodFailed:
return false, errPodFailed
case v1.PodSucceeded:
return false, errPodCompleted
}
return false, nil
})
} }
// WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running. // WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running.

View File

@ -18,12 +18,13 @@ package pod_test
import ( import (
"context" "context"
"strings" "fmt"
"testing" "testing"
"time" "time"
"github.com/onsi/ginkgo/v2" "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/reporters" "github.com/onsi/ginkgo/v2/reporters"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -31,6 +32,7 @@ import (
"k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/internal/output" "k8s.io/kubernetes/test/e2e/framework/internal/output"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
_ "k8s.io/kubernetes/test/utils/format" // activate YAML object dumps
) )
// The line number of the following code is checked in TestFailureOutput below. // The line number of the following code is checked in TestFailureOutput below.
@ -45,17 +47,28 @@ import (
// //
// //
// //
//
//
// This must be line #50. // This must be line #50.
var _ = ginkgo.Describe("pod", func() { var _ = ginkgo.Describe("pod", func() {
ginkgo.It("not found", func(ctx context.Context) { ginkgo.It("not found, must exist", func(ctx context.Context) {
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, "no-such-pod", "default", timeout /* no explanation here to cover that code path */)) gomega.Eventually(ctx, framework.HandleRetry(getNoSuchPod)).WithTimeout(timeout).Should(e2epod.BeInPhase(v1.PodRunning))
})
ginkgo.It("not found, retry", func(ctx context.Context) {
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, "no-such-pod", "default", timeout))
})
ginkgo.It("not found, retry with wrappers", func(ctx context.Context) {
gomega.Eventually(ctx, framework.RetryNotFound(framework.HandleRetry(getNoSuchPod))).WithTimeout(timeout).Should(e2epod.BeInPhase(v1.PodRunning))
})
ginkgo.It("not found, retry with inverted wrappers", func(ctx context.Context) {
gomega.Eventually(ctx, framework.HandleRetry(framework.RetryNotFound(getNoSuchPod))).WithTimeout(timeout).Should(e2epod.BeInPhase(v1.PodRunning))
}) })
ginkgo.It("not running", func(ctx context.Context) { ginkgo.It("not running", func(ctx context.Context) {
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, podName, podNamespace, timeout), "wait for pod %s running", podName /* tests printf formatting */) ginkgo.By(fmt.Sprintf("waiting for pod %s to run", podName))
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, podName, podNamespace, timeout))
}) })
ginkgo.It("failed", func(ctx context.Context) { ginkgo.It("failed", func(ctx context.Context) {
@ -63,16 +76,20 @@ var _ = ginkgo.Describe("pod", func() {
}) })
}) })
func getNoSuchPod(ctx context.Context) (*v1.Pod, error) {
return clientSet.CoreV1().Pods("default").Get(ctx, "no-such-pod", metav1.GetOptions{})
}
const ( const (
podName = "pending-pod" podName = "pending-pod"
podNamespace = "default" podNamespace = "default"
failedPodName = "failed-pod" failedPodName = "failed-pod"
timeout = 5 * time.Second timeout = time.Second
) )
var ( var (
clientSet = fake.NewSimpleClientset( clientSet = fake.NewSimpleClientset(
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: podName, Namespace: podNamespace}}, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: podName, Namespace: podNamespace}, Status: v1.PodStatus{Phase: v1.PodPending}},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: failedPodName, Namespace: podNamespace}, Status: v1.PodStatus{Phase: v1.PodFailed}}, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: failedPodName, Namespace: podNamespace}, Status: v1.PodStatus{Phase: v1.PodFailed}},
) )
) )
@ -80,35 +97,40 @@ var (
func TestFailureOutput(t *testing.T) { func TestFailureOutput(t *testing.T) {
expected := output.TestResult{ expected := output.TestResult{
// "INFO: Ignoring ..." or "INFO: Pod ..." will normally occur
// every two seconds, but we reduce it to one line because it
// might occur less often on a loaded system.
NormalizeOutput: func(output string) string {
return trimDuplicateLines(output, "INFO: ")
},
Suite: reporters.JUnitTestSuite{ Suite: reporters.JUnitTestSuite{
Tests: 3, Tests: 6,
Failures: 3, Failures: 6,
Errors: 0, Errors: 0,
Disabled: 0, Disabled: 0,
Skipped: 0, Skipped: 0,
TestCases: []reporters.JUnitTestCase{ TestCases: []reporters.JUnitTestCase{
{ {
Name: "[It] pod not found", Name: "[It] pod not found, must exist",
Status: "failed", Status: "failed",
Failure: &reporters.JUnitFailure{ Failure: &reporters.JUnitFailure{
Type: "failed", Type: "failed",
Description: `[FAILED] error while waiting for pod default/no-such-pod to be running: pods "no-such-pod" not found Description: `[FAILED] Told to stop trying after <after>.
Unexpected final error while getting *v1.Pod: pods "no-such-pod" not found
In [It] at: wait_test.go:54 <time> In [It] at: wait_test.go:54 <time>
`, `,
}, },
SystemErr: `> Enter [It] not found - wait_test.go:53 <time> SystemErr: `> Enter [It] not found, must exist - wait_test.go:53 <time>
INFO: Waiting up to 5s for pod "no-such-pod" in namespace "default" to be "running" [FAILED] Told to stop trying after <after>.
INFO: Ignoring NotFound error while getting pod default/no-such-pod Unexpected final error while getting *v1.Pod: pods "no-such-pod" not found
INFO: Unexpected error: In [It] at: wait_test.go:54 <time>
<*fmt.wrapError>: { < Exit [It] not found, must exist - wait_test.go:53 <time>
msg: "error while waiting for pod default/no-such-pod to be running: pods \"no-such-pod\" not found", `,
err: <*errors.StatusError>{ },
{
Name: "[It] pod not found, retry",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: { ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""}, TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: { ListMeta: {
@ -125,134 +147,198 @@ INFO: Unexpected error:
}, },
}, },
} }
[FAILED] error while waiting for pod default/no-such-pod to be running: pods "no-such-pod" not found In [It] at: wait_test.go:58 <time>
In [It] at: wait_test.go:54 <time> `,
< Exit [It] not found - wait_test.go:53 <time> },
SystemErr: `> Enter [It] not found, retry - wait_test.go:57 <time>
[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:58 <time>
< Exit [It] not found, retry - wait_test.go:57 <time>
`,
},
{
Name: "[It] pod not found, retry with wrappers",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:62 <time>
`,
},
SystemErr: `> Enter [It] not found, retry with wrappers - wait_test.go:61 <time>
[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:62 <time>
< Exit [It] not found, retry with wrappers - wait_test.go:61 <time>
`,
},
{
Name: "[It] pod not found, retry with inverted wrappers",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:66 <time>
`,
},
SystemErr: `> Enter [It] not found, retry with inverted wrappers - wait_test.go:65 <time>
[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:66 <time>
< Exit [It] not found, retry with inverted wrappers - wait_test.go:65 <time>
`, `,
}, },
{ {
Name: "[It] pod not running", Name: "[It] pod not running",
Status: "failed", Status: "failed",
Failure: &reporters.JUnitFailure{ Failure: &reporters.JUnitFailure{
Description: `[FAILED] wait for pod pending-pod running: timed out while waiting for pod default/pending-pod to be running Description: `[FAILED] Timed out after <after>.
In [It] at: wait_test.go:58 <time> Expected Pod to be in <v1.PodPhase>: "Running"
Got instead:
<*v1.Pod>:
metadata:
creationTimestamp: null
name: pending-pod
namespace: default
spec:
containers: null
status:
phase: Pending
In [It] at: wait_test.go:71 <time>
`, `,
Type: "failed", Type: "failed",
}, },
SystemErr: `> Enter [It] not running - wait_test.go:57 <time> SystemErr: `> Enter [It] not running - wait_test.go:69 <time>
INFO: Waiting up to 5s for pod "pending-pod" in namespace "default" to be "running" STEP: waiting for pod pending-pod to run - wait_test.go:70 <time>
INFO: Pod "pending-pod": Phase="", Reason="", readiness=false. Elapsed: <elapsed> [FAILED] Timed out after <after>.
INFO: Unexpected error: wait for pod pending-pod running: Expected Pod to be in <v1.PodPhase>: "Running"
<*pod.timeoutError>: { Got instead:
msg: "timed out while waiting for pod default/pending-pod to be running", <*v1.Pod>:
observedObjects: [ metadata:
<*v1.Pod>{ creationTimestamp: null
TypeMeta: {Kind: "", APIVersion: ""}, name: pending-pod
ObjectMeta: { namespace: default
Name: "pending-pod", spec:
GenerateName: "", containers: null
Namespace: "default", status:
SelfLink: "", phase: Pending
UID: "", In [It] at: wait_test.go:71 <time>
ResourceVersion: "", < Exit [It] not running - wait_test.go:69 <time>
Generation: 0,
CreationTimestamp: {
Time: {wall: 0, ext: 0, loc: nil},
},
DeletionTimestamp: nil,
DeletionGracePeriodSeconds: nil,
Labels: nil,
Annotations: nil,
OwnerReferences: nil,
Finalizers: nil,
ManagedFields: nil,
},
Spec: {
Volumes: nil,
InitContainers: nil,
Containers: nil,
EphemeralContainers: nil,
RestartPolicy: "",
TerminationGracePeriodSeconds: nil,
ActiveDeadlineSeconds: nil,
DNSPolicy: "",
NodeSelector: nil,
ServiceAccountName: "",
DeprecatedServiceAccount: "",
AutomountServiceAccountToken: nil,
NodeName: "",
HostNetwork: false,
HostPID: false,
HostIPC: false,
ShareProcessNamespace: nil,
SecurityContext: nil,
ImagePullSecrets: nil,
Hostname: "",
Subdomain: "",
Affinity: nil,
SchedulerName: "",
Tolerations: nil,
HostAliases: nil,
PriorityClassName: "",
Priority: nil,
DNSConfig: nil,
ReadinessGates: nil,
RuntimeClassName: nil,
EnableServiceLinks: nil,
PreemptionPolicy: nil,
Overhead: nil,
TopologySpreadConstraints: nil,
SetHostnameAsFQDN: nil,
OS: nil,
HostUsers: nil,
SchedulingGates: nil,
ResourceClaims: nil,
},
Status: {
Phase: "",
Conditions: nil,
Message: "",
Reason: "",
NominatedNodeName: "",
HostIP: "",
PodIP: "",
PodIPs: nil,
StartTime: nil,
InitContainerStatuses: nil,
ContainerStatuses: nil,
QOSClass: "",
EphemeralContainerStatuses: nil,
},
},
],
}
[FAILED] wait for pod pending-pod running: timed out while waiting for pod default/pending-pod to be running
In [It] at: wait_test.go:58 <time>
< Exit [It] not running - wait_test.go:57 <time>
`, `,
}, },
{ {
Name: "[It] pod failed", Name: "[It] pod failed",
Status: "failed", Status: "failed",
Failure: &reporters.JUnitFailure{ Failure: &reporters.JUnitFailure{
Description: `[FAILED] error while waiting for pod default/failed-pod to be running: final error: pod failed permanently Description: `[FAILED] Told to stop trying after <after>.
In [It] at: wait_test.go:62 <time> Expected pod to reach phase "Running", got final phase "Failed" instead.
In [It] at: wait_test.go:75 <time>
`, `,
Type: "failed", Type: "failed",
}, },
SystemErr: `> Enter [It] failed - wait_test.go:61 <time> SystemErr: `> Enter [It] failed - wait_test.go:74 <time>
INFO: Waiting up to 5s for pod "failed-pod" in namespace "default" to be "running" [FAILED] Told to stop trying after <after>.
<*fmt.wrapError>: { Expected pod to reach phase "Running", got final phase "Failed" instead.
msg: "error while waiting for pod default/failed-pod to be running: final error: pod failed permanently", In [It] at: wait_test.go:75 <time>
err: <*pod.FinalErr>{ < Exit [It] failed - wait_test.go:74 <time>
Err: <*errors.errorString>{
s: "pod failed permanently",
},
},
}
[FAILED] error while waiting for pod default/failed-pod to be running: final error: pod failed permanently
In [It] at: wait_test.go:62 <time>
< Exit [It] failed - wait_test.go:61 <time>
`, `,
}, },
}, },
@ -260,24 +346,3 @@ In [It] at: wait_test.go:62 <time>
} }
output.TestGinkgoOutput(t, expected) output.TestGinkgoOutput(t, expected)
} }
func trimDuplicateLines(output, prefix string) string {
lines := strings.Split(output, "\n")
trimming := false
validLines := 0
for i := 0; i < len(lines); i++ {
if strings.HasPrefix(lines[i], prefix) {
// Keep the first line, and only that one.
if !trimming {
trimming = true
lines[validLines] = lines[i]
validLines++
}
} else {
trimming = false
lines[validLines] = lines[i]
validLines++
}
}
return strings.Join(lines[0:validLines], "\n")
}