e2e framework: implement pod polling with gomega.Eventually

gomega.Eventually provides better progress reports: instead of filling up the
log with rather useless one-line messages that are not enough to to understand
the current state, it integrates with Gingko's progress reporting (SIGUSR1,
--poll-progress-after) and then dumps the same complete failure message as
after a timeout. That makes it possible to understand why progress isn't
getting made without having to wait for the timeout.

The other advantage is that the failure message for some unexpected pod state
becomes more readable: instead of encapsulating it as "observed object" inside
an error, it directly gets rendered by gomega.
This commit is contained in:
Patrick Ohly 2022-10-24 08:24:55 +02:00
parent 71dc81ec89
commit 005a9da0cc
3 changed files with 230 additions and 167 deletions

View File

@ -122,6 +122,9 @@ var timePrefix = regexp.MustCompile(`(?m)^[[:alpha:]]{3} +[[:digit:]]{1,2} +[[:d
// elapsedSuffix matches "Elapsed: 16.189µs"
var elapsedSuffix = regexp.MustCompile(`Elapsed: [[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m)`)
// afterSuffix matches "after 5.001s."
var afterSuffix = regexp.MustCompile(`after [[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m).`)
// timeSuffix matches "@ 09/06/22 15:36:43.44 (5.001s)" as printed by Ginkgo v2 for log output, with the duration being optional.
var timeSuffix = regexp.MustCompile(`(?m)@[[:space:]][[:digit:]]{2}/[[:digit:]]{2}/[[:digit:]]{2} [[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2}(\.[[:digit:]]{1,3})?( \([[:digit:]]+(\.[[:digit:]]+)?(µs|ns|ms|s|m)\))?$`)
@ -129,6 +132,7 @@ func stripTimes(in string) string {
out := timePrefix.ReplaceAllString(in, "")
out = elapsedSuffix.ReplaceAllString(out, "Elapsed: <elapsed>")
out = timeSuffix.ReplaceAllString(out, "<time>")
out = afterSuffix.ReplaceAllString(out, "after <after>.")
return out
}

View File

@ -550,18 +550,12 @@ func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface
}
// WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running.
// It does not need to exist yet when this function gets called and the pod is not expected to be recreated
// when it succeeds or fails.
func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
return WaitForPodCondition(ctx, c, namespace, podName, "running", timeout, func(pod *v1.Pod) (bool, error) {
switch pod.Status.Phase {
case v1.PodRunning:
return true, nil
case v1.PodFailed:
return false, errPodFailed
case v1.PodSucceeded:
return false, errPodCompleted
}
return false, nil
})
return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))).
WithTimeout(timeout).
Should(BeRunningNoRetries())
}
// WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running.

View File

@ -18,12 +18,13 @@ package pod_test
import (
"context"
"strings"
"fmt"
"testing"
"time"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/reporters"
"github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -31,6 +32,7 @@ import (
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/framework/internal/output"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
_ "k8s.io/kubernetes/test/utils/format" // activate YAML object dumps
)
// The line number of the following code is checked in TestFailureOutput below.
@ -45,17 +47,28 @@ import (
//
//
//
//
//
// This must be line #50.
var _ = ginkgo.Describe("pod", func() {
ginkgo.It("not found", func(ctx context.Context) {
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, "no-such-pod", "default", timeout /* no explanation here to cover that code path */))
ginkgo.It("not found, must exist", func(ctx context.Context) {
gomega.Eventually(ctx, framework.HandleRetry(getNoSuchPod)).WithTimeout(timeout).Should(e2epod.BeInPhase(v1.PodRunning))
})
ginkgo.It("not found, retry", func(ctx context.Context) {
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, "no-such-pod", "default", timeout))
})
ginkgo.It("not found, retry with wrappers", func(ctx context.Context) {
gomega.Eventually(ctx, framework.RetryNotFound(framework.HandleRetry(getNoSuchPod))).WithTimeout(timeout).Should(e2epod.BeInPhase(v1.PodRunning))
})
ginkgo.It("not found, retry with inverted wrappers", func(ctx context.Context) {
gomega.Eventually(ctx, framework.HandleRetry(framework.RetryNotFound(getNoSuchPod))).WithTimeout(timeout).Should(e2epod.BeInPhase(v1.PodRunning))
})
ginkgo.It("not running", func(ctx context.Context) {
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, podName, podNamespace, timeout), "wait for pod %s running", podName /* tests printf formatting */)
ginkgo.By(fmt.Sprintf("waiting for pod %s to run", podName))
framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, clientSet, podName, podNamespace, timeout))
})
ginkgo.It("failed", func(ctx context.Context) {
@ -63,16 +76,20 @@ var _ = ginkgo.Describe("pod", func() {
})
})
func getNoSuchPod(ctx context.Context) (*v1.Pod, error) {
return clientSet.CoreV1().Pods("default").Get(ctx, "no-such-pod", metav1.GetOptions{})
}
const (
podName = "pending-pod"
podNamespace = "default"
failedPodName = "failed-pod"
timeout = 5 * time.Second
timeout = time.Second
)
var (
clientSet = fake.NewSimpleClientset(
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: podName, Namespace: podNamespace}},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: podName, Namespace: podNamespace}, Status: v1.PodStatus{Phase: v1.PodPending}},
&v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: failedPodName, Namespace: podNamespace}, Status: v1.PodStatus{Phase: v1.PodFailed}},
)
)
@ -80,35 +97,40 @@ var (
func TestFailureOutput(t *testing.T) {
expected := output.TestResult{
// "INFO: Ignoring ..." or "INFO: Pod ..." will normally occur
// every two seconds, but we reduce it to one line because it
// might occur less often on a loaded system.
NormalizeOutput: func(output string) string {
return trimDuplicateLines(output, "INFO: ")
},
Suite: reporters.JUnitTestSuite{
Tests: 3,
Failures: 3,
Tests: 6,
Failures: 6,
Errors: 0,
Disabled: 0,
Skipped: 0,
TestCases: []reporters.JUnitTestCase{
{
Name: "[It] pod not found",
Name: "[It] pod not found, must exist",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] error while waiting for pod default/no-such-pod to be running: pods "no-such-pod" not found
Description: `[FAILED] Told to stop trying after <after>.
Unexpected final error while getting *v1.Pod: pods "no-such-pod" not found
In [It] at: wait_test.go:54 <time>
`,
},
SystemErr: `> Enter [It] not found - wait_test.go:53 <time>
INFO: Waiting up to 5s for pod "no-such-pod" in namespace "default" to be "running"
INFO: Ignoring NotFound error while getting pod default/no-such-pod
INFO: Unexpected error:
<*fmt.wrapError>: {
msg: "error while waiting for pod default/no-such-pod to be running: pods \"no-such-pod\" not found",
err: <*errors.StatusError>{
SystemErr: `> Enter [It] not found, must exist - wait_test.go:53 <time>
[FAILED] Told to stop trying after <after>.
Unexpected final error while getting *v1.Pod: pods "no-such-pod" not found
In [It] at: wait_test.go:54 <time>
< Exit [It] not found, must exist - wait_test.go:53 <time>
`,
},
{
Name: "[It] pod not found, retry",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
@ -125,134 +147,198 @@ INFO: Unexpected error:
},
},
}
[FAILED] error while waiting for pod default/no-such-pod to be running: pods "no-such-pod" not found
In [It] at: wait_test.go:54 <time>
< Exit [It] not found - wait_test.go:53 <time>
In [It] at: wait_test.go:58 <time>
`,
},
SystemErr: `> Enter [It] not found, retry - wait_test.go:57 <time>
[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:58 <time>
< Exit [It] not found, retry - wait_test.go:57 <time>
`,
},
{
Name: "[It] pod not found, retry with wrappers",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:62 <time>
`,
},
SystemErr: `> Enter [It] not found, retry with wrappers - wait_test.go:61 <time>
[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:62 <time>
< Exit [It] not found, retry with wrappers - wait_test.go:61 <time>
`,
},
{
Name: "[It] pod not found, retry with inverted wrappers",
Status: "failed",
Failure: &reporters.JUnitFailure{
Type: "failed",
Description: `[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:66 <time>
`,
},
SystemErr: `> Enter [It] not found, retry with inverted wrappers - wait_test.go:65 <time>
[FAILED] Timed out after <after>.
The function passed to Eventually returned the following error:
pods "no-such-pod" not found
<framework.transientError>: {
error: <*errors.StatusError>{
ErrStatus: {
TypeMeta: {Kind: "", APIVersion: ""},
ListMeta: {
SelfLink: "",
ResourceVersion: "",
Continue: "",
RemainingItemCount: nil,
},
Status: "Failure",
Message: "pods \"no-such-pod\" not found",
Reason: "NotFound",
Details: {Name: "no-such-pod", Group: "", Kind: "pods", UID: "", Causes: nil, RetryAfterSeconds: 0},
Code: 404,
},
},
}
In [It] at: wait_test.go:66 <time>
< Exit [It] not found, retry with inverted wrappers - wait_test.go:65 <time>
`,
},
{
Name: "[It] pod not running",
Status: "failed",
Failure: &reporters.JUnitFailure{
Description: `[FAILED] wait for pod pending-pod running: timed out while waiting for pod default/pending-pod to be running
In [It] at: wait_test.go:58 <time>
Description: `[FAILED] Timed out after <after>.
Expected Pod to be in <v1.PodPhase>: "Running"
Got instead:
<*v1.Pod>:
metadata:
creationTimestamp: null
name: pending-pod
namespace: default
spec:
containers: null
status:
phase: Pending
In [It] at: wait_test.go:71 <time>
`,
Type: "failed",
},
SystemErr: `> Enter [It] not running - wait_test.go:57 <time>
INFO: Waiting up to 5s for pod "pending-pod" in namespace "default" to be "running"
INFO: Pod "pending-pod": Phase="", Reason="", readiness=false. Elapsed: <elapsed>
INFO: Unexpected error: wait for pod pending-pod running:
<*pod.timeoutError>: {
msg: "timed out while waiting for pod default/pending-pod to be running",
observedObjects: [
<*v1.Pod>{
TypeMeta: {Kind: "", APIVersion: ""},
ObjectMeta: {
Name: "pending-pod",
GenerateName: "",
Namespace: "default",
SelfLink: "",
UID: "",
ResourceVersion: "",
Generation: 0,
CreationTimestamp: {
Time: {wall: 0, ext: 0, loc: nil},
},
DeletionTimestamp: nil,
DeletionGracePeriodSeconds: nil,
Labels: nil,
Annotations: nil,
OwnerReferences: nil,
Finalizers: nil,
ManagedFields: nil,
},
Spec: {
Volumes: nil,
InitContainers: nil,
Containers: nil,
EphemeralContainers: nil,
RestartPolicy: "",
TerminationGracePeriodSeconds: nil,
ActiveDeadlineSeconds: nil,
DNSPolicy: "",
NodeSelector: nil,
ServiceAccountName: "",
DeprecatedServiceAccount: "",
AutomountServiceAccountToken: nil,
NodeName: "",
HostNetwork: false,
HostPID: false,
HostIPC: false,
ShareProcessNamespace: nil,
SecurityContext: nil,
ImagePullSecrets: nil,
Hostname: "",
Subdomain: "",
Affinity: nil,
SchedulerName: "",
Tolerations: nil,
HostAliases: nil,
PriorityClassName: "",
Priority: nil,
DNSConfig: nil,
ReadinessGates: nil,
RuntimeClassName: nil,
EnableServiceLinks: nil,
PreemptionPolicy: nil,
Overhead: nil,
TopologySpreadConstraints: nil,
SetHostnameAsFQDN: nil,
OS: nil,
HostUsers: nil,
SchedulingGates: nil,
ResourceClaims: nil,
},
Status: {
Phase: "",
Conditions: nil,
Message: "",
Reason: "",
NominatedNodeName: "",
HostIP: "",
PodIP: "",
PodIPs: nil,
StartTime: nil,
InitContainerStatuses: nil,
ContainerStatuses: nil,
QOSClass: "",
EphemeralContainerStatuses: nil,
},
},
],
}
[FAILED] wait for pod pending-pod running: timed out while waiting for pod default/pending-pod to be running
In [It] at: wait_test.go:58 <time>
< Exit [It] not running - wait_test.go:57 <time>
SystemErr: `> Enter [It] not running - wait_test.go:69 <time>
STEP: waiting for pod pending-pod to run - wait_test.go:70 <time>
[FAILED] Timed out after <after>.
Expected Pod to be in <v1.PodPhase>: "Running"
Got instead:
<*v1.Pod>:
metadata:
creationTimestamp: null
name: pending-pod
namespace: default
spec:
containers: null
status:
phase: Pending
In [It] at: wait_test.go:71 <time>
< Exit [It] not running - wait_test.go:69 <time>
`,
},
{
Name: "[It] pod failed",
Status: "failed",
Failure: &reporters.JUnitFailure{
Description: `[FAILED] error while waiting for pod default/failed-pod to be running: final error: pod failed permanently
In [It] at: wait_test.go:62 <time>
Description: `[FAILED] Told to stop trying after <after>.
Expected pod to reach phase "Running", got final phase "Failed" instead.
In [It] at: wait_test.go:75 <time>
`,
Type: "failed",
},
SystemErr: `> Enter [It] failed - wait_test.go:61 <time>
INFO: Waiting up to 5s for pod "failed-pod" in namespace "default" to be "running"
<*fmt.wrapError>: {
msg: "error while waiting for pod default/failed-pod to be running: final error: pod failed permanently",
err: <*pod.FinalErr>{
Err: <*errors.errorString>{
s: "pod failed permanently",
},
},
}
[FAILED] error while waiting for pod default/failed-pod to be running: final error: pod failed permanently
In [It] at: wait_test.go:62 <time>
< Exit [It] failed - wait_test.go:61 <time>
SystemErr: `> Enter [It] failed - wait_test.go:74 <time>
[FAILED] Told to stop trying after <after>.
Expected pod to reach phase "Running", got final phase "Failed" instead.
In [It] at: wait_test.go:75 <time>
< Exit [It] failed - wait_test.go:74 <time>
`,
},
},
@ -260,24 +346,3 @@ In [It] at: wait_test.go:62 <time>
}
output.TestGinkgoOutput(t, expected)
}
func trimDuplicateLines(output, prefix string) string {
lines := strings.Split(output, "\n")
trimming := false
validLines := 0
for i := 0; i < len(lines); i++ {
if strings.HasPrefix(lines[i], prefix) {
// Keep the first line, and only that one.
if !trimming {
trimming = true
lines[validLines] = lines[i]
validLines++
}
} else {
trimming = false
lines[validLines] = lines[i]
validLines++
}
}
return strings.Join(lines[0:validLines], "\n")
}