diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 7de25fa3553..7438c0ca150 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -84,22 +84,22 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { }) }) - ginkgo.Context("Inject a pull image error exception, then reset it, into the CriProxy", func() { + ginkgo.Context("Image pull backoff", func() { ginkgo.BeforeEach(func() { - if err := resetCRIProxyInjector(); err != nil { + if err := resetCRIProxyInjector(e2eCriProxy); err != nil { ginkgo.Skip("Skip the test since the CRI Proxy is undefined.") } }) ginkgo.AfterEach(func() { - err := resetCRIProxyInjector() + err := resetCRIProxyInjector(e2eCriProxy) framework.ExpectNoError(err) }) ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { // inject PullImage failed to trigger backoff expectedErr := fmt.Errorf("PullImage failed") - err := addCRIProxyInjector(func(apiName string) error { + err := addCRIProxyInjector(e2eCriProxy, func(apiName string) error { if apiName == criproxy.PullImage { return expectedErr } @@ -107,7 +107,8 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { }) framework.ExpectNoError(err) - pod := e2epod.NewPodClient(f).Create(ctx, newPullImageAlwaysPod()) + pod := e2epod.NewPodClient(f).Create(ctx, newImageBackoffPod()) + framework.Logf("pod name: %s", pod.Name) podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) { if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() { return true, nil @@ -121,22 +122,16 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) - // Wait for ~40s worth of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 40*time.Second) + // Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth. + podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute) gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls") - // Remove error so after next backoff we will succeed. - _ = resetCRIProxyInjector() - - podErr = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod) - framework.ExpectNoError(podErr) - - // Parse observed backoffs (TODO: don't use Events, but logs) and compare to expectations - durations, err := getImageBackOffDurations(ctx, f, pod.Name) + e, err := getImageBackoffs(ctx, f, pod.Name) framework.ExpectNoError(err) - gomega.Expect(durations).Error().ShouldNot(gomega.BeNil(), "Should have observed backoffs in Pod event log") - gomega.Expect(durations[0]).Should(gomega.BeNumerically("~", time.Duration(10*time.Second), time.Duration(2*time.Second))) - // TODO: and check the next set of durations are 2x, etc + // 3 would take 10s best case + gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3)) + // 6 would take 150s best case + gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6)) }) }) @@ -190,49 +185,20 @@ func getFailedToPullImageMsg(ctx context.Context, f *framework.Framework, podNam return "", fmt.Errorf("failed to find FailedToPullImage event for pod: %s", podName) } -func getImageBackOffDurations(ctx context.Context, f *framework.Framework, podName string) ([]time.Duration, error) { - events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) +func getImageBackoffs(ctx context.Context, f *framework.Framework, podName string) (v1.Event, error) { + + event := v1.Event{} + e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) if err != nil { - return nil, err + return event, err } - var backoffs []time.Duration - - type BackOffRecord struct { - initialEventTime time.Time - backoffEventTimes []time.Time - duration time.Duration - } - - records := make(map[int]*BackOffRecord) - records[0] = &BackOffRecord{} - var backoffCount int - var pullTime time.Time - var r *BackOffRecord - // I'm doing this here for events but really it needs to be off kubelet logs or some kind of synchronous watch - // Because the events normalize to the latest - // But this is the idea - for _, event := range events.Items { - if event.InvolvedObject.Name == podName { - switch event.Reason { - case kubeletevents.PullingImage: - if !pullTime.IsZero() { - if event.FirstTimestamp.Time.After(pullTime) { - r = records[backoffCount] - r.duration = r.initialEventTime.Sub(r.backoffEventTimes[len(r.backoffEventTimes)-1]) - backoffs = append(backoffs, r.duration) - backoffCount++ - } - } - pullTime = event.FirstTimestamp.Time - records[backoffCount].initialEventTime = pullTime - case kubeletevents.BackOffPullImage: - current := records[backoffCount].backoffEventTimes - current = append(current, event.FirstTimestamp.Time) - } + for _, event := range e.Items { + if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.PullingImage { + return event, nil } } - return backoffs, nil + return event, nil } func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { @@ -260,6 +226,25 @@ func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podNam return endTime.Sub(startTime), nil } +func newImageBackoffPod() *v1.Pod { + podName := "image-backoff" + string(uuid.NewUUID()) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: imageutils.GetPauseImageName(), + Name: podName, + ImagePullPolicy: v1.PullAlways, + }, + }, + }, + } + return pod +} + func newPullImageAlwaysPod() *v1.Pod { podName := "cri-proxy-test-" + string(uuid.NewUUID()) pod := &v1.Pod{