Fix test to count pull tries
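
Instead of reconstructing individual backoff durations from event
timestamps, assert on the Count of the aggregated "Pulling" event:
with the pull error injected for ~60s, the kubelet's exponential
image pull backoff should produce more than 3 and at most 6 pull
tries. Also pass the e2eCriProxy instance explicitly to the
addCRIProxyInjector/resetCRIProxyInjector helpers and create the
test pod via a dedicated newImageBackoffPod helper.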

Signed-off-by: Laura Lorenz <lauralorenz@google.com>
Laura Lorenz 2024-11-11 22:34:15 +00:00
parent 6ef05dbd01
commit 2479d91f2a

@@ -84,22 +84,22 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() {
 		})
 	})
-	ginkgo.Context("Inject a pull image error exception, then reset it, into the CriProxy", func() {
+	ginkgo.Context("Image pull backoff", func() {
 		ginkgo.BeforeEach(func() {
-			if err := resetCRIProxyInjector(); err != nil {
+			if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
				ginkgo.Skip("Skip the test since the CRI Proxy is undefined.")
 			}
 		})
 		ginkgo.AfterEach(func() {
-			err := resetCRIProxyInjector()
+			err := resetCRIProxyInjector(e2eCriProxy)
 			framework.ExpectNoError(err)
 		})
 		ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) {
 			// inject PullImage failed to trigger backoff
 			expectedErr := fmt.Errorf("PullImage failed")
-			err := addCRIProxyInjector(func(apiName string) error {
+			err := addCRIProxyInjector(e2eCriProxy, func(apiName string) error {
 				if apiName == criproxy.PullImage {
 					return expectedErr
 				}
@@ -107,7 +107,8 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() {
 			})
 			framework.ExpectNoError(err)
-			pod := e2epod.NewPodClient(f).Create(ctx, newPullImageAlwaysPod())
+			pod := e2epod.NewPodClient(f).Create(ctx, newImageBackoffPod())
+			framework.Logf("pod name: %s", pod.Name)
 			podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) {
 				if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() {
 					return true, nil
@@ -121,22 +122,16 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() {
 			isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error())
 			gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy"))
-			// Wait for ~40s worth of backoffs to occur so we can confirm the backoff growth.
-			podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 40*time.Second)
+			// Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth.
+			podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute)
 			gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls")
 			// Remove error so after next backoff we will succeed.
-			_ = resetCRIProxyInjector()
+			_ = resetCRIProxyInjector(e2eCriProxy)
 			podErr = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
 			framework.ExpectNoError(podErr)
-			// Parse observed backoffs (TODO: don't use Events, but logs) and compare to expectations
-			durations, err := getImageBackOffDurations(ctx, f, pod.Name)
+			e, err := getImageBackoffs(ctx, f, pod.Name)
 			framework.ExpectNoError(err)
-			gomega.Expect(durations).Error().ShouldNot(gomega.BeNil(), "Should have observed backoffs in Pod event log")
-			gomega.Expect(durations[0]).Should(gomega.BeNumerically("~", time.Duration(10*time.Second), time.Duration(2*time.Second)))
-			// TODO: and check the next set of durations are 2x, etc
+			// 3 would take 10s best case
+			gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3))
+			// 6 would take 150s best case
+			gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6))
 		})
 	})
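
The 3-to-6 bound follows from the kubelet's exponential image pull backoff. As a rough model, assume the default backoff of a 10s initial delay doubling per retry (capped at 300s) and count the initial pull as try 1: tries land at roughly t=0s, 10s, 30s, 70s, 150s. A ~60s failure window plus the final successful pull should therefore yield more than 3 but no more than 6 aggregated "Pulling" events. The sketch below only prints that assumed schedule; the constants are illustrative, not values read from the kubelet.

```go
package main

import (
	"fmt"
	"time"
)

// Prints an assumed kubelet image pull backoff schedule: first try
// immediately, then delays starting at 10s, doubling per retry and
// capped at 300s. Illustrative assumptions only; the test merely
// relies on the resulting count landing in the (3, 6] range.
func main() {
	const (
		initialDelay = 10 * time.Second
		maxDelay     = 300 * time.Second
	)
	t := time.Duration(0)
	delay := initialDelay
	for try := 1; try <= 6; try++ {
		fmt.Printf("pull try %d at ~t=%v\n", try, t)
		t += delay
		delay *= 2
		if delay > maxDelay {
			delay = maxDelay
		}
	}
}
```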
@@ -190,49 +185,20 @@ func getFailedToPullImageMsg(ctx context.Context, f *framework.Framework, podNam
 	return "", fmt.Errorf("failed to find FailedToPullImage event for pod: %s", podName)
 }
 
-func getImageBackOffDurations(ctx context.Context, f *framework.Framework, podName string) ([]time.Duration, error) {
-	events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{})
+func getImageBackoffs(ctx context.Context, f *framework.Framework, podName string) (v1.Event, error) {
+	event := v1.Event{}
+	e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{})
 	if err != nil {
-		return nil, err
+		return event, err
 	}
-	var backoffs []time.Duration
-	type BackOffRecord struct {
-		initialEventTime  time.Time
-		backoffEventTimes []time.Time
-		duration          time.Duration
-	}
-	records := make(map[int]*BackOffRecord)
-	records[0] = &BackOffRecord{}
-	var backoffCount int
-	var pullTime time.Time
-	var r *BackOffRecord
-	// I'm doing this here for events but really it needs to be off kubelet logs or some kind of synchronous watch
-	// Because the events normalize to the latest
-	// But this is the idea
-	for _, event := range events.Items {
-		if event.InvolvedObject.Name == podName {
-			switch event.Reason {
-			case kubeletevents.PullingImage:
-				if !pullTime.IsZero() {
-					if event.FirstTimestamp.Time.After(pullTime) {
-						r = records[backoffCount]
-						r.duration = r.initialEventTime.Sub(r.backoffEventTimes[len(r.backoffEventTimes)-1])
-						backoffs = append(backoffs, r.duration)
-						backoffCount++
-					}
-				}
-				pullTime = event.FirstTimestamp.Time
-				records[backoffCount].initialEventTime = pullTime
-			case kubeletevents.BackOffPullImage:
-				current := records[backoffCount].backoffEventTimes
-				current = append(current, event.FirstTimestamp.Time)
-			}
+	for _, event := range e.Items {
+		if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.PullingImage {
+			return event, nil
 		}
 	}
-	return backoffs, nil
+	return event, nil
 }
 
 func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) {
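
The rewritten helper can treat one event as a try counter because client-go's event recorder (used by the kubelet) correlates repeated events with the same key: instead of creating a new Event object per pull attempt, it increments the Count field of the existing aggregated "Pulling" event. A minimal standalone sketch of the same lookup, using a field selector instead of scanning every event in the namespace; the pullTries helper name and package are assumptions for illustration, not part of this change:

```go
package e2ehelpers

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// pullTries returns the aggregated Count of the kubelet "Pulling"
// event for a pod. Repeated occurrences are deduplicated by the
// event recorder into a single Event whose Count increments, so
// Count approximates the number of image pull attempts.
func pullTries(ctx context.Context, cs kubernetes.Interface, ns, podName string) (int32, error) {
	events, err := cs.CoreV1().Events(ns).List(ctx, metav1.ListOptions{
		FieldSelector: "involvedObject.name=" + podName + ",reason=Pulling",
	})
	if err != nil {
		return 0, err
	}
	if len(events.Items) == 0 {
		return 0, fmt.Errorf("no Pulling event found for pod %s", podName)
	}
	return events.Items[0].Count, nil
}
```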
@@ -260,6 +226,25 @@ func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podNam
 	return endTime.Sub(startTime), nil
 }
 
+func newImageBackoffPod() *v1.Pod {
+	podName := "image-backoff" + string(uuid.NewUUID())
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: podName,
+		},
+		Spec: v1.PodSpec{
+			Containers: []v1.Container{
+				{
+					Image:           imageutils.GetPauseImageName(),
+					Name:            podName,
+					ImagePullPolicy: v1.PullAlways,
+				},
+			},
+		},
+	}
+	return pod
+}
+
 func newPullImageAlwaysPod() *v1.Pod {
 	podName := "cri-proxy-test-" + string(uuid.NewUUID())
 	pod := &v1.Pod{