From 0fc4b740f821533f2c45741f9a8e8697af003d46 Mon Sep 17 00:00:00 2001 From: Sascha Grunert Date: Tue, 8 Oct 2024 10:22:11 +0200 Subject: [PATCH] Use image pull error in `message` during back-off The container status waiting reason toggles between `ImagePullBackOff` and the actual pull error, resulting in a bad user experience for consumers like kubectl. For example, the output of `kubectl get pods` does return either: ``` NAME READY STATUS RESTARTS AGE pod 0/1 SignatureValidationFailed 0 10s ``` or ``` NAME READY STATUS RESTARTS AGE pod 0/1 ImagePullBackOff 0 18s ``` depending in which state the image pull is. We now improve that behavior by preserving the actual error in the `message` of the `waiting` state from the pull during back-off: ```json { "waiting": { "message": "Back-off pulling image \"quay.io/crio/unsigned:latest\": SignatureValidationFailed: image pull failed for quay.io/crio/unsigned:latest because the signature validation failed: Source image rejected: A signature was required, but no signature exists", "reason": "ImagePullBackOff" } } ``` While the `SignatureValidationFailed` value inherits from the previous known state: ```json { "waiting": { "message": "image pull failed for quay.io/crio/unsigned:latest because the signature validation failed: Source image rejected: A signature was required, but no signature exists", "reason": "SignatureValidationFailed" } } ``` Signed-off-by: Sascha Grunert --- pkg/kubelet/images/image_manager.go | 26 ++++++- pkg/kubelet/images/image_manager_test.go | 99 ++++++++++++++++-------- 2 files changed, 90 insertions(+), 35 deletions(-) diff --git a/pkg/kubelet/images/image_manager.go b/pkg/kubelet/images/image_manager.go index d2704f61dca..684eba80356 100644 --- a/pkg/kubelet/images/image_manager.go +++ b/pkg/kubelet/images/image_manager.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "strings" + "sync" "time" v1 "k8s.io/api/core/v1" @@ -43,9 +44,11 @@ type ImagePodPullingTimeRecorder interface { // imageManager provides the functionalities for image pulling. type imageManager struct { - recorder record.EventRecorder - imageService kubecontainer.ImageService - backOff *flowcontrol.Backoff + recorder record.EventRecorder + imageService kubecontainer.ImageService + backOff *flowcontrol.Backoff + prevPullErrMsg sync.Map + // It will check the presence of the image, and report the 'image pulling', image pulled' events correspondingly. puller imagePuller @@ -154,8 +157,20 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR if m.backOff.IsInBackOffSinceUpdate(backOffKey, m.backOff.Clock.Now()) { msg := fmt.Sprintf("Back-off pulling image %q", imgRef) m.logIt(objRef, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info) + + // Wrap the error from the actual pull if available. + // This information is populated to the pods + // .status.containerStatuses[*].state.waiting.message. + prevPullErrMsg, ok := m.prevPullErrMsg.Load(backOffKey) + if ok { + msg = fmt.Sprintf("%s: %s", msg, prevPullErrMsg) + } + return "", msg, ErrImagePullBackOff } + // Ensure that the map cannot grow indefinitely. + m.prevPullErrMsg.Delete(backOffKey) + m.podPullingTimeRecorder.RecordImageStartedPulling(pod.UID) m.logIt(objRef, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", imgRef), klog.Info) startTime := time.Now() @@ -167,6 +182,11 @@ func (m *imageManager) EnsureImageExists(ctx context.Context, objRef *v1.ObjectR m.backOff.Next(backOffKey, m.backOff.Clock.Now()) msg, err := evalCRIPullErr(imgRef, imagePullResult.err) + + // Store the actual pull error for providing that information during + // the image pull back-off. + m.prevPullErrMsg.Store(backOffKey, fmt.Sprintf("%s: %s", err, msg)) + return "", msg, err } m.podPullingTimeRecorder.RecordImageFinishedPulling(pod.UID) diff --git a/pkg/kubelet/images/image_manager_test.go b/pkg/kubelet/images/image_manager_test.go index a1be7b09dd9..41a1f49f237 100644 --- a/pkg/kubelet/images/image_manager_test.go +++ b/pkg/kubelet/images/image_manager_test.go @@ -49,6 +49,7 @@ type pullerExpects struct { shouldRecordStartedPullingTime bool shouldRecordFinishedPullingTime bool events []v1.Event + msg string } type pullerTestCase struct { @@ -77,7 +78,7 @@ func pullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, }}, { // image present, don't pull @@ -92,15 +93,15 @@ func pullerTestCases() []pullerTestCase { {[]string{"GetImageRef"}, nil, false, false, []v1.Event{ {Reason: "Pulled"}, - }}, + }, ""}, {[]string{"GetImageRef"}, nil, false, false, []v1.Event{ {Reason: "Pulled"}, - }}, + }, ""}, {[]string{"GetImageRef"}, nil, false, false, []v1.Event{ {Reason: "Pulled"}, - }}, + }, ""}, }}, // image present, pull it {containerImage: "present_image", @@ -115,17 +116,17 @@ func pullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, {[]string{"PullImage", "GetImageSize"}, nil, true, true, []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, {[]string{"PullImage", "GetImageSize"}, nil, true, true, []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, }}, // missing image, error PullNever {containerImage: "missing_image", @@ -139,15 +140,15 @@ func pullerTestCases() []pullerTestCase { {[]string{"GetImageRef"}, ErrImageNeverPull, false, false, []v1.Event{ {Reason: "ErrImageNeverPull"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImageNeverPull, false, false, []v1.Event{ {Reason: "ErrImageNeverPull"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImageNeverPull, false, false, []v1.Event{ {Reason: "ErrImageNeverPull"}, - }}, + }, ""}, }}, // missing image, unable to inspect {containerImage: "missing_image", @@ -161,15 +162,15 @@ func pullerTestCases() []pullerTestCase { {[]string{"GetImageRef"}, ErrImageInspect, false, false, []v1.Event{ {Reason: "InspectFailed"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImageInspect, false, false, []v1.Event{ {Reason: "InspectFailed"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImageInspect, false, false, []v1.Event{ {Reason: "InspectFailed"}, - }}, + }, ""}, }}, // missing image, unable to fetch {containerImage: "typo_image", @@ -184,29 +185,29 @@ func pullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "Pulling"}, {Reason: "Failed"}, - }}, + }, ""}, {[]string{"GetImageRef", "PullImage"}, ErrImagePull, true, false, []v1.Event{ {Reason: "Pulling"}, {Reason: "Failed"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImagePullBackOff, false, false, []v1.Event{ {Reason: "BackOff"}, - }}, + }, ""}, {[]string{"GetImageRef", "PullImage"}, ErrImagePull, true, false, []v1.Event{ {Reason: "Pulling"}, {Reason: "Failed"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImagePullBackOff, false, false, []v1.Event{ {Reason: "BackOff"}, - }}, + }, ""}, {[]string{"GetImageRef"}, ErrImagePullBackOff, false, false, []v1.Event{ {Reason: "BackOff"}, - }}, + }, ""}, }}, // image present, non-zero qps, try to pull {containerImage: "present_image", @@ -221,17 +222,17 @@ func pullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, {[]string{"PullImage", "GetImageSize"}, nil, true, true, []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, {[]string{"PullImage", "GetImageSize"}, nil, true, true, []v1.Event{ {Reason: "Pulling"}, {Reason: "Pulled"}, - }}, + }, ""}, }}, // image present, non-zero qps, try to pull when qps exceeded {containerImage: "present_image", @@ -246,16 +247,16 @@ func pullerTestCases() []pullerTestCase { []v1.Event{ {Reason: "Pulling"}, {Reason: "Failed"}, - }}, + }, ""}, {[]string(nil), ErrImagePull, true, false, []v1.Event{ {Reason: "Pulling"}, {Reason: "Failed"}, - }}, + }, ""}, {[]string(nil), ErrImagePullBackOff, false, false, []v1.Event{ {Reason: "BackOff"}, - }}, + }, ""}, }}, // error case if image name fails validation due to invalid reference format {containerImage: "FAILED_IMAGE", @@ -269,7 +270,7 @@ func pullerTestCases() []pullerTestCase { {[]string(nil), ErrInvalidImageName, false, false, []v1.Event{ {Reason: "InspectFailed"}, - }}, + }, ""}, }}, // error case if image name contains http {containerImage: "http://url", @@ -283,7 +284,7 @@ func pullerTestCases() []pullerTestCase { {[]string(nil), ErrInvalidImageName, false, false, []v1.Event{ {Reason: "InspectFailed"}, - }}, + }, ""}, }}, // error case if image name contains sha256 {containerImage: "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad", @@ -297,7 +298,39 @@ func pullerTestCases() []pullerTestCase { {[]string(nil), ErrInvalidImageName, false, false, []v1.Event{ {Reason: "InspectFailed"}, - }}, + }, ""}, + }}, + {containerImage: "typo_image", + testName: "image missing, SignatureValidationFailed", + policy: v1.PullIfNotPresent, + inspectErr: nil, + pullerErr: crierrors.ErrSignatureValidationFailed, + qps: 0.0, + burst: 0, + expected: []pullerExpects{ + {[]string{"GetImageRef", "PullImage"}, crierrors.ErrSignatureValidationFailed, true, false, + []v1.Event{ + {Reason: "Pulling"}, + {Reason: "Failed"}, + }, "image pull failed for typo_image because the signature validation failed"}, + {[]string{"GetImageRef", "PullImage"}, crierrors.ErrSignatureValidationFailed, true, false, + []v1.Event{ + {Reason: "Pulling"}, + {Reason: "Failed"}, + }, "image pull failed for typo_image because the signature validation failed"}, + {[]string{"GetImageRef"}, ErrImagePullBackOff, false, false, + []v1.Event{ + {Reason: "BackOff"}, + }, "Back-off pulling image \"typo_image\": SignatureValidationFailed: image pull failed for typo_image because the signature validation failed"}, + {[]string{"GetImageRef", "PullImage"}, crierrors.ErrSignatureValidationFailed, true, false, + []v1.Event{ + {Reason: "Pulling"}, + {Reason: "Failed"}, + }, "image pull failed for typo_image because the signature validation failed"}, + {[]string{"GetImageRef"}, ErrImagePullBackOff, false, false, + []v1.Event{ + {Reason: "BackOff"}, + }, "Back-off pulling image \"typo_image\": SignatureValidationFailed: image pull failed for typo_image because the signature validation failed"}, }}, } } @@ -372,11 +405,12 @@ func TestParallelPuller(t *testing.T) { fakeRuntime.CalledFunctions = nil fakeClock.Step(time.Second) - _, _, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy) + _, msg, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy) fakeRuntime.AssertCalls(expected.calls) assert.Equal(t, expected.err, err) assert.Equal(t, expected.shouldRecordStartedPullingTime, fakePodPullingTimeRecorder.startedPullingRecorded) assert.Equal(t, expected.shouldRecordFinishedPullingTime, fakePodPullingTimeRecorder.finishedPullingRecorded) + assert.Contains(t, msg, expected.msg) fakePodPullingTimeRecorder.reset() } }) @@ -404,11 +438,12 @@ func TestSerializedPuller(t *testing.T) { fakeRuntime.CalledFunctions = nil fakeClock.Step(time.Second) - _, _, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy) + _, msg, err := puller.EnsureImageExists(ctx, nil, pod, container.Image, nil, nil, "", container.ImagePullPolicy) fakeRuntime.AssertCalls(expected.calls) assert.Equal(t, expected.err, err) assert.Equal(t, expected.shouldRecordStartedPullingTime, fakePodPullingTimeRecorder.startedPullingRecorded) assert.Equal(t, expected.shouldRecordFinishedPullingTime, fakePodPullingTimeRecorder.finishedPullingRecorded) + assert.Contains(t, msg, expected.msg) fakePodPullingTimeRecorder.reset() } }) @@ -456,7 +491,7 @@ func TestPullAndListImageWithPodAnnotations(t *testing.T) { inspectErr: nil, pullerErr: nil, expected: []pullerExpects{ - {[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil}, + {[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil, ""}, }} useSerializedEnv := true @@ -512,7 +547,7 @@ func TestPullAndListImageWithRuntimeHandlerInImageCriAPIFeatureGate(t *testing.T inspectErr: nil, pullerErr: nil, expected: []pullerExpects{ - {[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil}, + {[]string{"GetImageRef", "PullImage", "GetImageSize"}, nil, true, true, nil, ""}, }} useSerializedEnv := true