From 2176e80333ddd9966b37758f8417bf72ceb61d5f Mon Sep 17 00:00:00 2001 From: mprahl Date: Thu, 11 Jul 2024 13:50:52 -0400 Subject: [PATCH] Stop the RetryWatcher when failing due to permissions issue Previously, when the client did not have permission to watch a resource, the RetryWatcher retried continuously. In this case, it's better to send an error and stop retrying to let the caller handle this case since this is not a transient error that can be recovered without user intervention. This is particularly helpful in applications that leverage a user-provided service account and the application needs to notify the user to set the correct permissions for the service account. This also accounts for invalid credentials from the watch client. Signed-off-by: mprahl Kubernetes-commit: db2218d16e3fb2a30b21ffa0f307beb515d8394b --- tools/watch/retrywatcher.go | 29 +++++++++++++++++++++++++ tools/watch/retrywatcher_test.go | 36 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/tools/watch/retrywatcher.go b/tools/watch/retrywatcher.go index 8431d02f..d36d7455 100644 --- a/tools/watch/retrywatcher.go +++ b/tools/watch/retrywatcher.go @@ -128,6 +128,35 @@ func (rw *RetryWatcher) doReceive() (bool, time.Duration) { return false, 0 } + // Check if the watch failed due to the client not having permission to watch the resource or the credentials + // being invalid (e.g. expired token). + if apierrors.IsForbidden(err) || apierrors.IsUnauthorized(err) { + // Add more detail since the forbidden message returned by the Kubernetes API is just "unknown". 
+ klog.ErrorS(err, msg+": ensure the client has valid credentials and watch permissions on the resource") + + if apiStatus, ok := err.(apierrors.APIStatus); ok { + statusErr := apiStatus.Status() + + sent := rw.send(watch.Event{ + Type: watch.Error, + Object: &statusErr, + }) + if !sent { + // This likely means the RetryWatcher is stopping but return false so the caller to doReceive can + // verify this and potentially retry. + klog.Error("Failed to send the Unauthorized or Forbidden watch event") + + return false, 0 + } + } else { + // This should never happen since apierrors only handles apierrors.APIStatus. Still, this is an + // unrecoverable error, so still allow it to return true below. + klog.ErrorS(err, msg+": encountered an unexpected Unauthorized or Forbidden error type") + } + + return true, 0 + } + klog.ErrorS(err, msg) // Retry return false, 0 diff --git a/tools/watch/retrywatcher_test.go b/tools/watch/retrywatcher_test.go index 297661aa..873ce37e 100644 --- a/tools/watch/retrywatcher_test.go +++ b/tools/watch/retrywatcher_test.go @@ -288,6 +288,42 @@ func TestRetryWatcher(t *testing.T) { }, }, }, + { + name: "fails on Forbidden", + initialRV: "5", + watchClient: &cache.ListWatch{ + WatchFunc: func() func(options metav1.ListOptions) (watch.Interface, error) { + return func(options metav1.ListOptions) (watch.Interface, error) { + return nil, apierrors.NewForbidden(schema.GroupResource{}, "", errors.New("unknown")) + } + }(), + }, + watchCount: 1, + expected: []watch.Event{ + { + Type: watch.Error, + Object: &apierrors.NewForbidden(schema.GroupResource{}, "", errors.New("unknown")).ErrStatus, + }, + }, + }, + { + name: "fails on Unauthorized", + initialRV: "5", + watchClient: &cache.ListWatch{ + WatchFunc: func() func(options metav1.ListOptions) (watch.Interface, error) { + return func(options metav1.ListOptions) (watch.Interface, error) { + return nil, apierrors.NewUnauthorized("") + } + }(), + }, + watchCount: 1, + expected: []watch.Event{ + { + Type: 
watch.Error, + Object: &apierrors.NewUnauthorized("").ErrStatus, + }, + }, + }, { name: "recovers from timeout error", initialRV: "5",