From db2218d16e3fb2a30b21ffa0f307beb515d8394b Mon Sep 17 00:00:00 2001 From: mprahl Date: Thu, 11 Jul 2024 13:50:52 -0400 Subject: [PATCH] Stop the RetryWatcher when failing due to permissions issue When the client does not have permission to watch a resource, the RetryWatcher would previously retry continuously. In this case, it's better to send an error and stop retrying to let the caller handle this case since this is not a transient error that can be recovered from without user intervention. This is particularly helpful in applications that leverage a user-provided service account and need to notify the user to set the correct permissions for the service account. This also accounts for invalid credentials from the watch client. Signed-off-by: mprahl --- .../client-go/tools/watch/retrywatcher.go | 29 +++++++++++++++ .../tools/watch/retrywatcher_test.go | 36 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/staging/src/k8s.io/client-go/tools/watch/retrywatcher.go b/staging/src/k8s.io/client-go/tools/watch/retrywatcher.go index d81dc43570d..61356919759 100644 --- a/staging/src/k8s.io/client-go/tools/watch/retrywatcher.go +++ b/staging/src/k8s.io/client-go/tools/watch/retrywatcher.go @@ -126,6 +126,35 @@ func (rw *RetryWatcher) doReceive() (bool, time.Duration) { return false, 0 } + // Check if the watch failed due to the client not having permission to watch the resource or the credentials + // being invalid (e.g. expired token). + if apierrors.IsForbidden(err) || apierrors.IsUnauthorized(err) { + // Add more detail since the forbidden message returned by the Kubernetes API is just "unknown". 
+ klog.ErrorS(err, msg+": ensure the client has valid credentials and watch permissions on the resource") + + if apiStatus, ok := err.(apierrors.APIStatus); ok { + statusErr := apiStatus.Status() + + sent := rw.send(watch.Event{ + Type: watch.Error, + Object: &statusErr, + }) + if !sent { + // This likely means the RetryWatcher is stopping but return false so the caller to doReceive can + // verify this and potentially retry. + klog.Error("Failed to send the Unauthorized or Forbidden watch event") + + return false, 0 + } + } else { + // This should never happen since apierrors only handles apierrors.APIStatus. Still, this is an + // unrecoverable error, so still allow it to return true below. + klog.ErrorS(err, msg+": encountered an unexpected Unauthorized or Forbidden error type") + } + + return true, 0 + } + klog.ErrorS(err, msg) // Retry return false, 0 diff --git a/staging/src/k8s.io/client-go/tools/watch/retrywatcher_test.go b/staging/src/k8s.io/client-go/tools/watch/retrywatcher_test.go index fff3a46c482..36e1494b54d 100644 --- a/staging/src/k8s.io/client-go/tools/watch/retrywatcher_test.go +++ b/staging/src/k8s.io/client-go/tools/watch/retrywatcher_test.go @@ -288,6 +288,42 @@ func TestRetryWatcher(t *testing.T) { }, }, }, + { + name: "fails on Forbidden", + initialRV: "5", + watchClient: &cache.ListWatch{ + WatchFunc: func() func(options metav1.ListOptions) (watch.Interface, error) { + return func(options metav1.ListOptions) (watch.Interface, error) { + return nil, apierrors.NewForbidden(schema.GroupResource{}, "", errors.New("unknown")) + } + }(), + }, + watchCount: 1, + expected: []watch.Event{ + { + Type: watch.Error, + Object: &apierrors.NewForbidden(schema.GroupResource{}, "", errors.New("unknown")).ErrStatus, + }, + }, + }, + { + name: "fails on Unauthorized", + initialRV: "5", + watchClient: &cache.ListWatch{ + WatchFunc: func() func(options metav1.ListOptions) (watch.Interface, error) { + return func(options metav1.ListOptions) (watch.Interface, 
error) { + return nil, apierrors.NewUnauthorized("") + } + }(), + }, + watchCount: 1, + expected: []watch.Event{ + { + Type: watch.Error, + Object: &apierrors.NewUnauthorized("").ErrStatus, + }, + }, + }, { name: "recovers from timeout error", initialRV: "5",