From a179203bdba8b05ac1a40fbc279b76e872ecb15d Mon Sep 17 00:00:00 2001
From: Timo Reimann
Date: Fri, 21 Oct 2022 14:08:17 +0200
Subject: [PATCH] Support specifying custom LB retry period from cloud provider

This change allows cloud providers to specify a custom retry period by
returning a RetryError.

The purpose is to bypass the work queue-driven exponential backoff
algorithm when there is no need to back off. Specifically, this can be
the case when a cloud load balancer operation such as a create or delete
is still pending and the cloud API should be polled for completion at a
constant interval. A backoff algorithm would not always be reasonable to
apply here since there is no API or performance degradation warranting
an increasing wait time between API requests.
---
 .../k8s.io/cloud-provider/api/retry_error.go  | 43 +++++++++++++++++++
 .../controllers/service/controller.go         | 15 +++++--
 2 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 staging/src/k8s.io/cloud-provider/api/retry_error.go

diff --git a/staging/src/k8s.io/cloud-provider/api/retry_error.go b/staging/src/k8s.io/cloud-provider/api/retry_error.go
new file mode 100644
index 00000000000..6708cd3bcd0
--- /dev/null
+++ b/staging/src/k8s.io/cloud-provider/api/retry_error.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package api
+
+import "time"
+
+// RetryError indicates after what time a service reconciliation should be
+// retried.
+type RetryError struct {
+	msg        string
+	retryAfter time.Duration
+}
+
+// NewRetryError returns a RetryError.
+func NewRetryError(msg string, retryAfter time.Duration) *RetryError {
+	return &RetryError{
+		msg:        msg,
+		retryAfter: retryAfter,
+	}
+}
+
+func (re *RetryError) Error() string {
+	return re.msg
+}
+
+// RetryAfter returns the defined retry-after duration.
+func (re *RetryError) RetryAfter() time.Duration {
+	return re.retryAfter
+}
diff --git a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go
index a8b4bdf477f..de4c7ceb2e9 100644
--- a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go
+++ b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go
@@ -18,6 +18,7 @@ package service
 
 import (
 	"context"
+	stderrors "errors"
 	"fmt"
 	"reflect"
 	"sync"
@@ -39,6 +40,7 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/workqueue"
 	cloudprovider "k8s.io/cloud-provider"
+	"k8s.io/cloud-provider/api"
 	servicehelper "k8s.io/cloud-provider/service/helpers"
 	"k8s.io/component-base/featuregate"
 	controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers"
@@ -288,8 +290,15 @@ func (c *Controller) processNextServiceItem(ctx context.Context) bool {
 		return true
 	}
 
-	runtime.HandleError(fmt.Errorf("error processing service %v (will retry): %v", key, err))
-	c.serviceQueue.AddRateLimited(key)
+	var re *api.RetryError
+	if stderrors.As(err, &re) {
+		klog.V(4).Infof("Retrying processing for service %v in %s", key, re.RetryAfter())
+		c.serviceQueue.AddAfter(key, re.RetryAfter())
+	} else {
+		runtime.HandleError(fmt.Errorf("error processing service %v (will retry): %v", key, err))
+		c.serviceQueue.AddRateLimited(key)
+	}
+
 	return true
 }
 
@@ -401,7 +410,7 @@ func (c *Controller) syncLoadBalancerIfNeeded(ctx context.Context, service *v1.S
 				klog.V(4).Infof("LoadBalancer for service %s implemented by a different controller %s, Ignoring error", key, c.cloud.ProviderName())
 				return op, nil
 			}
-			return op, fmt.Errorf("failed to ensure load balancer: %v", err)
+			return op, fmt.Errorf("failed to ensure load balancer: %w", err)
 		}
 		if newStatus == nil {
 			return op, fmt.Errorf("service status returned by EnsureLoadBalancer is nil")