Support specifying custom LB retry period from cloud provider

This change allows cloud providers to specify a custom retry period by returning a RetryError. The purpose is to bypass the work queue-driven exponential backoff algorithm when there is no need to back off. Specifically, this can be the case when a cloud load balancer operation such as a create or delete is still pending and the cloud API should be polled for completion at a constant interval. A backoff algorithm would not always be reasonable to apply here since there is no API or performance degradation warranting an increasing wait time between API requests.
2025-09-21 18:11:22 +00:00 · 2022-10-21 14:08:17 +02:00
parent 4115b2b180
commit a179203bdb
2 changed files with 55 additions and 3 deletions
--- a/staging/src/k8s.io/cloud-provider/api/retry_error.go
+++ b/staging/src/k8s.io/cloud-provider/api/retry_error.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2020 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package api
+
+import "time"
+
+// RetryError is an error carrying a message plus the duration after which a
+// service reconciliation should be retried.
+type RetryError struct {
+	msg        string
+	retryAfter time.Duration
+}
+
+// NewRetryError returns a RetryError with msg as its error text and retryAfter as its retry delay.
+func NewRetryError(msg string, retryAfter time.Duration) *RetryError {
+	return &RetryError{
+		msg:        msg,
+		retryAfter: retryAfter,
+	}
+}
+
+func (re *RetryError) Error() string {
+	return re.msg // the message stored by NewRetryError; this method makes *RetryError satisfy error
+}
+
+// RetryAfter returns the retry-after duration stored in the error by NewRetryError.
+func (re *RetryError) RetryAfter() time.Duration {
+	return re.retryAfter
+}
--- a/staging/src/k8s.io/cloud-provider/controllers/service/controller.go
+++ b/staging/src/k8s.io/cloud-provider/controllers/service/controller.go
@@ -18,6 +18,7 @@ package service

 import (
 	"context"
+	stderrors "errors"
 	"fmt"
 	"reflect"
 	"sync"
@@ -39,6 +40,7 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/workqueue"
 	cloudprovider "k8s.io/cloud-provider"
+	"k8s.io/cloud-provider/api"
 	servicehelper "k8s.io/cloud-provider/service/helpers"
 	"k8s.io/component-base/featuregate"
 	controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers"
@@ -288,8 +290,15 @@ func (c *Controller) processNextServiceItem(ctx context.Context) bool {
 		return true
 	}

-	runtime.HandleError(fmt.Errorf("error processing service %v (will retry): %v", key, err))
-	c.serviceQueue.AddRateLimited(key)
+	var re *api.RetryError
+	if stderrors.As(err, &re) {
+		klog.V(4).Infof("Retrying processing for service %v in %s", key, re.RetryAfter())
+		c.serviceQueue.AddAfter(key, re.RetryAfter())
+	} else {
+		runtime.HandleError(fmt.Errorf("error processing service %v (will retry): %v", key, err))
+		c.serviceQueue.AddRateLimited(key)
+	}
+
 	return true
 }

@@ -401,7 +410,7 @@ func (c *Controller) syncLoadBalancerIfNeeded(ctx context.Context, service *v1.S
 				klog.V(4).Infof("LoadBalancer for service %s implemented by a different controller %s, Ignoring error", key, c.cloud.ProviderName())
 				return op, nil
 			}
-			return op, fmt.Errorf("failed to ensure load balancer: %v", err)
+			return op, fmt.Errorf("failed to ensure load balancer: %w", err)
 		}
 		if newStatus == nil {
 			return op, fmt.Errorf("service status returned by EnsureLoadBalancer is nil")