mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-20 01:01:22 +00:00
Merge pull request #94021 from timoreimann/support-specifying-custom-lb-retry-period-from-cloud-provider
Support specifying custom LB retry period from cloud provider
This commit is contained in:
commit
f51dad586d
46
staging/src/k8s.io/cloud-provider/api/retry_error.go
Normal file
46
staging/src/k8s.io/cloud-provider/api/retry_error.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2023 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RetryError indicates that a service reconciliation should be retried after a
|
||||||
|
// fixed duration (as opposed to backing off exponentially).
|
||||||
|
type RetryError struct {
|
||||||
|
msg string
|
||||||
|
retryAfter time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRetryError returns a RetryError.
|
||||||
|
func NewRetryError(msg string, retryAfter time.Duration) *RetryError {
|
||||||
|
return &RetryError{
|
||||||
|
msg: msg,
|
||||||
|
retryAfter: retryAfter,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error shows the details of the retry reason.
|
||||||
|
func (re *RetryError) Error() string {
|
||||||
|
return re.msg
|
||||||
|
}
|
||||||
|
|
||||||
|
// RetryAfter returns the defined retry-after duration.
|
||||||
|
func (re *RetryError) RetryAfter() time.Duration {
|
||||||
|
return re.retryAfter
|
||||||
|
}
|
@ -131,11 +131,11 @@ func GetInstanceProviderID(ctx context.Context, cloud Interface, nodeName types.
|
|||||||
// irrespective of the ImplementedElsewhere error. Additional finalizers for
|
// irrespective of the ImplementedElsewhere error. Additional finalizers for
|
||||||
// LB services must be managed in the alternate implementation.
|
// LB services must be managed in the alternate implementation.
|
||||||
type LoadBalancer interface {
|
type LoadBalancer interface {
|
||||||
// TODO: Break this up into different interfaces (LB, etc) when we have more than one type of service
|
|
||||||
// GetLoadBalancer returns whether the specified load balancer exists, and
|
// GetLoadBalancer returns whether the specified load balancer exists, and
|
||||||
// if so, what its status is.
|
// if so, what its status is.
|
||||||
// Implementations must treat the *v1.Service parameter as read-only and not modify it.
|
// Implementations must treat the *v1.Service parameter as read-only and not modify it.
|
||||||
// Parameter 'clusterName' is the name of the cluster as presented to kube-controller-manager
|
// Parameter 'clusterName' is the name of the cluster as presented to kube-controller-manager.
|
||||||
|
// TODO: Break this up into different interfaces (LB, etc) when we have more than one type of service
|
||||||
GetLoadBalancer(ctx context.Context, clusterName string, service *v1.Service) (status *v1.LoadBalancerStatus, exists bool, err error)
|
GetLoadBalancer(ctx context.Context, clusterName string, service *v1.Service) (status *v1.LoadBalancerStatus, exists bool, err error)
|
||||||
// GetLoadBalancerName returns the name of the load balancer. Implementations must treat the
|
// GetLoadBalancerName returns the name of the load balancer. Implementations must treat the
|
||||||
// *v1.Service parameter as read-only and not modify it.
|
// *v1.Service parameter as read-only and not modify it.
|
||||||
@ -143,7 +143,13 @@ type LoadBalancer interface {
|
|||||||
// EnsureLoadBalancer creates a new load balancer 'name', or updates the existing one. Returns the status of the balancer
|
// EnsureLoadBalancer creates a new load balancer 'name', or updates the existing one. Returns the status of the balancer
|
||||||
// Implementations must treat the *v1.Service and *v1.Node
|
// Implementations must treat the *v1.Service and *v1.Node
|
||||||
// parameters as read-only and not modify them.
|
// parameters as read-only and not modify them.
|
||||||
// Parameter 'clusterName' is the name of the cluster as presented to kube-controller-manager
|
// Parameter 'clusterName' is the name of the cluster as presented to kube-controller-manager.
|
||||||
|
//
|
||||||
|
// Implementations may return a (possibly wrapped) api.RetryError to enforce
|
||||||
|
// backing off at a fixed duration. This can be used for cases like when the
|
||||||
|
// load balancer is not ready yet (e.g., it is still being provisioned) and
|
||||||
|
// polling at a fixed rate is preferred over backing off exponentially in
|
||||||
|
// order to minimize latency.
|
||||||
EnsureLoadBalancer(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error)
|
EnsureLoadBalancer(ctx context.Context, clusterName string, service *v1.Service, nodes []*v1.Node) (*v1.LoadBalancerStatus, error)
|
||||||
// UpdateLoadBalancer updates hosts under the specified load balancer.
|
// UpdateLoadBalancer updates hosts under the specified load balancer.
|
||||||
// Implementations must treat the *v1.Service and *v1.Node
|
// Implementations must treat the *v1.Service and *v1.Node
|
||||||
|
@ -18,13 +18,14 @@ package service
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
"k8s.io/apimachinery/pkg/api/errors"
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
"k8s.io/apimachinery/pkg/labels"
|
"k8s.io/apimachinery/pkg/labels"
|
||||||
"k8s.io/apimachinery/pkg/util/runtime"
|
"k8s.io/apimachinery/pkg/util/runtime"
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
@ -39,6 +40,7 @@ import (
|
|||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
"k8s.io/client-go/util/workqueue"
|
"k8s.io/client-go/util/workqueue"
|
||||||
cloudprovider "k8s.io/cloud-provider"
|
cloudprovider "k8s.io/cloud-provider"
|
||||||
|
"k8s.io/cloud-provider/api"
|
||||||
servicehelper "k8s.io/cloud-provider/service/helpers"
|
servicehelper "k8s.io/cloud-provider/service/helpers"
|
||||||
"k8s.io/component-base/featuregate"
|
"k8s.io/component-base/featuregate"
|
||||||
controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers"
|
controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers"
|
||||||
@ -288,8 +290,15 @@ func (c *Controller) processNextServiceItem(ctx context.Context) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
runtime.HandleError(fmt.Errorf("error processing service %v (will retry): %v", key, err))
|
var re *api.RetryError
|
||||||
|
if errors.As(err, &re) {
|
||||||
|
klog.Warningf("error processing service %v (retrying in %s): %v", key, re.RetryAfter(), err)
|
||||||
|
c.serviceQueue.AddAfter(key, re.RetryAfter())
|
||||||
|
} else {
|
||||||
|
runtime.HandleError(fmt.Errorf("error processing service %v (retrying with exponential backoff): %v", key, err))
|
||||||
c.serviceQueue.AddRateLimited(key)
|
c.serviceQueue.AddRateLimited(key)
|
||||||
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -401,7 +410,8 @@ func (c *Controller) syncLoadBalancerIfNeeded(ctx context.Context, service *v1.S
|
|||||||
klog.V(4).Infof("LoadBalancer for service %s implemented by a different controller %s, Ignoring error", key, c.cloud.ProviderName())
|
klog.V(4).Infof("LoadBalancer for service %s implemented by a different controller %s, Ignoring error", key, c.cloud.ProviderName())
|
||||||
return op, nil
|
return op, nil
|
||||||
}
|
}
|
||||||
return op, fmt.Errorf("failed to ensure load balancer: %v", err)
|
// Use %w deliberately so that a returned RetryError can be handled.
|
||||||
|
return op, fmt.Errorf("failed to ensure load balancer: %w", err)
|
||||||
}
|
}
|
||||||
if newStatus == nil {
|
if newStatus == nil {
|
||||||
return op, fmt.Errorf("service status returned by EnsureLoadBalancer is nil")
|
return op, fmt.Errorf("service status returned by EnsureLoadBalancer is nil")
|
||||||
@ -415,7 +425,7 @@ func (c *Controller) syncLoadBalancerIfNeeded(ctx context.Context, service *v1.S
|
|||||||
// - Not found error mostly happens when service disappears right after
|
// - Not found error mostly happens when service disappears right after
|
||||||
// we remove the finalizer.
|
// we remove the finalizer.
|
||||||
// - We can't patch status on non-exist service anyway.
|
// - We can't patch status on non-exist service anyway.
|
||||||
if !errors.IsNotFound(err) {
|
if !apierrors.IsNotFound(err) {
|
||||||
return op, fmt.Errorf("failed to update load balancer status: %v", err)
|
return op, fmt.Errorf("failed to update load balancer status: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -837,7 +847,7 @@ func (c *Controller) syncService(ctx context.Context, key string) error {
|
|||||||
// service holds the latest service info from apiserver
|
// service holds the latest service info from apiserver
|
||||||
service, err := c.serviceLister.Services(namespace).Get(name)
|
service, err := c.serviceLister.Services(namespace).Get(name)
|
||||||
switch {
|
switch {
|
||||||
case errors.IsNotFound(err):
|
case apierrors.IsNotFound(err):
|
||||||
// service absence in store means watcher caught the deletion, ensure LB info is cleaned
|
// service absence in store means watcher caught the deletion, ensure LB info is cleaned
|
||||||
err = c.processServiceDeletion(ctx, key)
|
err = c.processServiceDeletion(ctx, key)
|
||||||
case err != nil:
|
case err != nil:
|
||||||
|
@ -41,9 +41,12 @@ import (
|
|||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
"k8s.io/client-go/kubernetes/scheme"
|
"k8s.io/client-go/kubernetes/scheme"
|
||||||
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
|
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
|
||||||
|
corelisters "k8s.io/client-go/listers/core/v1"
|
||||||
core "k8s.io/client-go/testing"
|
core "k8s.io/client-go/testing"
|
||||||
|
"k8s.io/client-go/tools/cache"
|
||||||
"k8s.io/client-go/tools/record"
|
"k8s.io/client-go/tools/record"
|
||||||
"k8s.io/client-go/util/workqueue"
|
"k8s.io/client-go/util/workqueue"
|
||||||
|
"k8s.io/cloud-provider/api"
|
||||||
fakecloud "k8s.io/cloud-provider/fake"
|
fakecloud "k8s.io/cloud-provider/fake"
|
||||||
servicehelper "k8s.io/cloud-provider/service/helpers"
|
servicehelper "k8s.io/cloud-provider/service/helpers"
|
||||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
@ -1093,6 +1096,7 @@ func TestSyncService(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.testName, func(t *testing.T) {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
@ -1109,6 +1113,7 @@ func TestSyncService(t *testing.T) {
|
|||||||
if exist {
|
if exist {
|
||||||
t.Fatalf("%v working Queue should be empty, but contains %s", tc.testName, tc.key)
|
t.Fatalf("%v working Queue should be empty, but contains %s", tc.testName, tc.key)
|
||||||
}
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2253,6 +2258,87 @@ func Test_shouldSyncUpdatedNode_compoundedPredicates(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestServiceQueueDelay(t *testing.T) {
|
||||||
|
const ns = metav1.NamespaceDefault
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
lbCloudErr error
|
||||||
|
wantRetryDelay time.Duration
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "processing successful",
|
||||||
|
lbCloudErr: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "regular error",
|
||||||
|
lbCloudErr: errors.New("something went wrong"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "retry error",
|
||||||
|
lbCloudErr: api.NewRetryError("LB create in progress", 42*time.Second),
|
||||||
|
wantRetryDelay: 42 * time.Second,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
controller, cloud, client := newController()
|
||||||
|
queue := &spyWorkQueue{RateLimitingInterface: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "test-service-queue-delay")}
|
||||||
|
controller.serviceQueue = queue
|
||||||
|
cloud.Err = tc.lbCloudErr
|
||||||
|
|
||||||
|
serviceCache := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
|
||||||
|
controller.serviceLister = corelisters.NewServiceLister(serviceCache)
|
||||||
|
|
||||||
|
svc := defaultExternalService()
|
||||||
|
if err := serviceCache.Add(svc); err != nil {
|
||||||
|
t.Fatalf("adding service %s to cache: %s", svc.Name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
_, err := client.CoreV1().Services(ns).Create(ctx, svc, metav1.CreateOptions{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
key, err := cache.MetaNamespaceKeyFunc(svc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("creating meta namespace key: %s", err)
|
||||||
|
}
|
||||||
|
queue.Add(key)
|
||||||
|
|
||||||
|
done := controller.processNextServiceItem(ctx)
|
||||||
|
if !done {
|
||||||
|
t.Fatal("processNextServiceItem stopped prematurely")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expect no requeues unless we hit an error that is not a retry
|
||||||
|
// error.
|
||||||
|
wantNumRequeues := 0
|
||||||
|
var re *api.RetryError
|
||||||
|
isRetryError := errors.As(tc.lbCloudErr, &re)
|
||||||
|
if tc.lbCloudErr != nil && !isRetryError {
|
||||||
|
wantNumRequeues = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if gotNumRequeues := queue.NumRequeues(key); gotNumRequeues != wantNumRequeues {
|
||||||
|
t.Fatalf("got %d requeue(s), want %d", gotNumRequeues, wantNumRequeues)
|
||||||
|
}
|
||||||
|
|
||||||
|
if tc.wantRetryDelay > 0 {
|
||||||
|
items := queue.getItems()
|
||||||
|
if len(items) != 1 {
|
||||||
|
t.Fatalf("got %d item(s), want 1", len(items))
|
||||||
|
}
|
||||||
|
if gotDelay := items[0].Delay; gotDelay != tc.wantRetryDelay {
|
||||||
|
t.Fatalf("got delay %s, want %s", gotDelay, tc.wantRetryDelay)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type fakeNodeLister struct {
|
type fakeNodeLister struct {
|
||||||
cache []*v1.Node
|
cache []*v1.Node
|
||||||
err error
|
err error
|
||||||
@ -2281,3 +2367,33 @@ func (l *fakeNodeLister) Get(name string) (*v1.Node, error) {
|
|||||||
}
|
}
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// spyWorkQueue implements a work queue and adds the ability to inspect processed
|
||||||
|
// items for testing purposes.
|
||||||
|
type spyWorkQueue struct {
|
||||||
|
workqueue.RateLimitingInterface
|
||||||
|
items []spyQueueItem
|
||||||
|
}
|
||||||
|
|
||||||
|
// spyQueueItem represents an item that was being processed.
|
||||||
|
type spyQueueItem struct {
|
||||||
|
Key interface{}
|
||||||
|
// Delay represents the delayed duration if and only if AddAfter was invoked.
|
||||||
|
Delay time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddAfter is like workqueue.RateLimitingInterface.AddAfter but records the
|
||||||
|
// added key and delay internally.
|
||||||
|
func (f *spyWorkQueue) AddAfter(key interface{}, delay time.Duration) {
|
||||||
|
f.items = append(f.items, spyQueueItem{
|
||||||
|
Key: key,
|
||||||
|
Delay: delay,
|
||||||
|
})
|
||||||
|
|
||||||
|
f.RateLimitingInterface.AddAfter(key, delay)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getItems returns all items that were recorded.
|
||||||
|
func (f *spyWorkQueue) getItems() []spyQueueItem {
|
||||||
|
return f.items
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user