Merge pull request #89438 from robscott/endpointslice-controller-error-backoff

Lengthening initial backoff time for EndpointSlice controller
commit c4fd09d80a
Kubernetes Prow Robot, 2020-03-25 14:02:38 -07:00, committed by GitHub
2 changed files with 40 additions and 8 deletions


@@ -41,6 +41,7 @@ go_library(
         "//staging/src/k8s.io/client-go/tools/record:go_default_library",
         "//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
         "//staging/src/k8s.io/component-base/metrics/prometheus/ratelimiter:go_default_library",
+        "//vendor/golang.org/x/time/rate:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
         "//vendor/k8s.io/utils/net:go_default_library",
     ],


@@ -20,6 +20,8 @@ import (
 	"fmt"
 	"time"
 
+	"golang.org/x/time/rate"
+
 	v1 "k8s.io/api/core/v1"
 	discovery "k8s.io/api/discovery/v1beta1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -47,13 +49,24 @@ const (
 	// maxRetries is the number of times a service will be retried before it is
 	// dropped out of the queue. Any sync error, such as a failure to create or
 	// update an EndpointSlice could trigger a retry. With the current
-	// rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers
-	// represent the sequence of delays between successive queuings of a
-	// service.
+	// rate-limiter in use (1s*2^(numRetries-1)) the following numbers represent
+	// the sequence of delays between successive queuings of a service.
 	//
-	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s,
-	// 10.2s, 20.4s, 41s, 82s
+	// 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s, 512s, 1000s (max)
 	maxRetries = 15
+
+	// endpointSliceChangeMinSyncDelay indicates the minimum delay before
+	// queuing a syncService call after an EndpointSlice changes. If
+	// endpointUpdatesBatchPeriod is greater than this value, it will be used
+	// instead. This helps batch processing of changes to multiple
+	// EndpointSlices.
+	endpointSliceChangeMinSyncDelay = 1 * time.Second
+
+	// defaultSyncBackOff is the default backoff period for syncService calls.
+	defaultSyncBackOff = 1 * time.Second
+	// maxSyncBackOff is the max backoff period for syncService calls.
+	maxSyncBackOff = 1000 * time.Second
+
 	// controllerName is a unique value used with LabelManagedBy to indicate
 	// the component managing an EndpointSlice.
 	controllerName = "endpointslice-controller.k8s.io"
@@ -81,7 +94,18 @@ func NewController(podInformer coreinformers.PodInformer,
 	c := &Controller{
 		client: client,
-		queue:  workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "endpoint_slice"),
+		// This is similar to the DefaultControllerRateLimiter, just with a
+		// significantly higher default backoff (1s vs 5ms). This controller
+		// processes events that can require significant EndpointSlice changes,
+		// such as an update to a Service or Deployment. A more significant
+		// rate limit back off here helps ensure that the Controller does not
+		// overwhelm the API Server.
+		queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewMaxOfRateLimiter(
+			workqueue.NewItemExponentialFailureRateLimiter(defaultSyncBackOff, maxSyncBackOff),
+			// 10 qps, 100 bucket size. This is only for retry speed and it's
+			// only the overall factor (not per item).
+			&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)},
+		), "endpoint_slice"),
 		workerLoopPeriod: time.Second,
 	}
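The combined limiter wired up above can also be constructed on its own. Here is a minimal sketch, not taken from the PR: newEndpointSliceRateLimiter is a hypothetical helper name and the constant values are inlined, but the workqueue and rate calls are the same ones the diff uses.

```go
package main

import (
	"fmt"
	"time"

	"golang.org/x/time/rate"
	"k8s.io/client-go/util/workqueue"
)

// newEndpointSliceRateLimiter mirrors the limiter shape in the diff: per-item
// exponential backoff (1s..1000s) combined, via max, with an overall
// 10 qps / 100 burst token bucket shared across all items.
func newEndpointSliceRateLimiter() workqueue.RateLimiter {
	return workqueue.NewMaxOfRateLimiter(
		workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 1000*time.Second),
		&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)},
	)
}

func main() {
	queue := workqueue.NewNamedRateLimitingQueue(newEndpointSliceRateLimiter(), "endpoint_slice_demo")
	defer queue.ShutDown()

	// AddRateLimited asks the combined limiter for a delay and enqueues the
	// key only after that delay has elapsed.
	queue.AddRateLimited("default/my-service") // hypothetical key
	fmt.Println("immediately queued items:", queue.Len()) // 0 until the 1s backoff elapses
}
```

NewMaxOfRateLimiter returns the largest delay reported by its child limiters, so repeatedly failing items back off exponentially on their own while the shared token bucket caps the aggregate retry rate at roughly 10 per second.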
@@ -409,7 +433,14 @@ func (c *Controller) queueServiceForEndpointSlice(endpointSlice *discovery.Endpo
 		utilruntime.HandleError(fmt.Errorf("Couldn't get key for EndpointSlice %+v: %v", endpointSlice, err))
 		return
 	}
-	c.queue.Add(key)
+
+	// queue after the max of endpointSliceChangeMinSyncDelay and
+	// endpointUpdatesBatchPeriod.
+	delay := endpointSliceChangeMinSyncDelay
+	if c.endpointUpdatesBatchPeriod > delay {
+		delay = c.endpointUpdatesBatchPeriod
+	}
+	c.queue.AddAfter(key, delay)
 }
 
 func (c *Controller) addPod(obj interface{}) {
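The delayed queuing introduced in this hunk is easy to exercise in isolation. A minimal sketch follows, assuming a stripped-down demoController stand-in (the real Controller has many more fields) and the endpointSliceChangeMinSyncDelay value from this diff.

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

const endpointSliceChangeMinSyncDelay = 1 * time.Second

// demoController is a stand-in for the real Controller; only the fields the
// batching logic needs are included.
type demoController struct {
	queue                      workqueue.RateLimitingInterface
	endpointUpdatesBatchPeriod time.Duration
}

// queueService delays the sync by the larger of the minimum sync delay and the
// configured batch period, mirroring queueServiceForEndpointSlice.
func (c *demoController) queueService(key string) {
	delay := endpointSliceChangeMinSyncDelay
	if c.endpointUpdatesBatchPeriod > delay {
		delay = c.endpointUpdatesBatchPeriod
	}
	c.queue.AddAfter(key, delay)
}

func main() {
	c := &demoController{
		queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "demo"),
	}
	defer c.queue.ShutDown()

	// Two rapid EndpointSlice changes for the same service...
	c.queueService("default/my-service")
	c.queueService("default/my-service")

	// ...collapse into a single queued key once the delay elapses, because the
	// workqueue deduplicates identical items.
	time.Sleep(endpointSliceChangeMinSyncDelay + 200*time.Millisecond)
	fmt.Println("queued items:", c.queue.Len()) // 1
}
```

Delaying the add is what makes the batching effective: every change that arrives within the delay window results in a single syncService call instead of one call per EndpointSlice event.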