mirror of
https://github.com/kubernetes/client-go.git
synced 2025-06-27 15:39:39 +00:00
Merge pull request #111387 from marseel/feature/retry_internal_errors
Add option to retry internal api error in reflector. Kubernetes-commit: 79a62d62350fb600f97d1f6309c3274515b3587a
This commit is contained in:
commit
ec0f33729d
10
tools/cache/reflector.go
vendored
10
tools/cache/reflector.go
vendored
@ -71,6 +71,8 @@ type Reflector struct {
|
|||||||
backoffManager wait.BackoffManager
|
backoffManager wait.BackoffManager
|
||||||
// initConnBackoffManager manages backoff the initial connection with the Watch call of ListAndWatch.
|
// initConnBackoffManager manages backoff the initial connection with the Watch call of ListAndWatch.
|
||||||
initConnBackoffManager wait.BackoffManager
|
initConnBackoffManager wait.BackoffManager
|
||||||
|
// MaxInternalErrorRetryDuration defines how long we should retry internal errors returned by watch.
|
||||||
|
MaxInternalErrorRetryDuration time.Duration
|
||||||
|
|
||||||
resyncPeriod time.Duration
|
resyncPeriod time.Duration
|
||||||
// ShouldResync is invoked periodically and whenever it returns `true` the Store's Resync operation is invoked
|
// ShouldResync is invoked periodically and whenever it returns `true` the Store's Resync operation is invoked
|
||||||
@ -287,6 +289,7 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
retry := NewRetryWithDeadline(r.MaxInternalErrorRetryDuration, time.Minute, apierrors.IsInternalError, r.clock)
|
||||||
for {
|
for {
|
||||||
// give the stopCh a chance to stop the loop, even in case of continue statements further down on errors
|
// give the stopCh a chance to stop the loop, even in case of continue statements further down on errors
|
||||||
select {
|
select {
|
||||||
@ -323,7 +326,9 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := watchHandler(start, w, r.store, r.expectedType, r.expectedGVK, r.name, r.expectedTypeName, r.setLastSyncResourceVersion, r.clock, resyncerrc, stopCh); err != nil {
|
err = watchHandler(start, w, r.store, r.expectedType, r.expectedGVK, r.name, r.expectedTypeName, r.setLastSyncResourceVersion, r.clock, resyncerrc, stopCh)
|
||||||
|
retry.After(err)
|
||||||
|
if err != nil {
|
||||||
if err != errorStopRequested {
|
if err != errorStopRequested {
|
||||||
switch {
|
switch {
|
||||||
case isExpiredError(err):
|
case isExpiredError(err):
|
||||||
@ -335,6 +340,9 @@ func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
|
|||||||
klog.V(2).Infof("%s: watch of %v returned 429 - backing off", r.name, r.expectedTypeName)
|
klog.V(2).Infof("%s: watch of %v returned 429 - backing off", r.name, r.expectedTypeName)
|
||||||
<-r.initConnBackoffManager.Backoff().C()
|
<-r.initConnBackoffManager.Backoff().C()
|
||||||
continue
|
continue
|
||||||
|
case apierrors.IsInternalError(err) && retry.ShouldRetry():
|
||||||
|
klog.V(2).Infof("%s: retrying watch of %v internal error: %v", r.name, r.expectedTypeName, err)
|
||||||
|
continue
|
||||||
default:
|
default:
|
||||||
klog.Warningf("%s: watch of %v ended with: %v", r.name, r.expectedTypeName, err)
|
klog.Warningf("%s: watch of %v ended with: %v", r.name, r.expectedTypeName, err)
|
||||||
}
|
}
|
||||||
|
73
tools/cache/reflector_test.go
vendored
73
tools/cache/reflector_test.go
vendored
@ -487,6 +487,79 @@ func TestBackoffOnTooManyRequests(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRetryInternalError(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
maxInternalDuration time.Duration
|
||||||
|
rewindTime int
|
||||||
|
wantRetries int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "retries off",
|
||||||
|
maxInternalDuration: time.Duration(0),
|
||||||
|
wantRetries: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "retries on, all calls fail",
|
||||||
|
maxInternalDuration: time.Second * 30,
|
||||||
|
wantRetries: 31,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "retries on, one call successful",
|
||||||
|
maxInternalDuration: time.Second * 30,
|
||||||
|
rewindTime: 10,
|
||||||
|
wantRetries: 40,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
err := apierrors.NewInternalError(fmt.Errorf("etcdserver: no leader"))
|
||||||
|
fakeClock := testingclock.NewFakeClock(time.Now())
|
||||||
|
bm := &fakeBackoff{clock: fakeClock}
|
||||||
|
|
||||||
|
counter := 0
|
||||||
|
|
||||||
|
lw := &testLW{
|
||||||
|
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
|
||||||
|
return &v1.PodList{ListMeta: metav1.ListMeta{ResourceVersion: "1"}}, nil
|
||||||
|
},
|
||||||
|
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
|
||||||
|
counter = counter + 1
|
||||||
|
t.Logf("Counter: %v", counter)
|
||||||
|
if counter == tc.rewindTime {
|
||||||
|
t.Logf("Rewinding")
|
||||||
|
fakeClock.Step(time.Minute)
|
||||||
|
}
|
||||||
|
|
||||||
|
fakeClock.Step(time.Second)
|
||||||
|
w := watch.NewFakeWithChanSize(1, false)
|
||||||
|
status := err.Status()
|
||||||
|
w.Error(&status)
|
||||||
|
return w, nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
r := &Reflector{
|
||||||
|
name: "test-reflector",
|
||||||
|
listerWatcher: lw,
|
||||||
|
store: NewFIFO(MetaNamespaceKeyFunc),
|
||||||
|
initConnBackoffManager: bm,
|
||||||
|
clock: fakeClock,
|
||||||
|
watchErrorHandler: WatchErrorHandler(DefaultWatchErrorHandler),
|
||||||
|
}
|
||||||
|
|
||||||
|
r.MaxInternalErrorRetryDuration = tc.maxInternalDuration
|
||||||
|
|
||||||
|
stopCh := make(chan struct{})
|
||||||
|
r.ListAndWatch(stopCh)
|
||||||
|
close(stopCh)
|
||||||
|
|
||||||
|
if counter-1 != tc.wantRetries {
|
||||||
|
t.Errorf("%v unexpected number of retries: %d", tc, counter-1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestReflectorResync(t *testing.T) {
|
func TestReflectorResync(t *testing.T) {
|
||||||
iteration := 0
|
iteration := 0
|
||||||
stopCh := make(chan struct{})
|
stopCh := make(chan struct{})
|
||||||
|
78
tools/cache/retry_with_deadline.go
vendored
Normal file
78
tools/cache/retry_with_deadline.go
vendored
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"k8s.io/utils/clock"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RetryWithDeadline is fed the outcome of each attempt and answers whether the
// caller should try again.
type RetryWithDeadline interface {
	// After records the result of the most recent attempt; err may be nil.
	After(err error)
	// ShouldRetry reports whether another attempt should be made.
	ShouldRetry() bool
}
|
||||||
|
|
||||||
|
type retryWithDeadlineImpl struct {
|
||||||
|
firstErrorTime time.Time
|
||||||
|
lastErrorTime time.Time
|
||||||
|
maxRetryDuration time.Duration
|
||||||
|
minResetPeriod time.Duration
|
||||||
|
isRetryable func(error) bool
|
||||||
|
clock clock.Clock
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewRetryWithDeadline(maxRetryDuration, minResetPeriod time.Duration, isRetryable func(error) bool, clock clock.Clock) RetryWithDeadline {
|
||||||
|
return &retryWithDeadlineImpl{
|
||||||
|
firstErrorTime: time.Time{},
|
||||||
|
lastErrorTime: time.Time{},
|
||||||
|
maxRetryDuration: maxRetryDuration,
|
||||||
|
minResetPeriod: minResetPeriod,
|
||||||
|
isRetryable: isRetryable,
|
||||||
|
clock: clock,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *retryWithDeadlineImpl) reset() {
|
||||||
|
r.firstErrorTime = time.Time{}
|
||||||
|
r.lastErrorTime = time.Time{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *retryWithDeadlineImpl) After(err error) {
|
||||||
|
if r.isRetryable(err) {
|
||||||
|
if r.clock.Now().Sub(r.lastErrorTime) >= r.minResetPeriod {
|
||||||
|
r.reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.firstErrorTime.IsZero() {
|
||||||
|
r.firstErrorTime = r.clock.Now()
|
||||||
|
}
|
||||||
|
r.lastErrorTime = r.clock.Now()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *retryWithDeadlineImpl) ShouldRetry() bool {
|
||||||
|
if r.maxRetryDuration <= time.Duration(0) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.clock.Now().Sub(r.firstErrorTime) <= r.maxRetryDuration {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
r.reset()
|
||||||
|
return false
|
||||||
|
}
|
143
tools/cache/retry_with_deadline_test.go
vendored
Normal file
143
tools/cache/retry_with_deadline_test.go
vendored
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2022 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||||
|
testingclock "k8s.io/utils/clock/testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// retryScenarioStep is one step of a TestRetryWithDeadline scenario.
type retryScenarioStep struct {
	// clockStep is how far the fake clock is advanced before err is reported.
	clockStep time.Duration
	// err is the value passed to After for this step; it may be nil.
	err error
	// wantRetry is the expected ShouldRetry result right after the step.
	wantRetry bool
}
|
||||||
|
|
||||||
|
func TestRetryWithDeadline(t *testing.T) {
|
||||||
|
internalError := apierrors.NewInternalError(fmt.Errorf("etcdserver: no leader"))
|
||||||
|
otherError := fmt.Errorf("some other error")
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
duration time.Duration
|
||||||
|
reset time.Duration
|
||||||
|
isRetryable func(error) bool
|
||||||
|
scenario []retryScenarioStep
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Never retry when duration is zero",
|
||||||
|
duration: time.Duration(0),
|
||||||
|
reset: time.Second * 30,
|
||||||
|
isRetryable: func(err error) bool { return false },
|
||||||
|
scenario: []retryScenarioStep{
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: nil,
|
||||||
|
wantRetry: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 0,
|
||||||
|
err: internalError,
|
||||||
|
wantRetry: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: otherError,
|
||||||
|
wantRetry: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Retry when internal error happens only within duration",
|
||||||
|
duration: time.Second * 1,
|
||||||
|
reset: time.Second * 30,
|
||||||
|
isRetryable: apierrors.IsInternalError,
|
||||||
|
scenario: []retryScenarioStep{
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: internalError,
|
||||||
|
wantRetry: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: internalError,
|
||||||
|
wantRetry: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: internalError,
|
||||||
|
wantRetry: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Don't retry when other error happens",
|
||||||
|
duration: time.Second * 1,
|
||||||
|
reset: time.Second * 30,
|
||||||
|
isRetryable: apierrors.IsInternalError,
|
||||||
|
scenario: []retryScenarioStep{
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: otherError,
|
||||||
|
wantRetry: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Ignore other errors for retries",
|
||||||
|
duration: time.Second * 1,
|
||||||
|
reset: time.Second * 30,
|
||||||
|
isRetryable: apierrors.IsInternalError,
|
||||||
|
scenario: []retryScenarioStep{
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: internalError,
|
||||||
|
wantRetry: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 0,
|
||||||
|
err: otherError,
|
||||||
|
wantRetry: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
clockStep: time.Second * 1,
|
||||||
|
err: internalError,
|
||||||
|
wantRetry: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
fakeClock := testingclock.NewFakeClock(time.Now())
|
||||||
|
retry := NewRetryWithDeadline(tc.duration, tc.reset, tc.isRetryable, fakeClock)
|
||||||
|
|
||||||
|
for i, step := range tc.scenario {
|
||||||
|
fakeClock.Step(step.clockStep)
|
||||||
|
retry.After(step.err)
|
||||||
|
result := retry.ShouldRetry()
|
||||||
|
if result != step.wantRetry {
|
||||||
|
t.Errorf("%v unexpected retry, step %d, result %v want %v", tc, i, result, step.wantRetry)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user