Merge pull request #99398 from tnqn/ds-update-retry

Fix DaemonSet status update retry
This commit is contained in:
Kubernetes Prow Robot 2021-05-18 22:32:50 -07:00 committed by GitHub
commit 89e0b2d185
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 82 additions and 1 deletions

View File

@ -1068,7 +1068,7 @@ func storeDaemonSetStatus(dsClient unversionedapps.DaemonSetInterface, ds *apps.
toUpdate := ds.DeepCopy()
var updateErr, getErr error
for i := 0; i < StatusUpdateRetries; i++ {
for i := 0; ; i++ {
if updateObservedGen {
toUpdate.Status.ObservedGeneration = ds.Generation
}
@ -1084,6 +1084,10 @@ func storeDaemonSetStatus(dsClient unversionedapps.DaemonSetInterface, ds *apps.
return nil
}
// Stop retrying if we exceed statusUpdateRetries - the DaemonSet will be requeued with a rate limit.
if i >= StatusUpdateRetries {
break
}
// Update the set with the latest resource version for the next poll
if toUpdate, getErr = dsClient.Get(context.TODO(), ds.Name, metav1.GetOptions{}); getErr != nil {
// If the GET fails we can't trust status.Replicas anymore. This error

View File

@ -3284,3 +3284,80 @@ func TestSurgeDeletesOldReadyWithUnsatisfiedMinReady(t *testing.T) {
t.Errorf("unexpected deletes\nexpected: %v\n actual: %v", expected.List(), actual.List())
}
}
func TestStoreDaemonSetStatus(t *testing.T) {
getError := fmt.Errorf("fake get error")
updateError := fmt.Errorf("fake update error")
tests := []struct {
name string
updateErrorNum int
getErrorNum int
expectedUpdateCalled int
expectedGetCalled int
expectedError error
}{
{
name: "succeed immediately",
updateErrorNum: 0,
getErrorNum: 0,
expectedUpdateCalled: 1,
expectedGetCalled: 0,
expectedError: nil,
},
{
name: "succeed after one update failure",
updateErrorNum: 1,
getErrorNum: 0,
expectedUpdateCalled: 2,
expectedGetCalled: 1,
expectedError: nil,
},
{
name: "fail after two update failures",
updateErrorNum: 2,
getErrorNum: 0,
expectedUpdateCalled: 2,
expectedGetCalled: 1,
expectedError: updateError,
},
{
name: "fail after one update failure and one get failure",
updateErrorNum: 1,
getErrorNum: 1,
expectedUpdateCalled: 1,
expectedGetCalled: 1,
expectedError: getError,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ds := newDaemonSet("foo")
fakeClient := &fake.Clientset{}
getCalled := 0
fakeClient.AddReactor("get", "daemonsets", func(action core.Action) (bool, runtime.Object, error) {
getCalled += 1
if getCalled <= tt.getErrorNum {
return true, nil, getError
}
return true, ds, nil
})
updateCalled := 0
fakeClient.AddReactor("update", "daemonsets", func(action core.Action) (bool, runtime.Object, error) {
updateCalled += 1
if updateCalled <= tt.updateErrorNum {
return true, nil, updateError
}
return true, ds, nil
})
if err := storeDaemonSetStatus(fakeClient.AppsV1().DaemonSets("default"), ds, 2, 2, 2, 2, 2, 2, 2, true); err != tt.expectedError {
t.Errorf("storeDaemonSetStatus() got %v, expected %v", err, tt.expectedError)
}
if getCalled != tt.expectedGetCalled {
t.Errorf("Get() was called %v times, expected %v times", getCalled, tt.expectedGetCalled)
}
if updateCalled != tt.expectedUpdateCalled {
t.Errorf("UpdateStatus() was called %v times, expected %v times", updateCalled, tt.expectedUpdateCalled)
}
})
}
}