Quit retrying early with user supplied resourceVersion

This commit also updates tests and comments.
This commit is contained in:
Michael Gugino 2020-05-28 14:13:00 -04:00
parent 4522141f0a
commit 047b0cee71
2 changed files with 21 additions and 10 deletions

View File

@ -45,13 +45,12 @@ const (
// This situation should self-correct because the PDB controller removes // This situation should self-correct because the PDB controller removes
// entries from the map automatically after the PDB DeletionTimeout regardless. // entries from the map automatically after the PDB DeletionTimeout regardless.
MaxDisruptedPodSize = 2000 MaxDisruptedPodSize = 2000
retrySteps = 20
) )
// EvictionsRetry is the retry for a conflict where multiple clients // EvictionsRetry is the retry for a conflict where multiple clients
// are making changes to the same resource. // are making changes to the same resource.
var EvictionsRetry = wait.Backoff{ var EvictionsRetry = wait.Backoff{
Steps: retrySteps, Steps: 20,
Duration: 500 * time.Millisecond, Duration: 500 * time.Millisecond,
Factor: 1.0, Factor: 1.0,
Jitter: 0.1, Jitter: 0.1,
@ -124,7 +123,14 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
var pod *api.Pod var pod *api.Pod
deletedPod := false deletedPod := false
err = retry.RetryOnConflict(EvictionsRetry, func() error { // by default, retry conflict errors
shouldRetry := errors.IsConflict
if !resourceVersionIsUnset(originalDeleteOptions) {
// if the original options included a resourceVersion precondition, don't retry
shouldRetry = func(err error) bool { return false }
}
err = retry.OnError(EvictionsRetry, shouldRetry, func() error {
obj, err = r.store.Get(ctx, eviction.Name, &metav1.GetOptions{}) obj, err = r.store.Get(ctx, eviction.Name, &metav1.GetOptions{})
if err != nil { if err != nil {
return err return err
@ -140,6 +146,9 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
// the PDB can be ignored, so delete the pod // the PDB can be ignored, so delete the pod
deletionOptions := originalDeleteOptions.DeepCopy() deletionOptions := originalDeleteOptions.DeepCopy()
// We should check if resourceVersion is already set by the requestor
// as it might be older than the pod we just fetched and should be
// honored.
if shouldEnforceResourceVersion(pod) && resourceVersionIsUnset(originalDeleteOptions) { if shouldEnforceResourceVersion(pod) && resourceVersionIsUnset(originalDeleteOptions) {
// Set deletionOptions.Preconditions.ResourceVersion to ensure we're not // Set deletionOptions.Preconditions.ResourceVersion to ensure we're not
// racing with another PDB-impacting process elsewhere. // racing with another PDB-impacting process elsewhere.
@ -158,7 +167,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
switch { switch {
case err != nil: case err != nil:
// this can happen in cases where the PDB can be ignored, but there was a problem issuing the pod delete: // this can happen in cases where the PDB can be ignored, but there was a problem issuing the pod delete:
// maybe we conflicted two many times or we didn't have permission or something else weird. // maybe we conflicted too many times or we didn't have permission or something else weird.
return nil, err return nil, err
case deletedPod: case deletedPod:
@ -245,12 +254,14 @@ func canIgnorePDB(pod *api.Pod) bool {
} }
func shouldEnforceResourceVersion(pod *api.Pod) bool { func shouldEnforceResourceVersion(pod *api.Pod) bool {
// Only pods that may be included as health in PDBs in the future need to be checked. // We don't need to enforce ResourceVersion for terminal pods
if pod.Status.Phase == api.PodPending { if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
return true
}
return false return false
} }
// Return true for all other pods to ensure we don't race against a pod becoming
// ready and violating PDBs.
return true
}
func resourceVersionIsUnset(options *metav1.DeleteOptions) bool { func resourceVersionIsUnset(options *metav1.DeleteOptions) bool {
return options.Preconditions == nil || options.Preconditions.ResourceVersion == nil return options.Preconditions == nil || options.Preconditions.ResourceVersion == nil

View File

@ -272,7 +272,7 @@ func TestEvictionIngorePDB(t *testing.T) {
expectError: true, expectError: true,
podPhase: api.PodPending, podPhase: api.PodPending,
podName: "t4", podName: "t4",
expectedDeleteCount: retrySteps, expectedDeleteCount: EvictionsRetry.Steps,
}, },
{ {
name: "pod pending, always conflict on delete, user provided ResourceVersion constraint", name: "pod pending, always conflict on delete, user provided ResourceVersion constraint",
@ -285,7 +285,7 @@ func TestEvictionIngorePDB(t *testing.T) {
expectError: true, expectError: true,
podPhase: api.PodPending, podPhase: api.PodPending,
podName: "t5", podName: "t5",
expectedDeleteCount: retrySteps, expectedDeleteCount: 1,
}, },
} }