From ddf624efd67b04d35f2bef4f491ad58070ad779c Mon Sep 17 00:00:00 2001 From: ymqytw Date: Wed, 30 Nov 2016 14:30:29 -0800 Subject: [PATCH] add retry logic for eviction --- pkg/registry/core/pod/etcd/BUILD | 2 + pkg/registry/core/pod/etcd/eviction.go | 85 ++++++++++++++++---------- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/pkg/registry/core/pod/etcd/BUILD b/pkg/registry/core/pod/etcd/BUILD index 243d432f15d..f70bac824ef 100644 --- a/pkg/registry/core/pod/etcd/BUILD +++ b/pkg/registry/core/pod/etcd/BUILD @@ -26,6 +26,7 @@ go_library( "//pkg/api/validation:go_default_library", "//pkg/apis/policy:go_default_library", "//pkg/client/clientset_generated/internalclientset/typed/policy/internalversion:go_default_library", + "//pkg/client/retry:go_default_library", "//pkg/kubelet/client:go_default_library", "//pkg/labels:go_default_library", "//pkg/registry/cachesize:go_default_library", @@ -35,6 +36,7 @@ go_library( "//pkg/registry/generic/registry:go_default_library", "//pkg/runtime:go_default_library", "//pkg/storage:go_default_library", + "//pkg/util/wait:go_default_library", ], ) diff --git a/pkg/registry/core/pod/etcd/eviction.go b/pkg/registry/core/pod/etcd/eviction.go index 312bdfdf884..a47a7defc19 100644 --- a/pkg/registry/core/pod/etcd/eviction.go +++ b/pkg/registry/core/pod/etcd/eviction.go @@ -25,9 +25,11 @@ import ( "k8s.io/kubernetes/pkg/api/unversioned" "k8s.io/kubernetes/pkg/apis/policy" policyclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/policy/internalversion" + "k8s.io/kubernetes/pkg/client/retry" "k8s.io/kubernetes/pkg/labels" "k8s.io/kubernetes/pkg/registry/generic/registry" "k8s.io/kubernetes/pkg/runtime" + "k8s.io/kubernetes/pkg/util/wait" ) const ( @@ -40,6 +42,15 @@ const ( MaxDisruptedPodSize = 2000 ) +// EvictionsRetry is the retry for a conflict where multiple clients +// are making changes to the same resource. +var EvictionsRetry = wait.Backoff{ + Steps: 20, + Duration: 500 * time.Millisecond, + Factor: 1.0, + Jitter: 0.1, +} + func newEvictionStorage(store *registry.Store, podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter) *EvictionREST { return &EvictionREST{store: store, podDisruptionBudgetClient: podDisruptionBudgetClient} } @@ -66,41 +77,53 @@ func (r *EvictionREST) Create(ctx api.Context, obj runtime.Object) (runtime.Obje return nil, err } pod := obj.(*api.Pod) - pdbs, err := r.getPodDisruptionBudgets(ctx, pod) + var rtStatus *unversioned.Status + err = retry.RetryOnConflict(EvictionsRetry, func() error { + pdbs, err := r.getPodDisruptionBudgets(ctx, pod) + if err != nil { + return err + } + + if len(pdbs) > 1 { + rtStatus = &unversioned.Status{ + Status: unversioned.StatusFailure, + Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.", + Code: 500, + } + return nil + } else if len(pdbs) == 1 { + pdb := pdbs[0] + // Try to verify-and-decrement + + // If it was false already, or if it becomes false during the course of our retries, + // raise an error marked as a 429. + ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb) + if err != nil { + return err + } + + if !ok { + rtStatus = &unversioned.Status{ + Status: unversioned.StatusFailure, + // TODO(mml): Include some more details about why the eviction is disallowed. + // Ideally any such text is generated by the DisruptionController (offline). + Message: "Cannot evict pod as it would violate the pod's disruption budget.", + Code: 429, + // TODO(mml): Add a Retry-After header. Once there are time-based + // budgets, we can sometimes compute a sensible suggested value. But + // even without that, we can give a suggestion (10 minutes?) that + // prevents well-behaved clients from hammering us. + } + } + } + return nil + }) if err != nil { return nil, err } - if len(pdbs) > 1 { - return &unversioned.Status{ - Status: unversioned.StatusFailure, - Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.", - Code: 500, - }, nil - } else if len(pdbs) == 1 { - pdb := pdbs[0] - // Try to verify-and-decrement - - // If it was false already, or if it becomes false during the course of our retries, - // raise an error marked as a 429. - ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb) - if err != nil { - return nil, err - } - - if !ok { - return &unversioned.Status{ - Status: unversioned.StatusFailure, - // TODO(mml): Include some more details about why the eviction is disallowed. - // Ideally any such text is generated by the DisruptionController (offline). - Message: "Cannot evict pod as it would violate the pod's disruption budget.", - Code: 429, - // TODO(mml): Add a Retry-After header. Once there are time-based - // budgets, we can sometimes compute a sensible suggested value. But - // even without that, we can give a suggestion (10 minutes?) that - // prevents well-behaved clients from hammering us. - }, nil - } + if rtStatus != nil { + return rtStatus, nil } // At this point there was either no PDB or we succeded in decrementing