Merge pull request #37668 from ymqytw/add_retry_loop_for_eviction

Automatic merge from submit-queue

add retry logic for eviction

Make eviction retry when it fails to update the PDB due to conflict (409) error.

fixes #37605

cc: @davidopp
This commit is contained in:
Kubernetes Submit Queue 2016-12-02 02:17:40 -08:00 committed by GitHub
commit c7785fb114
2 changed files with 56 additions and 31 deletions

View File

@ -26,6 +26,7 @@ go_library(
"//pkg/api/validation:go_default_library",
"//pkg/apis/policy:go_default_library",
"//pkg/client/clientset_generated/internalclientset/typed/policy/internalversion:go_default_library",
"//pkg/client/retry:go_default_library",
"//pkg/kubelet/client:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/registry/cachesize:go_default_library",
@ -35,6 +36,7 @@ go_library(
"//pkg/registry/generic/registry:go_default_library",
"//pkg/runtime:go_default_library",
"//pkg/storage:go_default_library",
"//pkg/util/wait:go_default_library",
],
)

View File

@ -25,9 +25,11 @@ import (
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/apis/policy"
policyclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/policy/internalversion"
"k8s.io/kubernetes/pkg/client/retry"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/registry/generic/registry"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/wait"
)
const (
@ -40,6 +42,15 @@ const (
MaxDisruptedPodSize = 2000
)
// EvictionsRetry is the retry for a conflict where multiple clients
// are making changes to the same resource.
var EvictionsRetry = wait.Backoff{
Steps: 20,
Duration: 500 * time.Millisecond,
Factor: 1.0,
Jitter: 0.1,
}
func newEvictionStorage(store *registry.Store, podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter) *EvictionREST {
return &EvictionREST{store: store, podDisruptionBudgetClient: podDisruptionBudgetClient}
}
@ -66,41 +77,53 @@ func (r *EvictionREST) Create(ctx api.Context, obj runtime.Object) (runtime.Obje
return nil, err
}
pod := obj.(*api.Pod)
pdbs, err := r.getPodDisruptionBudgets(ctx, pod)
var rtStatus *unversioned.Status
err = retry.RetryOnConflict(EvictionsRetry, func() error {
pdbs, err := r.getPodDisruptionBudgets(ctx, pod)
if err != nil {
return err
}
if len(pdbs) > 1 {
rtStatus = &unversioned.Status{
Status: unversioned.StatusFailure,
Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.",
Code: 500,
}
return nil
} else if len(pdbs) == 1 {
pdb := pdbs[0]
// Try to verify-and-decrement
// If it was false already, or if it becomes false during the course of our retries,
// raise an error marked as a 429.
ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb)
if err != nil {
return err
}
if !ok {
rtStatus = &unversioned.Status{
Status: unversioned.StatusFailure,
// TODO(mml): Include some more details about why the eviction is disallowed.
// Ideally any such text is generated by the DisruptionController (offline).
Message: "Cannot evict pod as it would violate the pod's disruption budget.",
Code: 429,
// TODO(mml): Add a Retry-After header. Once there are time-based
// budgets, we can sometimes compute a sensible suggested value. But
// even without that, we can give a suggestion (10 minutes?) that
// prevents well-behaved clients from hammering us.
}
}
}
return nil
})
if err != nil {
return nil, err
}
if len(pdbs) > 1 {
return &unversioned.Status{
Status: unversioned.StatusFailure,
Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.",
Code: 500,
}, nil
} else if len(pdbs) == 1 {
pdb := pdbs[0]
// Try to verify-and-decrement
// If it was false already, or if it becomes false during the course of our retries,
// raise an error marked as a 429.
ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb)
if err != nil {
return nil, err
}
if !ok {
return &unversioned.Status{
Status: unversioned.StatusFailure,
// TODO(mml): Include some more details about why the eviction is disallowed.
// Ideally any such text is generated by the DisruptionController (offline).
Message: "Cannot evict pod as it would violate the pod's disruption budget.",
Code: 429,
// TODO(mml): Add a Retry-After header. Once there are time-based
// budgets, we can sometimes compute a sensible suggested value. But
// even without that, we can give a suggestion (10 minutes?) that
// prevents well-behaved clients from hammering us.
}, nil
}
if rtStatus != nil {
return rtStatus, nil
}
// At this point there was either no PDB or we succeded in decrementing