mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 20:24:09 +00:00
Merge pull request #37668 from ymqytw/add_retry_loop_for_eviction
Automatic merge from submit-queue add retry logic for eviction Make eviction retry when it fails to update the PDB due to conflict (409) error. fixes #37605 cc: @davidopp
This commit is contained in:
commit
c7785fb114
@ -26,6 +26,7 @@ go_library(
|
||||
"//pkg/api/validation:go_default_library",
|
||||
"//pkg/apis/policy:go_default_library",
|
||||
"//pkg/client/clientset_generated/internalclientset/typed/policy/internalversion:go_default_library",
|
||||
"//pkg/client/retry:go_default_library",
|
||||
"//pkg/kubelet/client:go_default_library",
|
||||
"//pkg/labels:go_default_library",
|
||||
"//pkg/registry/cachesize:go_default_library",
|
||||
@ -35,6 +36,7 @@ go_library(
|
||||
"//pkg/registry/generic/registry:go_default_library",
|
||||
"//pkg/runtime:go_default_library",
|
||||
"//pkg/storage:go_default_library",
|
||||
"//pkg/util/wait:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -25,9 +25,11 @@ import (
|
||||
"k8s.io/kubernetes/pkg/api/unversioned"
|
||||
"k8s.io/kubernetes/pkg/apis/policy"
|
||||
policyclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/policy/internalversion"
|
||||
"k8s.io/kubernetes/pkg/client/retry"
|
||||
"k8s.io/kubernetes/pkg/labels"
|
||||
"k8s.io/kubernetes/pkg/registry/generic/registry"
|
||||
"k8s.io/kubernetes/pkg/runtime"
|
||||
"k8s.io/kubernetes/pkg/util/wait"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -40,6 +42,15 @@ const (
|
||||
MaxDisruptedPodSize = 2000
|
||||
)
|
||||
|
||||
// EvictionsRetry is the retry for a conflict where multiple clients
|
||||
// are making changes to the same resource.
|
||||
var EvictionsRetry = wait.Backoff{
|
||||
Steps: 20,
|
||||
Duration: 500 * time.Millisecond,
|
||||
Factor: 1.0,
|
||||
Jitter: 0.1,
|
||||
}
|
||||
|
||||
func newEvictionStorage(store *registry.Store, podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter) *EvictionREST {
|
||||
return &EvictionREST{store: store, podDisruptionBudgetClient: podDisruptionBudgetClient}
|
||||
}
|
||||
@ -66,41 +77,53 @@ func (r *EvictionREST) Create(ctx api.Context, obj runtime.Object) (runtime.Obje
|
||||
return nil, err
|
||||
}
|
||||
pod := obj.(*api.Pod)
|
||||
pdbs, err := r.getPodDisruptionBudgets(ctx, pod)
|
||||
var rtStatus *unversioned.Status
|
||||
err = retry.RetryOnConflict(EvictionsRetry, func() error {
|
||||
pdbs, err := r.getPodDisruptionBudgets(ctx, pod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(pdbs) > 1 {
|
||||
rtStatus = &unversioned.Status{
|
||||
Status: unversioned.StatusFailure,
|
||||
Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.",
|
||||
Code: 500,
|
||||
}
|
||||
return nil
|
||||
} else if len(pdbs) == 1 {
|
||||
pdb := pdbs[0]
|
||||
// Try to verify-and-decrement
|
||||
|
||||
// If it was false already, or if it becomes false during the course of our retries,
|
||||
// raise an error marked as a 429.
|
||||
ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !ok {
|
||||
rtStatus = &unversioned.Status{
|
||||
Status: unversioned.StatusFailure,
|
||||
// TODO(mml): Include some more details about why the eviction is disallowed.
|
||||
// Ideally any such text is generated by the DisruptionController (offline).
|
||||
Message: "Cannot evict pod as it would violate the pod's disruption budget.",
|
||||
Code: 429,
|
||||
// TODO(mml): Add a Retry-After header. Once there are time-based
|
||||
// budgets, we can sometimes compute a sensible suggested value. But
|
||||
// even without that, we can give a suggestion (10 minutes?) that
|
||||
// prevents well-behaved clients from hammering us.
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(pdbs) > 1 {
|
||||
return &unversioned.Status{
|
||||
Status: unversioned.StatusFailure,
|
||||
Message: "This pod has more than one PodDisruptionBudget, which the eviction subresource does not support.",
|
||||
Code: 500,
|
||||
}, nil
|
||||
} else if len(pdbs) == 1 {
|
||||
pdb := pdbs[0]
|
||||
// Try to verify-and-decrement
|
||||
|
||||
// If it was false already, or if it becomes false during the course of our retries,
|
||||
// raise an error marked as a 429.
|
||||
ok, err := r.checkAndDecrement(pod.Namespace, pod.Name, pdb)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !ok {
|
||||
return &unversioned.Status{
|
||||
Status: unversioned.StatusFailure,
|
||||
// TODO(mml): Include some more details about why the eviction is disallowed.
|
||||
// Ideally any such text is generated by the DisruptionController (offline).
|
||||
Message: "Cannot evict pod as it would violate the pod's disruption budget.",
|
||||
Code: 429,
|
||||
// TODO(mml): Add a Retry-After header. Once there are time-based
|
||||
// budgets, we can sometimes compute a sensible suggested value. But
|
||||
// even without that, we can give a suggestion (10 minutes?) that
|
||||
// prevents well-behaved clients from hammering us.
|
||||
}, nil
|
||||
}
|
||||
if rtStatus != nil {
|
||||
return rtStatus, nil
|
||||
}
|
||||
|
||||
// At this point there was either no PDB or we succeded in decrementing
|
||||
|
Loading…
Reference in New Issue
Block a user