mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 07:47:56 +00:00
Fix retry logic in DisruptionController
This changes the retry logic in DisruptionController so that it reconciles update conflicts. In the old behavior, any pdb status update failure was retried with the same status, regardless of error. Now there is no retry logic with the status update. The error is passed up the stack where the PDB can be requeued for processing. If the PDB status update error is a conflict error, there are some new special cases: - failSafe is not triggered, since this is considered a retryable error - the PDB is requeued immediately (ignoring the rate limiter) because we assume that conflict can be resolved by getting the latest version
This commit is contained in:
parent
afa979c295
commit
8835f6bb00
@ -32,7 +32,6 @@ go_library(
|
||||
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/kubernetes/typed/policy/v1beta1:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/listers/apps/v1:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/listers/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/listers/policy/v1beta1:go_default_library",
|
||||
|
@ -21,7 +21,7 @@ import (
|
||||
"time"
|
||||
|
||||
apps "k8s.io/api/apps/v1beta1"
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/api/extensions/v1beta1"
|
||||
policy "k8s.io/api/policy/v1beta1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
@ -39,7 +39,6 @@ import (
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/kubernetes/scheme"
|
||||
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
|
||||
policyclientset "k8s.io/client-go/kubernetes/typed/policy/v1beta1"
|
||||
appsv1listers "k8s.io/client-go/listers/apps/v1"
|
||||
corelisters "k8s.io/client-go/listers/core/v1"
|
||||
policylisters "k8s.io/client-go/listers/policy/v1beta1"
|
||||
@ -53,8 +52,6 @@ import (
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
const statusUpdateRetries = 2
|
||||
|
||||
// DeletionTimeout sets maximum time from the moment a pod is added to DisruptedPods in PDB.Status
|
||||
// to the time when the pod is expected to be seen by PDB controller as having been marked for deletion.
|
||||
// If the pod was not marked for deletion during that time it is assumed that it won't be deleted at
|
||||
@ -532,7 +529,13 @@ func (dc *DisruptionController) sync(key string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := dc.trySync(pdb); err != nil {
|
||||
err = dc.trySync(pdb)
|
||||
// If the reason for failure was a conflict, then allow this PDB update to be
|
||||
// requeued without triggering the failSafe logic.
|
||||
if errors.IsConflict(err) {
|
||||
return err
|
||||
}
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to sync pdb %s/%s: %v", pdb.Namespace, pdb.Name, err)
|
||||
return dc.failSafe(pdb)
|
||||
}
|
||||
@ -773,29 +776,9 @@ func (dc *DisruptionController) updatePdbStatus(pdb *policy.PodDisruptionBudget,
|
||||
return dc.getUpdater()(newPdb)
|
||||
}
|
||||
|
||||
// refresh tries to re-GET the given PDB. If there are any errors, it just
|
||||
// returns the old PDB. Intended to be used in a retry loop where it runs a
|
||||
// bounded number of times.
|
||||
func refresh(pdbClient policyclientset.PodDisruptionBudgetInterface, pdb *policy.PodDisruptionBudget) *policy.PodDisruptionBudget {
|
||||
newPdb, err := pdbClient.Get(pdb.Name, metav1.GetOptions{})
|
||||
if err == nil {
|
||||
return newPdb
|
||||
}
|
||||
return pdb
|
||||
|
||||
}
|
||||
|
||||
func (dc *DisruptionController) writePdbStatus(pdb *policy.PodDisruptionBudget) error {
|
||||
pdbClient := dc.kubeClient.PolicyV1beta1().PodDisruptionBudgets(pdb.Namespace)
|
||||
st := pdb.Status
|
||||
|
||||
var err error
|
||||
for i, pdb := 0, pdb; i < statusUpdateRetries; i, pdb = i+1, refresh(pdbClient, pdb) {
|
||||
pdb.Status = st
|
||||
if _, err = pdbClient.UpdateStatus(pdb); err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If this update fails, don't retry it. Allow the failure to get handled &
|
||||
// retried in `processNextWorkItem()`.
|
||||
_, err := dc.kubeClient.PolicyV1beta1().PodDisruptionBudgets(pdb.Namespace).UpdateStatus(pdb)
|
||||
return err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user