Backoff correctly when adopting replica sets/pods
@@ -33,8 +33,6 @@ import (
 	"k8s.io/kubernetes/pkg/labels"
 	utilerrors "k8s.io/kubernetes/pkg/util/errors"
 	labelsutil "k8s.io/kubernetes/pkg/util/labels"
-	podutil "k8s.io/kubernetes/pkg/util/pod"
-	rsutil "k8s.io/kubernetes/pkg/util/replicaset"
 )
 
 // syncStatusOnly only updates Deployments Status and doesn't take any mutating actions.
@@ -162,22 +160,14 @@ func (dc *DeploymentController) rsAndPodsWithHashKeySynced(deployment *extension
 // 1. Add hash label to the rs's pod template, and make sure the controller sees this update so that no orphaned pods will be created
 // 2. Add hash label to all pods this rs owns, wait until replicaset controller reports rs.Status.FullyLabeledReplicas equal to the desired number of replicas
 // 3. Add hash label to the rs's label and selector
-func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet) (updatedRS *extensions.ReplicaSet, err error) {
-	objCopy, err := api.Scheme.Copy(rs)
-	if err != nil {
-		return nil, err
-	}
-	updatedRS = objCopy.(*extensions.ReplicaSet)
-
+func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet) (*extensions.ReplicaSet, error) {
 	// If the rs already has the new hash label in its selector, it's done syncing
 	if labelsutil.SelectorHasLabel(rs.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey) {
-		return
+		return rs, nil
 	}
-	namespace := rs.Namespace
-	hash := rsutil.GetPodTemplateSpecHash(rs)
-	rsUpdated := false
+	hash := deploymentutil.GetReplicaSetHash(rs)
 	// 1. Add hash template label to the rs. This ensures that any newly created pods will have the new label.
-	updatedRS, rsUpdated, err = rsutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(namespace), updatedRS,
+	updatedRS, err := deploymentutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(rs.Namespace), dc.rsLister, rs.Namespace, rs.Name,
 		func(updated *extensions.ReplicaSet) error {
 			// Precondition: the RS doesn't contain the new hash in its pod template label.
 			if updated.Spec.Template.Labels[extensions.DefaultDeploymentUniqueLabelKey] == hash {
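
The rewritten step 1 above funnels the write through deploymentutil.UpdateRSWithRetries, whose update callback signals "nothing left to do" with utilerrors.ErrPreconditionViolated instead of an error, which is what lets the sync loop back off cleanly. A minimal standalone sketch of this conflict-retry-with-precondition idiom, using hypothetical fetch and write callbacks rather than the real client-go API:

package sketch

import (
	"errors"
	"fmt"
)

// errPreconditionViolated plays the role of utilerrors.ErrPreconditionViolated:
// the object is already in the desired state, so the update is skipped rather
// than treated as a failure.
var errPreconditionViolated = errors.New("precondition violated")

type replicaSet struct {
	Labels map[string]string
}

// updateWithRetries fetches a fresh copy, applies mutate, and writes it back,
// retrying a few times on write conflicts. A precondition violation from
// mutate is reported as success, since the desired state already holds.
func updateWithRetries(fetch func() (*replicaSet, error), write func(*replicaSet) error, mutate func(*replicaSet) error) (*replicaSet, error) {
	var lastErr error
	for attempt := 0; attempt < 3; attempt++ {
		rs, err := fetch()
		if err != nil {
			return nil, err
		}
		if err := mutate(rs); err != nil {
			if errors.Is(err, errPreconditionViolated) {
				return rs, nil // already labeled; nothing to write
			}
			return nil, err
		}
		if lastErr = write(rs); lastErr == nil {
			return rs, nil
		}
		// write conflict or transient error: refetch and try again
	}
	return nil, fmt.Errorf("update failed after retries: %v", lastErr)
}
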
@@ -187,20 +177,15 @@ func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet)
 			return nil
 		})
 	if err != nil {
-		return nil, fmt.Errorf("error updating %s %s/%s pod template label with template hash: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, err)
-	}
-	if !rsUpdated {
-		// If RS wasn't updated but didn't return error in step 1, we've hit a RS not found error.
-		// Return here and retry in the next sync loop.
-		return rs, nil
+		return nil, fmt.Errorf("error updating replica set %s/%s pod template label with template hash: %v", rs.Namespace, rs.Name, err)
 	}
 	// Make sure rs pod template is updated so that it won't create pods without the new label (orphaned pods).
 	if updatedRS.Generation > updatedRS.Status.ObservedGeneration {
-		if err = deploymentutil.WaitForReplicaSetUpdated(dc.client, updatedRS.Generation, namespace, updatedRS.Name); err != nil {
-			return nil, fmt.Errorf("error waiting for %s %s/%s generation %d observed by controller: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, updatedRS.Generation, err)
+		if err = deploymentutil.WaitForReplicaSetUpdated(dc.client, updatedRS.Generation, updatedRS.Namespace, updatedRS.Name); err != nil {
+			return nil, fmt.Errorf("error waiting for replica set %s/%s to be observed by controller: %v", updatedRS.Namespace, updatedRS.Name, err)
 		}
+		glog.V(4).Infof("Observed the update of replica set %s/%s's pod template with hash %s.", rs.Namespace, rs.Name, hash)
 	}
-	glog.V(4).Infof("Observed the update of %s %s/%s's pod template with hash %s.", rs.Kind, rs.Namespace, rs.Name, hash)
 
 	// 2. Update all pods managed by the rs to have the new hash label, so they will be correctly adopted.
 	selector, err := metav1.LabelSelectorAsSelector(updatedRS.Spec.Selector)
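
The WaitForReplicaSetUpdated call above blocks until the replica set controller's status.observedGeneration catches up with the generation bumped by the step-1 update; only then is it safe to start relabeling pods. A rough sketch of such a generation wait, with a hypothetical getRS callback standing in for the real client call:

package sketch

import (
	"errors"
	"time"
)

type replicaSetMeta struct {
	Generation         int64 // bumped by every spec update
	ObservedGeneration int64 // last generation the controller has processed
}

// waitForObserved polls until the controller reports that it has seen at
// least the given generation, or the deadline expires.
func waitForObserved(getRS func() (replicaSetMeta, error), generation int64, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		cur, err := getRS()
		if err != nil {
			return err
		}
		if cur.ObservedGeneration >= generation {
			return nil
		}
		time.Sleep(time.Second) // fixed poll interval; the real code uses a wait helper
	}
	return errors.New("timed out waiting for observed generation")
}
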
@@ -212,54 +197,45 @@ func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet)
 	if err != nil {
 		return nil, err
 	}
-	pods, err := dc.podLister.Pods(namespace).List(parsed)
+	pods, err := dc.podLister.Pods(updatedRS.Namespace).List(parsed)
 	if err != nil {
-		return nil, fmt.Errorf("error in getting pod list for namespace %s and list options %+v: %s", namespace, options, err)
+		return nil, fmt.Errorf("error in getting pod list for namespace %s and list options %+v: %s", rs.Namespace, options, err)
 	}
 	podList := v1.PodList{Items: make([]v1.Pod, 0, len(pods))}
 	for i := range pods {
 		podList.Items = append(podList.Items, *pods[i])
 	}
-	allPodsLabeled := false
-	if allPodsLabeled, err = deploymentutil.LabelPodsWithHash(&podList, updatedRS, dc.client, namespace, hash); err != nil {
+	if err := deploymentutil.LabelPodsWithHash(&podList, dc.client, dc.podLister, rs.Namespace, rs.Name, hash); err != nil {
 		return nil, fmt.Errorf("error in adding template hash label %s to pods %+v: %s", hash, podList, err)
 	}
-	// If not all pods are labeled but didn't return error in step 2, we've hit at least one pod not found error.
-	// Return here and retry in the next sync loop.
-	if !allPodsLabeled {
-		return updatedRS, nil
-	}
 
 	// We need to wait for the replicaset controller to observe the pods being
 	// labeled with pod template hash. Because previously we've called
 	// WaitForReplicaSetUpdated, the replicaset controller should have dropped
 	// FullyLabeledReplicas to 0 already, we only need to wait it to increase
 	// back to the number of replicas in the spec.
-	if err = deploymentutil.WaitForPodsHashPopulated(dc.client, updatedRS.Generation, namespace, updatedRS.Name); err != nil {
-		return nil, fmt.Errorf("%s %s/%s: error waiting for replicaset controller to observe pods being labeled with template hash: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, err)
+	if err := deploymentutil.WaitForPodsHashPopulated(dc.client, updatedRS.Generation, updatedRS.Namespace, updatedRS.Name); err != nil {
+		return nil, fmt.Errorf("Replica set %s/%s: error waiting for replicaset controller to observe pods being labeled with template hash: %v", updatedRS.Namespace, updatedRS.Name, err)
 	}
 
 	// 3. Update rs label and selector to include the new hash label
 	// Copy the old selector, so that we can scrub out any orphaned pods
-	if updatedRS, rsUpdated, err = rsutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(namespace), updatedRS,
-		func(updated *extensions.ReplicaSet) error {
-			// Precondition: the RS doesn't contain the new hash in its label or selector.
-			if updated.Labels[extensions.DefaultDeploymentUniqueLabelKey] == hash && updated.Spec.Selector.MatchLabels[extensions.DefaultDeploymentUniqueLabelKey] == hash {
-				return utilerrors.ErrPreconditionViolated
-			}
-			updated.Labels = labelsutil.AddLabel(updated.Labels, extensions.DefaultDeploymentUniqueLabelKey, hash)
-			updated.Spec.Selector = labelsutil.AddLabelToSelector(updated.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, hash)
-			return nil
-		}); err != nil {
-		return nil, fmt.Errorf("error updating %s %s/%s label and selector with template hash: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, err)
+	updatedRS, err = deploymentutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(rs.Namespace), dc.rsLister, rs.Namespace, rs.Name, func(updated *extensions.ReplicaSet) error {
+		// Precondition: the RS doesn't contain the new hash in its label and selector.
+		if updated.Labels[extensions.DefaultDeploymentUniqueLabelKey] == hash && updated.Spec.Selector.MatchLabels[extensions.DefaultDeploymentUniqueLabelKey] == hash {
+			return utilerrors.ErrPreconditionViolated
+		}
+		updated.Labels = labelsutil.AddLabel(updated.Labels, extensions.DefaultDeploymentUniqueLabelKey, hash)
+		updated.Spec.Selector = labelsutil.AddLabelToSelector(updated.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, hash)
+		return nil
+	})
+	// If the RS isn't actually updated, that's okay, we'll retry in the
+	// next sync loop since its selector isn't updated yet.
+	if err != nil {
+		return nil, fmt.Errorf("error updating ReplicaSet %s/%s label and selector with template hash: %v", updatedRS.Namespace, updatedRS.Name, err)
 	}
-	if rsUpdated {
-		glog.V(4).Infof("Updated %s %s/%s's selector and label with hash %s.", rs.Kind, rs.Namespace, rs.Name, hash)
-	}
-	// If the RS isn't actually updated in step 3, that's okay, we'll retry in the next sync loop since its selector isn't updated yet.
 
 	// TODO: look for orphaned pods and label them in the background somewhere else periodically
 
 	return updatedRS, nil
 }
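
After step 2 relabels the pods, the function waits (WaitForPodsHashPopulated above) for status.fullyLabeledReplicas to climb back to the desired replica count, which is the replica set controller's signal that every pod now carries the hash label. A minimal sketch of that wait, with a hypothetical getCounts callback in place of the real client:

package sketch

import (
	"errors"
	"time"
)

type rsCounts struct {
	Replicas             int32 // desired count, from the spec
	FullyLabeledReplicas int32 // pods matching the full label set, from status
}

// waitForPodsLabeled polls until every desired replica is reported as fully
// labeled, or the deadline expires.
func waitForPodsLabeled(getCounts func() (rsCounts, error), timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		c, err := getCounts()
		if err != nil {
			return err
		}
		if c.FullyLabeledReplicas == c.Replicas {
			return nil
		}
		time.Sleep(time.Second)
	}
	return errors.New("timed out waiting for pods to be labeled with hash")
}
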
@@ -340,7 +316,7 @@ func (dc *DeploymentController) getNewReplicaSet(deployment *extensions.Deployme
 
 	// new ReplicaSet does not exist, create one.
 	namespace := deployment.Namespace
-	podTemplateSpecHash := podutil.GetPodTemplateSpecHash(deployment.Spec.Template)
+	podTemplateSpecHash := deploymentutil.GetPodTemplateSpecHash(deployment.Spec.Template)
 	newRSTemplate := deploymentutil.GetNewReplicaSetTemplate(deployment)
 	// Add podTemplateHash label to selector.
 	newRSSelector := labelsutil.CloneSelectorAndAddLabel(deployment.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash)
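
The last hunk only moves GetPodTemplateSpecHash between util packages; the value itself is an FNV hash of the pod template that becomes the pod-template-hash label. A toy illustration of deriving such a label value (the real helper deep-hashes the full API object; hashing a %#v dump here is only an approximation):

package main

import (
	"fmt"
	"hash/fnv"
)

type podTemplateSpec struct {
	Labels map[string]string
	Image  string
}

// templateHash returns a deterministic hash of the template, suitable as a
// label value such as pod-template-hash=1234567890.
func templateHash(template podTemplateSpec) uint32 {
	h := fnv.New32a()
	// %#v gives a stable textual dump for this sketch (fmt sorts map keys);
	// the real implementation writes a canonical deep dump of the object.
	fmt.Fprintf(h, "%#v", template)
	return h.Sum32()
}

func main() {
	t := podTemplateSpec{Labels: map[string]string{"app": "web"}, Image: "nginx:1.11"}
	fmt.Println(templateHash(t)) // any change to the template changes the hash
}
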