Backoff correctly when adopting replica sets/pods

Author: Michail Kargakis
Date:   2016-11-07 14:23:15 +01:00
Parent: fc361206e7
Commit: b3765c4df9

11 changed files with 208 additions and 366 deletions
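The substance of the change: addHashKeyToRSAndPods used to swallow "replica set not found" and "pod not found" conditions by returning early with a nil error (the removed !rsUpdated and !allPodsLabeled branches below), so a failed adoption was retried immediately on the next sync with no backoff. After this commit the helper propagates the error, and the deployment controller's rate-limited workqueue spaces out the retries. A minimal sketch of that controller pattern, not code from this commit: the queue/syncHandler wiring is illustrative, while the workqueue calls (Get, Done, AddRateLimited, Forget) are real client-go API shown with today's import path.

package main

import (
    "fmt"

    "k8s.io/client-go/util/workqueue"
)

type controller struct {
    queue       workqueue.RateLimitingInterface
    syncHandler func(key string) error // e.g. syncDeployment
}

// processNextWorkItem pops one key and syncs it. An error from the handler is
// not swallowed: AddRateLimited requeues the key with exponentially increasing
// delay, so a repeatedly failing adoption backs off instead of hot-looping.
func (c *controller) processNextWorkItem() bool {
    key, quit := c.queue.Get()
    if quit {
        return false
    }
    defer c.queue.Done(key)

    if err := c.syncHandler(key.(string)); err != nil {
        fmt.Printf("syncing %v failed, requeuing with backoff: %v\n", key, err)
        c.queue.AddRateLimited(key)
        return true
    }
    // Success: forget the key so its backoff counter resets.
    c.queue.Forget(key)
    return true
}

func main() {
    c := &controller{
        queue:       workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
        syncHandler: func(key string) error { return nil },
    }
    c.queue.Add("default/my-deployment")
    c.processNextWorkItem()
}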


@@ -33,8 +33,6 @@ import (
     "k8s.io/kubernetes/pkg/labels"
     utilerrors "k8s.io/kubernetes/pkg/util/errors"
     labelsutil "k8s.io/kubernetes/pkg/util/labels"
-    podutil "k8s.io/kubernetes/pkg/util/pod"
-    rsutil "k8s.io/kubernetes/pkg/util/replicaset"
 )

 // syncStatusOnly only updates Deployments Status and doesn't take any mutating actions.
@@ -162,22 +160,14 @@ func (dc *DeploymentController) rsAndPodsWithHashKeySynced(deployment *extension
 // 1. Add hash label to the rs's pod template, and make sure the controller sees this update so that no orphaned pods will be created
 // 2. Add hash label to all pods this rs owns, wait until replicaset controller reports rs.Status.FullyLabeledReplicas equal to the desired number of replicas
 // 3. Add hash label to the rs's label and selector
-func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet) (updatedRS *extensions.ReplicaSet, err error) {
-    objCopy, err := api.Scheme.Copy(rs)
-    if err != nil {
-        return nil, err
-    }
-    updatedRS = objCopy.(*extensions.ReplicaSet)
+func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet) (*extensions.ReplicaSet, error) {
     // If the rs already has the new hash label in its selector, it's done syncing
     if labelsutil.SelectorHasLabel(rs.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey) {
-        return
+        return rs, nil
     }
-    namespace := rs.Namespace
-    hash := rsutil.GetPodTemplateSpecHash(rs)
-    rsUpdated := false
+    hash := deploymentutil.GetReplicaSetHash(rs)
     // 1. Add hash template label to the rs. This ensures that any newly created pods will have the new label.
-    updatedRS, rsUpdated, err = rsutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(namespace), updatedRS,
+    updatedRS, err := deploymentutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(rs.Namespace), dc.rsLister, rs.Namespace, rs.Name,
         func(updated *extensions.ReplicaSet) error {
             // Precondition: the RS doesn't contain the new hash in its pod template label.
             if updated.Spec.Template.Labels[extensions.DefaultDeploymentUniqueLabelKey] == hash {
@@ -187,20 +177,15 @@ func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet)
             return nil
         })
     if err != nil {
-        return nil, fmt.Errorf("error updating %s %s/%s pod template label with template hash: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, err)
-    }
-    if !rsUpdated {
-        // If RS wasn't updated but didn't return error in step 1, we've hit a RS not found error.
-        // Return here and retry in the next sync loop.
-        return rs, nil
+        return nil, fmt.Errorf("error updating replica set %s/%s pod template label with template hash: %v", rs.Namespace, rs.Name, err)
     }
     // Make sure rs pod template is updated so that it won't create pods without the new label (orphaned pods).
     if updatedRS.Generation > updatedRS.Status.ObservedGeneration {
-        if err = deploymentutil.WaitForReplicaSetUpdated(dc.client, updatedRS.Generation, namespace, updatedRS.Name); err != nil {
-            return nil, fmt.Errorf("error waiting for %s %s/%s generation %d observed by controller: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, updatedRS.Generation, err)
+        if err = deploymentutil.WaitForReplicaSetUpdated(dc.client, updatedRS.Generation, updatedRS.Namespace, updatedRS.Name); err != nil {
+            return nil, fmt.Errorf("error waiting for replica set %s/%s to be observed by controller: %v", updatedRS.Namespace, updatedRS.Name, err)
         }
+        glog.V(4).Infof("Observed the update of replica set %s/%s's pod template with hash %s.", rs.Namespace, rs.Name, hash)
     }
-    glog.V(4).Infof("Observed the update of %s %s/%s's pod template with hash %s.", rs.Kind, rs.Namespace, rs.Name, hash)

     // 2. Update all pods managed by the rs to have the new hash label, so they will be correctly adopted.
     selector, err := metav1.LabelSelectorAsSelector(updatedRS.Spec.Selector)
@@ -212,54 +197,45 @@ func (dc *DeploymentController) addHashKeyToRSAndPods(rs *extensions.ReplicaSet)
     if err != nil {
         return nil, err
     }
-    pods, err := dc.podLister.Pods(namespace).List(parsed)
+    pods, err := dc.podLister.Pods(updatedRS.Namespace).List(parsed)
     if err != nil {
-        return nil, fmt.Errorf("error in getting pod list for namespace %s and list options %+v: %s", namespace, options, err)
+        return nil, fmt.Errorf("error in getting pod list for namespace %s and list options %+v: %s", rs.Namespace, options, err)
     }
     podList := v1.PodList{Items: make([]v1.Pod, 0, len(pods))}
     for i := range pods {
         podList.Items = append(podList.Items, *pods[i])
     }
-    allPodsLabeled := false
-    if allPodsLabeled, err = deploymentutil.LabelPodsWithHash(&podList, updatedRS, dc.client, namespace, hash); err != nil {
+    if err := deploymentutil.LabelPodsWithHash(&podList, dc.client, dc.podLister, rs.Namespace, rs.Name, hash); err != nil {
         return nil, fmt.Errorf("error in adding template hash label %s to pods %+v: %s", hash, podList, err)
     }
-    // If not all pods are labeled but didn't return error in step 2, we've hit at least one pod not found error.
-    // Return here and retry in the next sync loop.
-    if !allPodsLabeled {
-        return updatedRS, nil
-    }

     // We need to wait for the replicaset controller to observe the pods being
     // labeled with pod template hash. Because previously we've called
     // WaitForReplicaSetUpdated, the replicaset controller should have dropped
     // FullyLabeledReplicas to 0 already, we only need to wait it to increase
     // back to the number of replicas in the spec.
-    if err = deploymentutil.WaitForPodsHashPopulated(dc.client, updatedRS.Generation, namespace, updatedRS.Name); err != nil {
-        return nil, fmt.Errorf("%s %s/%s: error waiting for replicaset controller to observe pods being labeled with template hash: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, err)
+    if err := deploymentutil.WaitForPodsHashPopulated(dc.client, updatedRS.Generation, updatedRS.Namespace, updatedRS.Name); err != nil {
+        return nil, fmt.Errorf("Replica set %s/%s: error waiting for replicaset controller to observe pods being labeled with template hash: %v", updatedRS.Namespace, updatedRS.Name, err)
     }

     // 3. Update rs label and selector to include the new hash label
     // Copy the old selector, so that we can scrub out any orphaned pods
-    if updatedRS, rsUpdated, err = rsutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(namespace), updatedRS,
-        func(updated *extensions.ReplicaSet) error {
-            // Precondition: the RS doesn't contain the new hash in its label or selector.
-            if updated.Labels[extensions.DefaultDeploymentUniqueLabelKey] == hash && updated.Spec.Selector.MatchLabels[extensions.DefaultDeploymentUniqueLabelKey] == hash {
-                return utilerrors.ErrPreconditionViolated
-            }
-            updated.Labels = labelsutil.AddLabel(updated.Labels, extensions.DefaultDeploymentUniqueLabelKey, hash)
-            updated.Spec.Selector = labelsutil.AddLabelToSelector(updated.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, hash)
-            return nil
-        }); err != nil {
-        return nil, fmt.Errorf("error updating %s %s/%s label and selector with template hash: %v", updatedRS.Kind, updatedRS.Namespace, updatedRS.Name, err)
+    updatedRS, err = deploymentutil.UpdateRSWithRetries(dc.client.Extensions().ReplicaSets(rs.Namespace), dc.rsLister, rs.Namespace, rs.Name, func(updated *extensions.ReplicaSet) error {
+        // Precondition: the RS doesn't contain the new hash in its label and selector.
+        if updated.Labels[extensions.DefaultDeploymentUniqueLabelKey] == hash && updated.Spec.Selector.MatchLabels[extensions.DefaultDeploymentUniqueLabelKey] == hash {
+            return utilerrors.ErrPreconditionViolated
+        }
+        updated.Labels = labelsutil.AddLabel(updated.Labels, extensions.DefaultDeploymentUniqueLabelKey, hash)
+        updated.Spec.Selector = labelsutil.AddLabelToSelector(updated.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, hash)
+        return nil
+    })
+    // If the RS isn't actually updated, that's okay, we'll retry in the
+    // next sync loop since its selector isn't updated yet.
+    if err != nil {
+        return nil, fmt.Errorf("error updating ReplicaSet %s/%s label and selector with template hash: %v", updatedRS.Namespace, updatedRS.Name, err)
     }
-    if rsUpdated {
-        glog.V(4).Infof("Updated %s %s/%s's selector and label with hash %s.", rs.Kind, rs.Namespace, rs.Name, hash)
-    }
-    // If the RS isn't actually updated in step 3, that's okay, we'll retry in the next sync loop since its selector isn't updated yet.

     // TODO: look for orphaned pods and label them in the background somewhere else periodically
     return updatedRS, nil
 }
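Both call sites above hand UpdateRSWithRetries a mutation closure and rely on it to re-read the object and retry on write conflicts, with ErrPreconditionViolated signaling there is nothing left to change. A self-contained sketch of that retry shape: every name, the stand-in types, and the retry budget below are illustrative, not the deploymentutil implementation; get/put stand in for the lister read and the API write, and isConflict stands in for a check like apierrors.IsConflict.

package retrysketch

import (
    "errors"
    "fmt"
    "time"
)

var errPreconditionViolated = errors.New("precondition violated")

// object is a stand-in for *extensions.ReplicaSet.
type object struct {
    Name   string
    Labels map[string]string
}

// updateWithRetries re-fetches and mutates the object until the write sticks
// or the retry budget is exhausted. Note that NotFound and other non-conflict
// errors are returned rather than swallowed, so the caller's sync loop can
// requeue with backoff — the behavior this commit is after.
func updateWithRetries(
    get func() (*object, error),
    put func(*object) error,
    isConflict func(error) bool,
    applyUpdate func(*object) error,
) (*object, error) {
    delay := 10 * time.Millisecond
    for attempt := 0; attempt < 5; attempt++ {
        obj, err := get()
        if err != nil {
            return nil, err // e.g. NotFound: surface it to the sync loop
        }
        if err := applyUpdate(obj); err != nil {
            if errors.Is(err, errPreconditionViolated) {
                return obj, nil // already in the desired shape; nothing to write
            }
            return nil, err
        }
        switch err := put(obj); {
        case err == nil:
            return obj, nil
        case !isConflict(err):
            return nil, err
        }
        time.Sleep(delay) // another writer won the race; retry on a fresh read
        delay *= 2
    }
    return nil, fmt.Errorf("too many conflicts while updating object")
}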
@@ -340,7 +316,7 @@ func (dc *DeploymentController) getNewReplicaSet(deployment *extensions.Deployme
     // new ReplicaSet does not exist, create one.
     namespace := deployment.Namespace
-    podTemplateSpecHash := podutil.GetPodTemplateSpecHash(deployment.Spec.Template)
+    podTemplateSpecHash := deploymentutil.GetPodTemplateSpecHash(deployment.Spec.Template)
     newRSTemplate := deploymentutil.GetNewReplicaSetTemplate(deployment)
     // Add podTemplateHash label to selector.
     newRSSelector := labelsutil.CloneSelectorAndAddLabel(deployment.Spec.Selector, extensions.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash)
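For context on the hash attached everywhere above: a pod-template hash is a digest of the pod template, used as the value of the pod-template-hash label so pods and replica sets produced by different templates can be told apart. A rough sketch of the idea, assuming an FNV-1a digest of a printed representation; the concrete algorithm and serialization have varied across Kubernetes versions, and the struct below is a simplified stand-in for api.PodTemplateSpec.

package hashsketch

import (
    "fmt"
    "hash/fnv"
)

// podTemplate is a stand-in for the real pod template type.
type podTemplate struct {
    Image  string
    Labels map[string]string
}

// templateHash prints the template into an FNV-1a hasher and formats the
// 32-bit sum as a decimal string, suitable for use as a label value.
// fmt prints map keys in sorted order (Go >= 1.12), which keeps this
// serialization deterministic for the purposes of the sketch.
func templateHash(t podTemplate) string {
    hasher := fnv.New32a()
    fmt.Fprintf(hasher, "%#v", t)
    return fmt.Sprintf("%d", hasher.Sum32())
}

Any change to the template (say, a new image) yields a new hash, which is why stamping the hash into the RS selector cleanly partitions old pods from new ones during adoption.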