From da1ff1c38ebc0830f0f93d42e9ac9c987d6213b2 Mon Sep 17 00:00:00 2001
From: Michail Kargakis
Date: Sun, 25 Jun 2017 18:25:27 +0200
Subject: [PATCH] controller: cleanup complete deployments only

Signed-off-by: Michail Kargakis
---
 .../deployment/deployment_controller.go | 19 -------------------
 pkg/controller/deployment/recreate.go    |  6 ++++++
 pkg/controller/deployment/rolling.go     |  6 ++++++
 pkg/controller/deployment/sync.go        | 15 +++++++++++----
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/pkg/controller/deployment/deployment_controller.go b/pkg/controller/deployment/deployment_controller.go
index 623f611fea5..0156afd7326 100644
--- a/pkg/controller/deployment/deployment_controller.go
+++ b/pkg/controller/deployment/deployment_controller.go
@@ -614,25 +614,6 @@ func (dc *DeploymentController) syncDeployment(key string) error {
 		return dc.syncStatusOnly(d, rsList, podMap)
 	}
 
-	// Why run the cleanup policy only when there is no rollback request?
-	// The thing with the cleanup policy currently is that it is far from smart because it takes into account
-	// the latest replica sets while it should instead retain the latest *working* replica sets. This means that
-	// you can have a cleanup policy of 1 but your last known working replica set may be 2 or 3 versions back
-	// in the history.
-	// Eventually we will want to find a way to recognize replica sets that have worked at some point in time
-	// (and chances are higher that they will work again as opposed to others that didn't) for candidates to
-	// automatically roll back to (#23211) and the cleanup policy should help.
-	if d.Spec.RollbackTo == nil {
-		_, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, podMap, false)
-		if err != nil {
-			return err
-		}
-		// So far the cleanup policy was executed once a deployment was paused, scaled up/down, or it
-		// successfully completed deploying a replica set. Decouple it from the strategies and have it
-		// run almost unconditionally - cleanupDeployment is safe by default.
-		dc.cleanupDeployment(oldRSs, d)
-	}
-
 	// Update deployment conditions with an Unknown condition when pausing/resuming
 	// a deployment. In this way, we can be sure that we won't timeout when a user
 	// resumes a Deployment with a set progressDeadlineSeconds.
diff --git a/pkg/controller/deployment/recreate.go b/pkg/controller/deployment/recreate.go
index b0f438fc6f5..4d2f2337d7a 100644
--- a/pkg/controller/deployment/recreate.go
+++ b/pkg/controller/deployment/recreate.go
@@ -63,6 +63,12 @@ func (dc *DeploymentController) rolloutRecreate(d *extensions.Deployment, rsList
 		return err
 	}
 
+	if util.DeploymentComplete(d, &d.Status) {
+		if err := dc.cleanupDeployment(oldRSs, d); err != nil {
+			return err
+		}
+	}
+
 	// Sync deployment status.
 	return dc.syncRolloutStatus(allRSs, newRS, d)
 }
diff --git a/pkg/controller/deployment/rolling.go b/pkg/controller/deployment/rolling.go
index 3c1405a68f4..598928366d7 100644
--- a/pkg/controller/deployment/rolling.go
+++ b/pkg/controller/deployment/rolling.go
@@ -57,6 +57,12 @@ func (dc *DeploymentController) rolloutRolling(d *extensions.Deployment, rsList
 		return dc.syncRolloutStatus(allRSs, newRS, d)
 	}
 
+	if deploymentutil.DeploymentComplete(d, &d.Status) {
+		if err := dc.cleanupDeployment(oldRSs, d); err != nil {
+			return err
+		}
+	}
+
 	// Sync deployment status
 	return dc.syncRolloutStatus(allRSs, newRS, d)
 }
diff --git a/pkg/controller/deployment/sync.go b/pkg/controller/deployment/sync.go
index 8b0408bb433..88b7395a0f5 100644
--- a/pkg/controller/deployment/sync.go
+++ b/pkg/controller/deployment/sync.go
@@ -59,6 +59,13 @@ func (dc *DeploymentController) sync(d *extensions.Deployment, rsList []*extensi
 		return err
 	}
 
+	// Clean up the deployment when it's paused and no rollback is in flight.
+	if d.Spec.Paused && d.Spec.RollbackTo == nil {
+		if err := dc.cleanupDeployment(oldRSs, d); err != nil {
+			return err
+		}
+	}
+
 	allRSs := append(oldRSs, newRS)
 	return dc.syncDeploymentStatus(allRSs, newRS, d)
 }
@@ -552,7 +559,6 @@ func (dc *DeploymentController) cleanupDeployment(oldRSs []*extensions.ReplicaSe
 	sort.Sort(controller.ReplicaSetsByCreationTimestamp(cleanableRSes))
 	glog.V(4).Infof("Looking to cleanup old replica sets for deployment %q", deployment.Name)
 
-	var errList []error
 	for i := int32(0); i < diff; i++ {
 		rs := cleanableRSes[i]
 		// Avoid delete replica set with non-zero replica counts
@@ -561,12 +567,13 @@ func (dc *DeploymentController) cleanupDeployment(oldRSs []*extensions.ReplicaSe
 		}
 		glog.V(4).Infof("Trying to cleanup replica set %q for deployment %q", rs.Name, deployment.Name)
 		if err := dc.client.Extensions().ReplicaSets(rs.Namespace).Delete(rs.Name, nil); err != nil && !errors.IsNotFound(err) {
-			glog.V(2).Infof("Failed deleting old replica set %v for deployment %v: %v", rs.Name, deployment.Name, err)
-			errList = append(errList, err)
+			// Return error instead of aggregating and continuing DELETEs on the theory
+			// that we may be overloading the api server.
+			return err
 		}
 	}
 
-	return utilerrors.NewAggregate(errList)
+	return nil
 }
 
 // syncDeploymentStatus checks if the status is up-to-date and sync it if necessary
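
Reviewer note: the new gates above hinge on DeploymentComplete from the deployment
util package, which this patch does not show. Below is a minimal, self-contained
sketch of that completeness check, using hypothetical local stand-in types rather
than the real extensions/v1beta1 API; it illustrates the condition under which
cleanupDeployment now runs and is not a verbatim copy of the upstream function.

    package main

    import "fmt"

    // Hypothetical stand-ins for the few extensions/v1beta1 fields the
    // completeness check reads; illustration only.
    type DeploymentSpec struct {
        Replicas *int32
    }

    type DeploymentStatus struct {
        Replicas, UpdatedReplicas, AvailableReplicas int32
        ObservedGeneration                           int64
    }

    type Deployment struct {
        Generation int64
        Spec       DeploymentSpec
        Status     DeploymentStatus
    }

    // deploymentComplete mirrors the shape of the util check gating the
    // cleanup above: all desired replicas are updated and available, no
    // extra old pods remain, and the controller has observed the latest
    // spec generation.
    func deploymentComplete(d *Deployment, newStatus *DeploymentStatus) bool {
        return newStatus.UpdatedReplicas == *(d.Spec.Replicas) &&
            newStatus.Replicas == *(d.Spec.Replicas) &&
            newStatus.AvailableReplicas == *(d.Spec.Replicas) &&
            newStatus.ObservedGeneration >= d.Generation
    }

    func main() {
        replicas := int32(3)
        d := &Deployment{
            Generation: 2,
            Spec:       DeploymentSpec{Replicas: &replicas},
            Status: DeploymentStatus{
                Replicas:           3,
                UpdatedReplicas:    3,
                AvailableReplicas:  3,
                ObservedGeneration: 2,
            },
        }
        // true: the rollout is fully observed and available, so cleanup may run.
        fmt.Println(deploymentComplete(d, &d.Status))
    }

The practical effect of gating on this check is that old replica sets are pruned
only once the new replica set is fully rolled out and available (or, in sync(),
when the deployment is paused with no rollback pending), so revision history for
an in-flight rollout is never deleted mid-rollout.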