From a8fdf3d78b84679e646ef7aa1c9b51ff539a9a41 Mon Sep 17 00:00:00 2001
From: Prashanth Balasubramanian
Date: Thu, 30 Apr 2015 10:58:18 -0700
Subject: [PATCH] Update expiration timeout based on observed latencies

---
 pkg/client/cache/expiration_cache.go     |  2 ++
 pkg/controller/controller_utils.go       |  9 +++++--
 pkg/controller/replication_controller.go | 33 +++++++++++++++++-------
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/pkg/client/cache/expiration_cache.go b/pkg/client/cache/expiration_cache.go
index dac736304a6..1d8d40e7d1c 100644
--- a/pkg/client/cache/expiration_cache.go
+++ b/pkg/client/cache/expiration_cache.go
@@ -18,6 +18,7 @@ package cache
 
 import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	"github.com/golang/glog"
 	"time"
 )
 
@@ -81,6 +82,7 @@ func (c *ExpirationCache) getOrExpire(key string) (interface{}, bool) {
 		return nil, false
 	}
 	if c.expirationPolicy.IsExpired(timestampedItem) {
+		glog.V(4).Infof("Entry %v: %+v has expired", key, timestampedItem.obj)
 		// Since expiration happens lazily on read, don't hold up
 		// the reader trying to acquire a write lock for the delete.
 		// The next reader will retry the delete even if this one
diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go
index a81cda1a1d2..5ea0c44f2ad 100644
--- a/pkg/controller/controller_utils.go
+++ b/pkg/controller/controller_utils.go
@@ -19,6 +19,8 @@ package controller
 import (
 	"encoding/json"
 	"fmt"
+	"time"
+
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/validation"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
@@ -28,7 +30,6 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
 	"github.com/golang/glog"
 	"sync/atomic"
-	"time"
 )
 
 const CreatedByAnnotation = "kubernetes.io/created-by"
@@ -106,7 +107,9 @@ func (r *RCExpectations) setExpectations(rc *api.ReplicationController, add, del
 	if err != nil {
 		return err
 	}
-	return r.Add(&PodExpectations{add: int64(add), del: int64(del), key: rcKey})
+	podExp := &PodExpectations{add: int64(add), del: int64(del), key: rcKey}
+	glog.V(4).Infof("Setting expectations %+v", podExp)
+	return r.Add(podExp)
 }
 
 func (r *RCExpectations) ExpectCreations(rc *api.ReplicationController, adds int) error {
@@ -124,6 +127,8 @@ func (r *RCExpectations) lowerExpectations(rc *api.ReplicationController, add, d
 			glog.V(2).Infof("Controller has both add and del expectations %+v", podExp)
 		}
 		podExp.Seen(int64(add), int64(del))
+		// The expectations might've been modified since the update on the previous line.
+		glog.V(4).Infof("Lowering expectations %+v", podExp)
 	}
 }
 
diff --git a/pkg/controller/replication_controller.go b/pkg/controller/replication_controller.go
index c747cd79ce5..8b90c5de2e6 100644
--- a/pkg/controller/replication_controller.go
+++ b/pkg/controller/replication_controller.go
@@ -42,20 +42,28 @@ var (
 
 const (
 	// We'll attempt to recompute the required replicas of all replication controllers
-	// the have fulfilled their expectations at least this often.
+	// that have fulfilled their expectations at least this often. This recomputation
+	// happens based on contents in local pod storage.
 	FullControllerResyncPeriod = 30 * time.Second
 
-	// If a watch misdelivers info about a pod, it'll take this long
-	// to rectify the number of replicas.
+	// If a watch misdelivers info about a pod, it'll take at least this long
+	// to rectify the number of replicas. Note that dropped deletes are only
+	// rectified after the expectation times out because we don't know the
+	// final resting state of the pod.
 	PodRelistPeriod = 5 * time.Minute
 
-	// If a watch drops an (add, delete) event for a pod, it'll take this long
-	// before a dormant rc waiting for those packets is woken up anyway. This
-	// should typically be somewhere between the PodRelistPeriod and the
-	// FullControllerResyncPeriod. It is specifically targeted at the case
-	// where some problem prevents an update of expectations, without it the
-	// RC could stay asleep forever.
-	ExpectationsTimeout = 2 * time.Minute
+	// If a watch drops a delete event for a pod, it'll take this long
+	// before a dormant rc waiting for those packets is woken up anyway. It is
+	// specifically targeted at the case where some problem prevents an update
+	// of expectations; without it the RC could stay asleep forever. This should
+	// be set based on the expected latency of watch events.
+
+	// TODO: Set this per expectation, based on its size.
+	// Currently an rc can service (create *and* observe the watch events for said
+	// creation) about 10-20 pods a second, so it takes about 3.5 min to service
+	// 3000 pods. Just creation is limited to 30qps, and watching happens with
+	// ~10-30s latency/pod at scale.
+	ExpectationsTimeout = 6 * time.Minute
 )
 
 // ReplicationManager is responsible for synchronizing ReplicationController objects stored
@@ -220,6 +228,11 @@ func (rm *ReplicationManager) deletePod(obj interface{}) {
 		}
 		return
 	}
+	// When a delete is dropped, the relist will notice a pod in the store not
+	// in the list, leading to the insertion of a tombstone key. Since we don't
+	// know which rc to wake up/update expectations, we rely on the expectation's
+	// ttl expiring. The rc syncs via the 30s periodic resync and notices
+	// fewer pods than its replica count.
 	podKey, err := framework.DeletionHandlingMetaNamespaceKeyFunc(obj)
 	if err != nil {
 		glog.Errorf("Couldn't get key for object %+v: %v", obj, err)
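A minimal sketch, in plain Go, of the expectation/ttl interplay the comments above describe. The names podExpectations, seen, and fulfilled are illustrative assumptions, not the RCExpectations/ExpirationCache API the patch touches; the sketch only shows why a dormant rc wakes up either once every expected create/delete has been observed or once the entry ages past ExpectationsTimeout (sized at 6 minutes because creating and watching ~3000 pods at 10-20 pods/s takes roughly 3.5 minutes).

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// expectationsTimeout mirrors the patch's reasoning: at ~10-20 pods/s it takes
// roughly 3.5 min to create and observe 3000 pods, so 6 min leaves headroom.
const expectationsTimeout = 6 * time.Minute

// podExpectations is an illustrative stand-in for a per-rc expectation entry:
// outstanding creates/deletes plus the time the expectations were set.
type podExpectations struct {
	add, del  int64
	timestamp time.Time
}

// seen lowers the outstanding counts as watch events are observed.
func (e *podExpectations) seen(add, del int64) {
	atomic.AddInt64(&e.add, -add)
	atomic.AddInt64(&e.del, -del)
}

// fulfilled reports whether the rc may sync again: either every expected event
// was observed, or the entry has aged out (e.g. because a delete was dropped).
func (e *podExpectations) fulfilled(now time.Time) bool {
	expired := now.Sub(e.timestamp) > expectationsTimeout
	return (atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0) || expired
}

func main() {
	exp := &podExpectations{add: 3, del: 1, timestamp: time.Now()}
	exp.seen(3, 0) // all creates observed via watch; the delete event was dropped
	fmt.Println("fulfilled now:", exp.fulfilled(time.Now()))
	fmt.Println("fulfilled after timeout:", exp.fulfilled(time.Now().Add(expectationsTimeout+time.Second)))
}

Run as-is, the first check prints false because the dropped delete was never observed, and the second prints true once the entry has aged out, which is the behavior the deletePod comment relies on.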