Ratelimit replica creation

Prashanth Balasubramanian 2015-05-06 14:39:14 -07:00
parent 12de230bb6
commit 4fdd5bc3f3
7 changed files with 142 additions and 26 deletions

View File

@@ -214,7 +214,7 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
 	// ensure the service endpoints are sync'd several times within the window that the integration tests wait
 	go endpoints.Run(3, util.NeverStop)
-	controllerManager := replicationControllerPkg.NewReplicationManager(cl)
+	controllerManager := replicationControllerPkg.NewReplicationManager(cl, replicationControllerPkg.BurstReplicas)
 	// TODO: Write an integration test for the replication controllers watch.
 	go controllerManager.Run(3, util.NeverStop)

View File

@@ -213,7 +213,7 @@ func (s *CMServer) Run(_ []string) error {
 	endpoints := service.NewEndpointController(kubeClient)
 	go endpoints.Run(s.ConcurrentEndpointSyncs, util.NeverStop)
-	controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient)
+	controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient, replicationControllerPkg.BurstReplicas)
 	go controllerManager.Run(s.ConcurrentRCSyncs, util.NeverStop)
 	cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)

View File

@@ -143,7 +143,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
 	endpoints := service.NewEndpointController(cl)
 	go endpoints.Run(5, util.NeverStop)
-	controllerManager := controller.NewReplicationManager(cl)
+	controllerManager := controller.NewReplicationManager(cl, controller.BurstReplicas)
 	go controllerManager.Run(5, util.NeverStop)
 }

View File

@@ -524,7 +524,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
 	// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
 	// (regardless of its current value) in the master, without contacting kubelet.
 	if readyCondition == nil {
-		glog.V(2).Infof("node %v is never updated by kubelet")
+		glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
 		node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
 			Type:   api.NodeReady,
 			Status: api.ConditionUnknown,

View File

@@ -84,7 +84,7 @@ func (r *RCExpectations) SatisfiedExpectations(rc *api.ReplicationController) bo
 		if podExp.Fulfilled() {
 			return true
 		} else {
-			glog.V(4).Infof("Controller %v still waiting on expectations %#v", podExp)
+			glog.V(4).Infof("Controller still waiting on expectations %#v", podExp)
 			return false
 		}
 	} else if err != nil {
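For context (not part of the diff), the expectations machinery this check consults works roughly like a countdown: an rc records how many creates/deletes it has issued, goes dormant, and syncs again only once the matching watch events, or failure callbacks, have been observed. A minimal runnable sketch under assumed, simplified names; the real RCExpectations is a TTL-backed store keyed by controller:

package main

import (
	"fmt"
	"sync/atomic"
)

// PodExpectations is an assumed, simplified stand-in for the per-rc entry
// tracked by RCExpectations in the diff above.
type PodExpectations struct {
	add int64 // creations the rc still expects to observe via watch
	del int64 // deletions the rc still expects to observe via watch
}

// Fulfilled reports whether every expected create/delete has been observed;
// SatisfiedExpectations returns true in that case and the rc syncs again.
func (e *PodExpectations) Fulfilled() bool {
	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
}

// CreationObserved accounts for one expected creation, either a watch event
// or a failed create (which will never produce a watch event).
func (e *PodExpectations) CreationObserved() {
	atomic.AddInt64(&e.add, -1)
}

func main() {
	exp := &PodExpectations{add: 3} // analogous to ExpectCreations(rc, 3)
	for i := 0; i < 3; i++ {
		exp.CreationObserved()
	}
	fmt.Println("rc may sync again:", exp.Fulfilled()) // true
}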

View File

@@ -58,12 +58,15 @@ const (
 	// of expectations, without it the RC could stay asleep forever. This should
 	// be set based on the expected latency of watch events.
 	//
+	// TODO: Set this per expectation, based on its size.
 	// Currently an rc can service (create *and* observe the watch events for said
-	// creation) about 10-20 pods a second, so it takes about 3.5 min to service
-	// 3000 pods. Just creation is limited to 30qps, and watching happens with
-	// ~10-30s latency/pod at scale.
-	ExpectationsTimeout = 6 * time.Minute
+	// creation) about 10-20 pods a second, so it takes about 1 min to service
+	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
+	// latency/pod at the scale of 3000 pods over 100 nodes.
+	ExpectationsTimeout = 3 * time.Minute
+	// Realistic value of the burstReplica field for the replication manager based off
+	// performance requirements for kubernetes 1.0.
+	BurstReplicas = 500
 )
 // ReplicationManager is responsible for synchronizing ReplicationController objects stored
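(Rough arithmetic behind the new constants, as a sanity check rather than part of the diff: at ~10 pods/s, one burst of BurstReplicas = 500 takes about 500 / 10 = 50 s to create and observe, so the 3-minute ExpectationsTimeout leaves several-fold headroom before unfulfilled expectations expire and the rc is forcibly woken.)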
@@ -72,6 +75,9 @@ type ReplicationManager struct {
 	kubeClient client.Interface
 	podControl PodControlInterface
+	// An rc is temporarily suspended after creating/deleting these many replicas.
+	// It resumes normal action after observing the watch events for them.
+	burstReplicas int
 	// To allow injection of syncReplicationController for testing.
 	syncHandler func(rcKey string) error
 	// A TTLCache of pod creates/deletes each rc expects to see
@@ -89,7 +95,7 @@ type ReplicationManager struct {
 }
 // NewReplicationManager creates a new ReplicationManager.
-func NewReplicationManager(kubeClient client.Interface) *ReplicationManager {
+func NewReplicationManager(kubeClient client.Interface, burstReplicas int) *ReplicationManager {
 	eventBroadcaster := record.NewBroadcaster()
 	eventBroadcaster.StartRecordingToSink(kubeClient.Events(""))
@@ -99,6 +105,7 @@ func NewReplicationManager(kubeClient client.Interface) *ReplicationManager {
 			kubeClient: kubeClient,
 			recorder:   eventBroadcaster.NewRecorder(api.EventSource{Component: "replication-controller"}),
 		},
+		burstReplicas: burstReplicas,
 		expectations:  NewRCExpectations(),
 		queue:         workqueue.New(),
 	}
@@ -277,15 +284,19 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
 	diff := len(filteredPods) - controller.Spec.Replicas
 	if diff < 0 {
 		diff *= -1
+		if diff > rm.burstReplicas {
+			diff = rm.burstReplicas
+		}
 		rm.expectations.ExpectCreations(controller, diff)
 		wait := sync.WaitGroup{}
 		wait.Add(diff)
-		glog.V(2).Infof("Too few %q replicas, creating %d", controller.Name, diff)
+		glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
 		for i := 0; i < diff; i++ {
 			go func() {
 				defer wait.Done()
 				if err := rm.podControl.createReplica(controller.Namespace, controller); err != nil {
 					// Decrement the expected number of creates because the informer won't observe this pod
+					glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
 					rm.expectations.CreationObserved(controller)
 					util.HandleError(err)
 				}
@@ -293,8 +304,11 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
 		}
 		wait.Wait()
 	} else if diff > 0 {
+		if diff > rm.burstReplicas {
+			diff = rm.burstReplicas
+		}
 		rm.expectations.ExpectDeletions(controller, diff)
-		glog.V(2).Infof("Too many %q replicas, deleting %d", controller.Name, diff)
+		glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
 		// Sort the pods in the order such that not-ready < ready, unscheduled
 		// < scheduled, and pending < running. This ensures that we delete pods
 		// in the earlier stages whenever possible.
@@ -307,6 +321,7 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
 				defer wait.Done()
 				if err := rm.podControl.deletePod(controller.Namespace, filteredPods[ix].Name); err != nil {
 					// Decrement the expected number of deletes because the informer won't observe this deletion
+					glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
 					rm.expectations.DeletionObserved(controller)
 				}
 			}(i)
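Taken together, the manageReplicas changes cap how much work a single sync can start and park the rc until the watch catches up. Below is a minimal, runnable sketch of that pattern with assumed, simplified names (manager, syncOnce, observe); the real code uses RCExpectations, createReplica goroutines and a work queue rather than a plain counter:

package main

import (
	"fmt"
	"sync"
)

// manager is an assumed, simplified stand-in for ReplicationManager.
type manager struct {
	burstReplicas int
	mu            sync.Mutex
	pending       int // creates still expected before the rc acts again
}

// syncOnce issues at most burstReplicas creates per call and records how many
// watch events must be observed before the next burst (ExpectCreations in the diff).
func (m *manager) syncOnce(want, have int) int {
	diff := want - have
	if diff <= 0 {
		return 0
	}
	if diff > m.burstReplicas {
		diff = m.burstReplicas
	}
	m.mu.Lock()
	m.pending = diff
	m.mu.Unlock()
	return diff // number of createReplica calls this sync would make
}

// observe accounts for one watch event (or one failed create, which is why the
// diff decrements expectations on createReplica/deletePod errors).
func (m *manager) observe() {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.pending > 0 {
		m.pending--
	}
}

func main() {
	m := &manager{burstReplicas: 500}
	created := m.syncOnce(3000, 0)
	fmt.Println("created this burst:", created) // 500, not 3000
	for i := 0; i < created; i++ {
		m.observe() // watch events (or failure callbacks) trickle in
	}
	fmt.Println("ready for the next burst:", m.pending == 0) // true
}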

View File

@@ -225,7 +225,7 @@ func startManagerAndWait(manager *ReplicationManager, pods int, t *testing.T) ch
 func TestSyncReplicationControllerDoesNothing(t *testing.T) {
 	client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
 	fakePodControl := FakePodControl{}
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	// 2 running pods, a controller with 2 replicas, sync is a no-op
 	controllerSpec := newReplicationController(2)
@@ -240,7 +240,7 @@ func TestSyncReplicationControllerDoesNothing(t *testing.T) {
 func TestSyncReplicationControllerDeletes(t *testing.T) {
 	client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
 	fakePodControl := FakePodControl{}
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	manager.podControl = &fakePodControl
 	// 2 running pods and a controller with 1 replica, one pod delete expected
@@ -254,7 +254,7 @@ func TestSyncReplicationControllerDeletes(t *testing.T) {
 func TestSyncReplicationControllerCreates(t *testing.T) {
 	client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	// A controller with 2 replicas and no pods in the store, 2 creates expected
 	controller := newReplicationController(2)
@@ -319,7 +319,7 @@ func TestControllerNoReplicaUpdate(t *testing.T) {
 	testServer := httptest.NewServer(&fakeHandler)
 	defer testServer.Close()
 	client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	// Steady state for the replication controller, no Status.Replicas updates expected
 	activePods := 5
@@ -348,7 +348,7 @@ func TestControllerUpdateReplicas(t *testing.T) {
 	defer testServer.Close()
 	client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	// Insufficient number of pods in the system, and Status.Replicas is wrong;
 	// Status.Replica should update to match number of pods in system, 1 new pod should be created.
@@ -533,7 +533,7 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
 	client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
 	fakePodControl := FakePodControl{}
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	manager.podControl = &fakePodControl
 	controllerSpec := newReplicationController(2)
@@ -572,7 +572,7 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
 }
 func TestPodControllerLookup(t *testing.T) {
-	manager := NewReplicationManager(client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}))
+	manager := NewReplicationManager(client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}), BurstReplicas)
 	testCases := []struct {
 		inRCs []*api.ReplicationController
 		pod   *api.Pod
@@ -638,7 +638,7 @@ type FakeWatcher struct {
 func TestWatchControllers(t *testing.T) {
 	fakeWatch := watch.NewFake()
 	client := &testclient.Fake{Watch: fakeWatch}
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	var testControllerSpec api.ReplicationController
 	received := make(chan string)
@@ -679,7 +679,7 @@ func TestWatchControllers(t *testing.T) {
 func TestWatchPods(t *testing.T) {
 	fakeWatch := watch.NewFake()
 	client := &testclient.Fake{Watch: fakeWatch}
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	// Put one rc and one pod into the controller's stores
 	testControllerSpec := newReplicationController(1)
@@ -722,7 +722,7 @@ func TestWatchPods(t *testing.T) {
 func TestUpdatePods(t *testing.T) {
 	fakeWatch := watch.NewFake()
 	client := &testclient.Fake{Watch: fakeWatch}
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	received := make(chan string)
@@ -780,7 +780,7 @@ func TestControllerUpdateRequeue(t *testing.T) {
 	defer testServer.Close()
 	client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
-	manager := NewReplicationManager(client)
+	manager := NewReplicationManager(client, BurstReplicas)
 	rc := newReplicationController(1)
 	manager.controllerStore.Store.Add(rc)
@@ -852,3 +852,104 @@ func TestControllerUpdateStatusWithFailure(t *testing.T) {
 		t.Errorf("Expected 1 get and 2 updates, got %d gets %d updates", gets, updates)
 	}
 }
+func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int) {
+	client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
+	fakePodControl := FakePodControl{}
+	manager := NewReplicationManager(client, burstReplicas)
+	manager.podControl = &fakePodControl
+
+	controllerSpec := newReplicationController(numReplicas)
+	manager.controllerStore.Store.Add(controllerSpec)
+
+	expectedPods := 0
+	pods := newPodList(nil, numReplicas, api.PodPending, controllerSpec)
+
+	// Size up the controller, then size it down, and confirm the expected create/delete pattern
+	for _, replicas := range []int{numReplicas, 0} {
+		controllerSpec.Spec.Replicas = replicas
+		manager.controllerStore.Store.Add(controllerSpec)
+
+		for i := 0; i < numReplicas; i += burstReplicas {
+			manager.syncReplicationController(getKey(controllerSpec, t))
+
+			// The store accrues active pods. It's also used by the rc to determine how many
+			// replicas to create.
+			activePods := len(manager.podStore.Store.List())
+			if replicas != 0 {
+				// This is the number of pods currently "in flight". They were created by the rc manager above,
+				// which then puts the rc to sleep till all of them have been observed.
+				expectedPods = replicas - activePods
+				if expectedPods > burstReplicas {
+					expectedPods = burstReplicas
+				}
+				// This validates the rc manager sync actually created pods
+				validateSyncReplication(t, &fakePodControl, expectedPods, 0)
+
+				// This simulates the watch events for all but 1 of the expected pods.
+				// None of these should wake the controller because it has expectations==BurstReplicas.
+				for _, pod := range pods.Items[:expectedPods-1] {
+					manager.podStore.Store.Add(&pod)
+					manager.addPod(&pod)
+				}
+
+				podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
+				if !exists || err != nil {
+					t.Fatalf("Did not find expectations for rc.")
+				}
+				if add, _ := podExp.getExpectations(); add != 1 {
+					t.Fatalf("Expectations are wrong %v", podExp)
+				}
+			} else {
+				expectedPods = (replicas - activePods) * -1
+				if expectedPods > burstReplicas {
+					expectedPods = burstReplicas
+				}
+				validateSyncReplication(t, &fakePodControl, 0, expectedPods)
+				for _, pod := range pods.Items[:expectedPods-1] {
+					manager.podStore.Store.Delete(&pod)
+					manager.deletePod(&pod)
+				}
+				podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
+				if !exists || err != nil {
+					t.Fatalf("Did not find expectations for rc.")
+				}
+				if _, del := podExp.getExpectations(); del != 1 {
+					t.Fatalf("Expectations are wrong %v", podExp)
+				}
+			}
+
+			// Check that the rc didn't take any action for all the above pods
+			fakePodControl.clear()
+			manager.syncReplicationController(getKey(controllerSpec, t))
+			validateSyncReplication(t, &fakePodControl, 0, 0)
+
+			// Create/Delete the last pod
+			// The last add pod will decrease the expectation of the rc to 0,
+			// which will cause it to create/delete the remaining replicas upto burstReplicas.
+			if replicas != 0 {
+				manager.podStore.Store.Add(&pods.Items[expectedPods-1])
+				manager.addPod(&pods.Items[expectedPods-1])
+			} else {
+				manager.podStore.Store.Delete(&pods.Items[expectedPods-1])
+				manager.deletePod(&pods.Items[expectedPods-1])
+			}
+
+			pods.Items = pods.Items[expectedPods:]
+		}
+
+		// Confirm that we've created the right number of replicas
+		activePods := len(manager.podStore.Store.List())
+		if activePods != controllerSpec.Spec.Replicas {
+			t.Fatalf("Unexpected number of active pods, expected %d, got %d", controllerSpec.Spec.Replicas, activePods)
+		}
+		// Replenish the pod list, since we cut it down sizing up
+		pods = newPodList(nil, replicas, api.PodRunning, controllerSpec)
+	}
+}
+
+func TestControllerBurstReplicas(t *testing.T) {
+	doTestControllerBurstReplicas(t, 5, 30)
+	doTestControllerBurstReplicas(t, 5, 12)
+	doTestControllerBurstReplicas(t, 3, 2)
+}
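Reading the new test concretely (illustration only, not part of the diff): with doTestControllerBurstReplicas(t, 5, 12), sizing up should proceed in create batches of 5, 5 and 2, each batch released only after the previous batch's simulated watch events have been observed, and sizing down mirrors this with delete batches. A tiny sketch of the batch arithmetic:

package main

import "fmt"

func main() {
	// Hypothetical parameters mirroring doTestControllerBurstReplicas(t, 5, 12).
	burst, want, have := 5, 12, 0
	for have < want {
		batch := want - have
		if batch > burst {
			batch = burst
		}
		fmt.Println("create batch of", batch) // prints 5, then 5, then 2
		have += batch
	}
}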