diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index e7894708264..aa063b6a9f1 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -201,7 +201,7 @@ func (s *CMServer) Run(_ []string) error { nodeController := nodeControllerPkg.NewNodeController(cloud, s.MinionRegexp, s.MachineList, nodeResources, kubeClient, kubeletClient, s.RegisterRetryCount, s.PodEvictionTimeout, util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst), - s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod, s.ClusterName ) + s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod, s.ClusterName) nodeController.Run(s.NodeSyncPeriod, s.SyncNodeList) resourceQuotaManager := resourcequota.NewResourceQuotaManager(kubeClient) diff --git a/pkg/cloudprovider/controller/nodecontroller.go b/pkg/cloudprovider/controller/nodecontroller.go index 9d0989ffca4..72f3657ccd1 100644 --- a/pkg/cloudprovider/controller/nodecontroller.go +++ b/pkg/cloudprovider/controller/nodecontroller.go @@ -88,7 +88,9 @@ type NodeController struct { // check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod. // TODO: Change node status monitor to watch based. nodeMonitorPeriod time.Duration - clusterName string + clusterName string + // Should external services be reconciled during syncing cloud nodes, even though the nodes were not changed. + reconcileServices bool // Method for easy mocking in unittest. lookupIP func(host string) ([]net.IP, error) now func() util.Time @@ -223,22 +225,23 @@ func (nc *NodeController) RegisterNodes(nodes *api.NodeList, retryCount int, ret } // reconcileExternalServices updates balancers for external services, so that they will match the nodes given. 
-func (nc *NodeController) reconcileExternalServices(nodes *api.NodeList) { +// Returns true if listing services or updating a load balancer failed and reconcile should be called again; missing provider support and zone lookup errors are logged and return false. +func (nc *NodeController) reconcileExternalServices(nodes *api.NodeList) (shouldRetry bool) { balancer, ok := nc.cloud.TCPLoadBalancer() if !ok { glog.Error("The cloud provider does not support external TCP load balancers.") - return + return false } zones, ok := nc.cloud.Zones() if !ok { glog.Error("The cloud provider does not support zone enumeration.") - return + return false } zone, err := zones.GetZone() if err != nil { glog.Errorf("Error while getting zone: %v", err) - return + return false } hosts := []string{} @@ -249,8 +252,9 @@ func (nc *NodeController) reconcileExternalServices(nodes *api.NodeList) { services, err := nc.kubeClient.Services(api.NamespaceAll).List(labels.Everything()) if err != nil { glog.Errorf("Error while listing services: %v", err) - return + return true } + shouldRetry = false for _, service := range services.Items { if service.Spec.CreateExternalLoadBalancer { nonTCPPort := false @@ -269,9 +273,11 @@ func (nc *NodeController) reconcileExternalServices(nodes *api.NodeList) { err := balancer.UpdateTCPLoadBalancer(name, zone.Region, hosts) if err != nil { glog.Errorf("External error while updating TCP load balancer: %v.", err) + shouldRetry = true } } } + return shouldRetry } // SyncCloudNodes synchronizes the list of instances from cloudprovider to master server. @@ -327,8 +333,11 @@ func (nc *NodeController) SyncCloudNodes() error { } // Make external services aware of nodes currently present in the cluster. 
- if nodesChanged { - nc.reconcileExternalServices(matches) + if nodesChanged || nc.reconcileServices { + nc.reconcileServices = nc.reconcileExternalServices(matches) + if nc.reconcileServices { + glog.Error("Reconcilation of external services failed and will be retried during the next sync.") + } } return nil diff --git a/pkg/cloudprovider/controller/nodecontroller_test.go b/pkg/cloudprovider/controller/nodecontroller_test.go index af5ff3a339b..b0d17c2dcdf 100644 --- a/pkg/cloudprovider/controller/nodecontroller_test.go +++ b/pkg/cloudprovider/controller/nodecontroller_test.go @@ -633,7 +633,7 @@ func TestSyncCloudNodesReconcilesExternalService(t *testing.T) { // Set of nodes does not change: do nothing. fakeNodeHandler: &FakeNodeHandler{ Existing: []*api.Node{newNode("node0"), newNode("node1")}, - Fake: testclient.NewSimpleFake(&api.ServiceList{Items: []api.Service{*newService("service0", true), *newService("service1", false)}})}, + Fake: testclient.NewSimpleFake(&api.ServiceList{Items: []api.Service{*newService("service0", true), *newService("service1", false)}})}, fakeCloud: &fake_cloud.FakeCloud{ Machines: []string{"node0", "node1"}, }, @@ -645,7 +645,7 @@ func TestSyncCloudNodesReconcilesExternalService(t *testing.T) { // Delete "node1", target pool for "service0" should shrink. fakeNodeHandler: &FakeNodeHandler{ Existing: []*api.Node{newNode("node0"), newNode("node1")}, - Fake: testclient.NewSimpleFake(&api.ServiceList{Items: []api.Service{*newService("service0", true), *newService("service1", false)}})}, + Fake: testclient.NewSimpleFake(&api.ServiceList{Items: []api.Service{*newService("service0", true), *newService("service1", false)}})}, fakeCloud: &fake_cloud.FakeCloud{ Machines: []string{"node0"}, }, @@ -659,7 +659,7 @@ func TestSyncCloudNodesReconcilesExternalService(t *testing.T) { // Add "node1", target pool for "service0" should grow. 
fakeNodeHandler: &FakeNodeHandler{ Existing: []*api.Node{newNode("node0")}, - Fake: testclient.NewSimpleFake(&api.ServiceList{Items: []api.Service{*newService("service0", true), *newService("service1", false)}})}, + Fake: testclient.NewSimpleFake(&api.ServiceList{Items: []api.Service{*newService("service0", true), *newService("service1", false)}})}, fakeCloud: &fake_cloud.FakeCloud{ Machines: []string{"node0", "node1"}, },