Move the logic for reconciling the host targets of external load balancers

from the node controller to the service controller, ahead of impending changes
to the node controller that would mean it no longer fits there.
This commit is contained in:
Alex Robinson
2015-04-22 20:54:44 +00:00
parent f7831dcd93
commit 6ae8e40d3d
6 changed files with 215 additions and 150 deletions

View File

@@ -79,7 +79,9 @@ func New(cloud cloudprovider.Interface, kubeClient client.Interface, clusterName
// Run starts a background goroutine that watches for changes to services that
// have (or had) externalLoadBalancers=true and ensures that they have external
// load balancers created and deleted appropriately.
func (s *ServiceController) Run() error {
// nodeSyncPeriod controls how often we check the cluster's nodes to determine
// if external load balancers need to be updated to point to a new set.
func (s *ServiceController) Run(nodeSyncPeriod time.Duration) error {
if err := s.init(); err != nil {
return err
}
@@ -101,6 +103,11 @@ func (s *ServiceController) Run() error {
for i := 0; i < workerGoroutines; i++ {
go s.watchServices(serviceQueue)
}
nodeLister := &cache.StoreToNodeLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}
nodeLW := cache.NewListWatchFromClient(s.kubeClient.(*client.Client), "nodes", api.NamespaceAll, fields.Everything())
cache.NewReflector(nodeLW, &api.Node{}, nodeLister.Store, 0).Run()
go s.nodeSyncLoop(nodeLister, nodeSyncPeriod)
return nil
}
@@ -367,6 +374,18 @@ func (s *serviceCache) ListKeys() []string {
return keys
}
// allServices returns a snapshot slice holding a pointer to every cachedService
// currently stored in the cache's serviceMap. The cache mutex is held while the
// snapshot is built; the order of the result follows map iteration and is
// therefore unspecified.
func (s *serviceCache) allServices() []*cachedService {
	s.mu.Lock()
	defer s.mu.Unlock()

	snapshot := make([]*cachedService, len(s.serviceMap))
	next := 0
	for _, cached := range s.serviceMap {
		snapshot[next] = cached
		next++
	}
	return snapshot
}
func (s *serviceCache) get(serviceName string) (*cachedService, bool) {
s.mu.Lock()
defer s.mu.Unlock()
@@ -445,13 +464,39 @@ func portsEqual(x, y *api.Service) bool {
if err != nil {
return false
}
if len(xPorts) != len(yPorts) {
return intSlicesEqual(xPorts, yPorts)
}
// intSlicesEqual reports whether x and y contain the same integers with the
// same multiplicities, ignoring order.
//
// Note that the comparison is done by sorting: any argument that is not
// already sorted is sorted IN PLACE, so callers must not rely on the original
// element order after calling this function.
func intSlicesEqual(x, y []int) bool {
	if len(x) != len(y) {
		return false
	}
	// Skip the sort when the input is already ordered.
	if !sort.IntsAreSorted(x) {
		sort.Ints(x)
	}
	if !sort.IntsAreSorted(y) {
		sort.Ints(y)
	}
	for i := range x {
		if x[i] != y[i] {
			return false
		}
	}
	return true
}
func stringSlicesEqual(x, y []string) bool {
if len(x) != len(y) {
return false
}
if !sort.StringsAreSorted(x) {
sort.Strings(x)
}
if !sort.StringsAreSorted(y) {
sort.Strings(y)
}
for i := range x {
if x[i] != y[i] {
return false
}
}
@@ -465,3 +510,78 @@ func hostsFromNodeList(list *api.NodeList) []string {
}
return result
}
// nodeSyncLoop wakes up once per period, lists the nodes known to nodeLister,
// and pushes the resulting host list to external load balancers. When the host
// list differs from the one seen on the previous successful comparison, every
// cached service is updated; when it is unchanged, only the services whose
// update failed last time are retried. This function never returns.
func (s *ServiceController) nodeSyncLoop(nodeLister *cache.StoreToNodeLister, period time.Duration) {
	var (
		lastHosts []string
		pending   []*cachedService
	)

	// Receive from the tick channel directly instead of ranging over it, so no
	// throwaway loop variable is needed.
	ticks := time.Tick(period)
	for {
		<-ticks

		nodes, err := nodeLister.List()
		if err != nil {
			glog.Errorf("Failed to retrieve current set of nodes from node lister: %v", err)
			continue
		}
		hosts := hostsFromNodeList(&nodes)

		if !stringSlicesEqual(hosts, lastHosts) {
			glog.Infof("Detected change in list of current cluster nodes. New node set: %v", hosts)

			// The node set changed: attempt every known service and remember
			// the failures so they can be retried on the next tick.
			everything := s.cache.allServices()
			total := len(everything)
			pending = s.updateLoadBalancerHosts(everything, hosts)
			glog.Infof("Successfully updated %d out of %d external load balancers to direct traffic to the updated set of nodes",
				total-len(pending), total)

			lastHosts = hosts
			continue
		}

		// Same node set as before: only retry the services that previously
		// failed to update.
		pending = s.updateLoadBalancerHosts(pending, hosts)
	}
}
// updateLoadBalancerHosts points the external load balancer of each given
// service at the supplied hosts, taking each service's own mutex for the
// duration of its update.
// The returned slice contains the services whose update reported an error and
// should be retried; it is nil when every update succeeded.
func (s *ServiceController) updateLoadBalancerHosts(services []*cachedService, hosts []string) (servicesToRetry []*cachedService) {
	// updateOne is a separate function so the deferred unlock runs at the end
	// of each service's update rather than when the whole loop finishes.
	updateOne := func(cached *cachedService) error {
		cached.mu.Lock()
		defer cached.mu.Unlock()
		return s.lockedUpdateLoadBalancerHosts(cached.service, hosts)
	}

	for _, cached := range services {
		if err := updateOne(cached); err != nil {
			glog.Errorf("External error while updating TCP load balancer: %v.", err)
			servicesToRetry = append(servicesToRetry, cached)
		}
	}
	return servicesToRetry
}
// lockedUpdateLoadBalancerHosts updates the external load balancer of a
// service so that it targets hosts, assuming the caller holds the mutex
// associated with the service.
//
// Services that do not request an external load balancer are ignored. If the
// update fails, the failure is only returned when the load balancer still
// exists (or when its existence cannot be determined); a failed update of a
// load balancer that no longer exists is treated as success.
func (s *ServiceController) lockedUpdateLoadBalancerHosts(service *api.Service, hosts []string) error {
	if !service.Spec.CreateExternalLoadBalancer {
		return nil
	}

	name := cloudprovider.GetLoadBalancerName(service)
	err := s.balancer.UpdateTCPLoadBalancer(name, s.zone.Region, hosts)
	if err == nil {
		return nil
	}

	// It's only an actual error if the load balancer still exists.
	// A distinct variable is used for the existence check so that it cannot be
	// confused with (or shadow) the update error returned below.
	exists, existsErr := s.balancer.TCPLoadBalancerExists(name, s.zone.Region)
	if existsErr != nil {
		glog.Errorf("External error while checking if TCP load balancer %q exists: %v", name, existsErr)
	} else if !exists {
		return nil
	}
	return err
}

View File

@@ -17,6 +17,7 @@ limitations under the License.
package servicecontroller
import (
"reflect"
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
@@ -27,6 +28,10 @@ import (
const region = "us-central"
// newService builds a Service test fixture in the "namespace" namespace with
// the given name and UID. external is stored in
// Spec.CreateExternalLoadBalancer.
func newService(name string, uid types.UID, external bool) *api.Service {
	svc := &api.Service{}
	svc.ObjectMeta = api.ObjectMeta{Name: name, Namespace: "namespace", UID: uid}
	svc.Spec = api.ServiceSpec{CreateExternalLoadBalancer: external}
	return svc
}
func TestCreateExternalLoadBalancer(t *testing.T) {
table := []struct {
service *api.Service
@@ -124,4 +129,82 @@ func TestCreateExternalLoadBalancer(t *testing.T) {
}
}
// TestUpdateNodesInExternalLoadBalancer checks that updateLoadBalancerHosts
// issues exactly one cloud update call, carrying the full host list, for each
// service that requests an external load balancer, and none for services that
// do not.
func TestUpdateNodesInExternalLoadBalancer(t *testing.T) {
	hosts := []string{"node0", "node1", "node73"}
	table := []struct {
		services            []*api.Service
		expectedUpdateCalls []fake_cloud.FakeUpdateBalancerCall
	}{
		{
			// No services present: no calls should be made.
			services:            []*api.Service{},
			expectedUpdateCalls: nil,
		},
		{
			// Services do not have external load balancers: no calls should be made.
			services: []*api.Service{
				newService("s0", "111", false),
				newService("s1", "222", false),
			},
			expectedUpdateCalls: nil,
		},
		{
			// One service has an external load balancer: one call should be made.
			services: []*api.Service{
				newService("s0", "333", true),
			},
			expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
				{Name: "a333", Region: region, Hosts: []string{"node0", "node1", "node73"}},
			},
		},
		{
			// Three services have an external load balancer: three calls.
			services: []*api.Service{
				newService("s0", "444", true),
				newService("s1", "555", true),
				newService("s2", "666", true),
			},
			expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
				{Name: "a444", Region: region, Hosts: []string{"node0", "node1", "node73"}},
				{Name: "a555", Region: region, Hosts: []string{"node0", "node1", "node73"}},
				{Name: "a666", Region: region, Hosts: []string{"node0", "node1", "node73"}},
			},
		},
		{
			// Two services have an external load balancer and two don't: two calls.
			services: []*api.Service{
				newService("s0", "777", false),
				newService("s1", "888", true),
				newService("s3", "999", true),
				newService("s4", "123", false),
			},
			expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
				{Name: "a888", Region: region, Hosts: []string{"node0", "node1", "node73"}},
				{Name: "a999", Region: region, Hosts: []string{"node0", "node1", "node73"}},
			},
		},
	}
	for _, item := range table {
		cloud := &fake_cloud.FakeCloud{}

		cloud.Region = region
		client := &testclient.Fake{}
		controller := New(cloud, client, "test-cluster2")
		controller.init()
		cloud.Calls = nil // ignore any cloud calls made in init()

		var services []*cachedService
		for _, service := range item.services {
			services = append(services, &cachedService{service: service})
		}
		// updateLoadBalancerHosts returns the services that failed to update
		// (not an error value); none are expected to fail here.
		if failed := controller.updateLoadBalancerHosts(services, hosts); len(failed) != 0 {
			t.Errorf("expected every service to update successfully, but %d need to be retried: %v", len(failed), failed)
		}
		if !reflect.DeepEqual(item.expectedUpdateCalls, cloud.UpdateCalls) {
			t.Errorf("expected update calls mismatch, expected %+v, got %+v", item.expectedUpdateCalls, cloud.UpdateCalls)
		}
	}
}
// TODO(a-robinson): Add tests for update/sync/delete.