Implement multi-port endpoints

Instead of endpoints being a flat list, it is now a list of "subsets",
where each subset is a struct of {Addresses, Ports}. To generate the
full list of endpoints you take the union of the Cartesian products of
the subsets. This stays compact in the vast majority of cases, yet
still represents named ports and corner cases (e.g. when each pod has a
different port number).
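
For concreteness, a minimal sketch of that expansion; the types below are
simplified stand-ins for the real API objects, not their actual definitions:

package main

import "fmt"

// Simplified stand-ins for the Endpoints API types described above.
type endpointAddress struct{ IP string }

type endpointPort struct {
	Name string
	Port int
}

type endpointSubset struct {
	Addresses []endpointAddress
	Ports     []endpointPort
}

// expand takes the union of the Cartesian products Addresses x Ports of
// each subset, yielding one "ip:port" string per (address, port) pair.
func expand(subsets []endpointSubset) []string {
	var out []string
	for _, ss := range subsets {
		for _, p := range ss.Ports {
			for _, a := range ss.Addresses {
				out = append(out, fmt.Sprintf("%s:%d", a.IP, p.Port))
			}
		}
	}
	return out
}

func main() {
	subsets := []endpointSubset{
		// Common case: many pods share one port, so one compact subset suffices.
		{
			Addresses: []endpointAddress{{IP: "10.0.0.1"}, {IP: "10.0.0.2"}},
			Ports:     []endpointPort{{Name: "http", Port: 8080}},
		},
		// Corner case: a pod on its own port number gets its own subset.
		{
			Addresses: []endpointAddress{{IP: "10.0.0.3"}},
			Ports:     []endpointPort{{Name: "http", Port: 8081}},
		},
	}
	fmt.Println(expand(subsets)) // [10.0.0.1:8080 10.0.0.2:8080 10.0.0.3:8081]
}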

This also stores subsets in a deterministic order (sorted by hash) to
avoid spurious updates and comparison problems.
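
The sorting happens where Endpoints objects are packed, not in the proxy
code below. Purely as an illustration of the idea (hashKey is invented for
this sketch, and endpointSubset is reused from the sketch above):

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// sortSubsets orders subsets by a hash of their contents, so that two
// semantically equal Endpoints objects always compare and serialize equal.
func sortSubsets(subsets []endpointSubset) {
	hashKey := func(ss endpointSubset) uint64 {
		h := fnv.New64a()
		fmt.Fprintf(h, "%v", ss) // stand-in for a canonical encoding
		return h.Sum64()
	}
	sort.Slice(subsets, func(i, j int) bool {
		return hashKey(subsets[i]) < hashKey(subsets[j])
	})
}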

This is a fully compatible change - old objects and clients will keep
working as long as they don't need the new functionality.

This is prep for multi-port Services, which will add API support for
producing endpoints in this new structure.
Author: Tim Hockin
Date:   2015-03-20 14:24:43 -07:00
parent 3eda80b3c5
commit 8ae203825b

43 changed files with 2090 additions and 1030 deletions


@@ -50,15 +50,28 @@ type affinityPolicy struct {
 	ttlMinutes int
 }
 
+// servicePort is the type that the balancer uses to key stored state.
+type servicePort struct {
+	types.NamespacedName
+	port string
+}
+
+func (sp servicePort) String() string {
+	return fmt.Sprintf("%s:%s", sp.NamespacedName, sp.port)
+}
+
 // LoadBalancerRR is a round-robin load balancer.
 type LoadBalancerRR struct {
 	lock     sync.RWMutex
-	services map[types.NamespacedName]*balancerState
+	services map[servicePort]*balancerState
 }
 
 // Ensure this implements LoadBalancer.
 var _ LoadBalancer = &LoadBalancerRR{}
 
 type balancerState struct {
-	endpoints []string
-	index     int
+	endpoints []string // a list of "ip:port" style strings
+	index     int      // current index into endpoints
 	affinity  affinityPolicy
 }
@@ -73,29 +86,30 @@ func newAffinityPolicy(affinityType api.AffinityType, ttlMinutes int) *affinityP
 // NewLoadBalancerRR returns a new LoadBalancerRR.
 func NewLoadBalancerRR() *LoadBalancerRR {
 	return &LoadBalancerRR{
-		services: map[types.NamespacedName]*balancerState{},
+		services: map[servicePort]*balancerState{},
 	}
 }
 
-func (lb *LoadBalancerRR) NewService(service types.NamespacedName, affinityType api.AffinityType, ttlMinutes int) error {
+func (lb *LoadBalancerRR) NewService(service types.NamespacedName, port string, affinityType api.AffinityType, ttlMinutes int) error {
+	svcPort := servicePort{service, port}
 	lb.lock.Lock()
 	defer lb.lock.Unlock()
-	lb.newServiceInternal(service, affinityType, ttlMinutes)
+	lb.newServiceInternal(svcPort, affinityType, ttlMinutes)
 	return nil
 }
 
 // This assumes that lb.lock is already held.
-func (lb *LoadBalancerRR) newServiceInternal(service types.NamespacedName, affinityType api.AffinityType, ttlMinutes int) *balancerState {
+func (lb *LoadBalancerRR) newServiceInternal(svcPort servicePort, affinityType api.AffinityType, ttlMinutes int) *balancerState {
 	if ttlMinutes == 0 {
 		ttlMinutes = 180 // default to 3 hours if not specified. Should 0 be unlimited instead?
 	}
-	if _, exists := lb.services[service]; !exists {
-		lb.services[service] = &balancerState{affinity: *newAffinityPolicy(affinityType, ttlMinutes)}
-		glog.V(4).Infof("LoadBalancerRR service %q did not exist, created", service)
+	if _, exists := lb.services[svcPort]; !exists {
+		lb.services[svcPort] = &balancerState{affinity: *newAffinityPolicy(affinityType, ttlMinutes)}
+		glog.V(4).Infof("LoadBalancerRR service %q did not exist, created", svcPort)
 	}
-	return lb.services[service]
+	return lb.services[svcPort]
 }
 
 // return true if this service is using some form of session affinity.
@@ -109,21 +123,22 @@ func isSessionAffinity(affinity *affinityPolicy) bool {
 // NextEndpoint returns a service endpoint.
 // The service endpoint is chosen using the round-robin algorithm.
-func (lb *LoadBalancerRR) NextEndpoint(service types.NamespacedName, srcAddr net.Addr) (string, error) {
+func (lb *LoadBalancerRR) NextEndpoint(service types.NamespacedName, port string, srcAddr net.Addr) (string, error) {
+	svcPort := servicePort{service, port}
 	// Coarse locking is simple.  We can get more fine-grained if/when we
 	// can prove it matters.
 	lb.lock.Lock()
 	defer lb.lock.Unlock()
-	key := service
-	state, exists := lb.services[key]
+	state, exists := lb.services[svcPort]
 	if !exists || state == nil {
 		return "", ErrMissingServiceEntry
 	}
 	if len(state.endpoints) == 0 {
 		return "", ErrMissingEndpoints
 	}
-	glog.V(4).Infof("NextEndpoint for service %q, srcAddr=%v: endpoints: %+v", service, srcAddr, state.endpoints)
+	glog.V(4).Infof("NextEndpoint for service %q, srcAddr=%v: endpoints: %+v", svcPort, srcAddr, state.endpoints)
 
 	sessionAffinityEnabled := isSessionAffinity(&state.affinity)
@@ -140,7 +155,7 @@ func (lb *LoadBalancerRR) NextEndpoint(service types.NamespacedName, srcAddr net
 			// Affinity wins.
 			endpoint := sessionAffinity.endpoint
 			sessionAffinity.lastUsed = time.Now()
-			glog.V(4).Infof("NextEndpoint for service %q from IP %s with sessionAffinity %+v: %s", service, ipaddr, sessionAffinity, endpoint)
+			glog.V(4).Infof("NextEndpoint for service %q from IP %s with sessionAffinity %+v: %s", svcPort, ipaddr, sessionAffinity, endpoint)
 			return endpoint, nil
 		}
 	}
@@ -164,28 +179,33 @@ func (lb *LoadBalancerRR) NextEndpoint(service types.NamespacedName, srcAddr net
 	return endpoint, nil
 }
 
-func isValidEndpoint(ep *api.Endpoint) bool {
-	return ep.IP != "" && ep.Port > 0
+type hostPortPair struct {
+	host string
+	port int
 }
 
-func filterValidEndpoints(endpoints []api.Endpoint) []string {
+func isValidEndpoint(hpp *hostPortPair) bool {
+	return hpp.host != "" && hpp.port > 0
+}
+
+func flattenValidEndpoints(endpoints []hostPortPair) []string {
 	// Convert Endpoint objects into strings for easier use later.  Ignore
 	// the protocol field - we'll get that from the Service objects.
 	var result []string
 	for i := range endpoints {
-		ep := &endpoints[i]
-		if isValidEndpoint(ep) {
-			result = append(result, net.JoinHostPort(ep.IP, strconv.Itoa(ep.Port)))
+		hpp := &endpoints[i]
+		if isValidEndpoint(hpp) {
+			result = append(result, net.JoinHostPort(hpp.host, strconv.Itoa(hpp.port)))
 		}
 	}
 	return result
 }
 
 // Remove any session affinity records associated to a particular endpoint (for example when a pod goes down).
-func removeSessionAffinityByEndpoint(state *balancerState, service types.NamespacedName, endpoint string) {
+func removeSessionAffinityByEndpoint(state *balancerState, svcPort servicePort, endpoint string) {
 	for _, affinity := range state.affinity.affinityMap {
 		if affinity.endpoint == endpoint {
-			glog.V(4).Infof("Removing client: %s from affinityMap for service %q", affinity.endpoint, service)
+			glog.V(4).Infof("Removing client: %s from affinityMap for service %q", affinity.endpoint, svcPort)
 			delete(state.affinity.affinityMap, affinity.clientIP)
 		}
 	}
@@ -194,12 +214,12 @@ func removeSessionAffinityByEndpoint(state *balancerState, service types.Namespa
 // Loop through the valid endpoints and then the endpoints associated with the Load Balancer.
 // Then remove any session affinity records that are not in both lists.
 // This assumes the lb.lock is held.
-func (lb *LoadBalancerRR) updateAffinityMap(service types.NamespacedName, newEndpoints []string) {
+func (lb *LoadBalancerRR) updateAffinityMap(svcPort servicePort, newEndpoints []string) {
 	allEndpoints := map[string]int{}
 	for _, newEndpoint := range newEndpoints {
 		allEndpoints[newEndpoint] = 1
 	}
-	state, exists := lb.services[service]
+	state, exists := lb.services[svcPort]
 	if !exists {
 		return
 	}
@@ -208,8 +228,8 @@ func (lb *LoadBalancerRR) updateAffinityMap(service types.NamespacedName, newEnd
 	}
 	for mKey, mVal := range allEndpoints {
 		if mVal == 1 {
-			glog.V(3).Infof("Delete endpoint %s for service %q", mKey, service)
-			removeSessionAffinityByEndpoint(state, service, mKey)
+			glog.V(3).Infof("Delete endpoint %s for service %q", mKey, svcPort)
+			removeSessionAffinityByEndpoint(state, svcPort, mKey)
 		}
 	}
 }
@@ -218,33 +238,52 @@ func (lb *LoadBalancerRR) updateAffinityMap(service types.NamespacedName, newEnd
 // Registered endpoints are updated if found in the update set or
 // unregistered if missing from the update set.
 func (lb *LoadBalancerRR) OnUpdate(allEndpoints []api.Endpoints) {
-	registeredEndpoints := make(map[types.NamespacedName]bool)
+	registeredEndpoints := make(map[servicePort]bool)
 	lb.lock.Lock()
 	defer lb.lock.Unlock()
 
 	// Update endpoints for services.
-	for _, svcEndpoints := range allEndpoints {
-		name := types.NamespacedName{svcEndpoints.Namespace, svcEndpoints.Name}
-		key := name
-		state, exists := lb.services[key]
-		curEndpoints := []string{}
-		if state != nil {
-			curEndpoints = state.endpoints
-		}
-		newEndpoints := filterValidEndpoints(svcEndpoints.Endpoints)
-		if !exists || state == nil || len(curEndpoints) != len(newEndpoints) || !slicesEquiv(slice.CopyStrings(curEndpoints), newEndpoints) {
-			glog.V(3).Infof("LoadBalancerRR: Setting endpoints for %s to %+v", svcEndpoints.Name, svcEndpoints.Endpoints)
-			lb.updateAffinityMap(key, newEndpoints)
-			// On update can be called without NewService being called externally.
-			// To be safe we will call it here.  A new service will only be created
-			// if one does not already exist.
-			state = lb.newServiceInternal(name, api.AffinityTypeNone, 0)
-			state.endpoints = slice.ShuffleStrings(newEndpoints)
-			// Reset the round-robin index.
-			state.index = 0
+	for i := range allEndpoints {
+		svcEndpoints := &allEndpoints[i]
+
+		// We need to build a map of portname -> all ip:ports for that
+		// portname.  Explode Endpoints.Subsets[*] into this structure.
+		portsToEndpoints := map[string][]hostPortPair{}
+		for i := range svcEndpoints.Subsets {
+			ss := &svcEndpoints.Subsets[i]
+			for i := range ss.Ports {
+				port := &ss.Ports[i]
+				for i := range ss.Addresses {
+					addr := &ss.Addresses[i]
+					portsToEndpoints[port.Name] = append(portsToEndpoints[port.Name], hostPortPair{addr.IP, port.Port})
+					// Ignore the protocol field - we'll get that from the Service objects.
+				}
+			}
+		}
+
+		for portname := range portsToEndpoints {
+			svcPort := servicePort{types.NamespacedName{svcEndpoints.Namespace, svcEndpoints.Name}, portname}
+			state, exists := lb.services[svcPort]
+			curEndpoints := []string{}
+			if state != nil {
+				curEndpoints = state.endpoints
+			}
+			newEndpoints := flattenValidEndpoints(portsToEndpoints[portname])
+			if !exists || state == nil || len(curEndpoints) != len(newEndpoints) || !slicesEquiv(slice.CopyStrings(curEndpoints), newEndpoints) {
+				glog.V(3).Infof("LoadBalancerRR: Setting endpoints for %s to %+v", svcPort, newEndpoints)
+				lb.updateAffinityMap(svcPort, newEndpoints)
+				// OnUpdate can be called without NewService being called externally.
+				// To be safe we will call it here.  A new service will only be created
+				// if one does not already exist.
+				state = lb.newServiceInternal(svcPort, api.AffinityTypeNone, 0)
+				state.endpoints = slice.ShuffleStrings(newEndpoints)
+				// Reset the round-robin index.
+				state.index = 0
+			}
+			registeredEndpoints[svcPort] = true
 		}
-		registeredEndpoints[key] = true
 	}
 
 	// Remove endpoints missing from the update.
 	for k := range lb.services {
@@ -266,19 +305,20 @@ func slicesEquiv(lhs, rhs []string) bool {
 	return false
 }
 
-func (lb *LoadBalancerRR) CleanupStaleStickySessions(service types.NamespacedName) {
+func (lb *LoadBalancerRR) CleanupStaleStickySessions(service types.NamespacedName, port string) {
+	svcPort := servicePort{service, port}
 	lb.lock.Lock()
 	defer lb.lock.Unlock()
-	key := service
-	state, exists := lb.services[key]
+	state, exists := lb.services[svcPort]
 	if !exists {
-		glog.Warning("CleanupStaleStickySessions called for non-existent balancer key %q", service)
+		glog.Warningf("CleanupStaleStickySessions called for non-existent balancer key %q", svcPort)
 		return
 	}
 	for ip, affinity := range state.affinity.affinityMap {
 		if int(time.Now().Sub(affinity.lastUsed).Minutes()) >= state.affinity.ttlMinutes {
-			glog.V(4).Infof("Removing client %s from affinityMap for service %q", affinity.clientIP, service)
+			glog.V(4).Infof("Removing client %s from affinityMap for service %q", affinity.clientIP, svcPort)
 			delete(state.affinity.affinityMap, ip)
 		}
 	}