overhaul proxy healthchecks

The existing healthcheck lib was pretty complicated and was hiding some
bugs (like the count always being 1),  This is a reboot of the interface
and implementation to be significantly simpler and better tested.
This commit is contained in:
Tim Hockin
2017-04-01 21:14:30 -07:00
parent 7664b97ed2
commit 87d3f2c622
11 changed files with 595 additions and 648 deletions

View File

@@ -219,7 +219,7 @@ type Proxier struct {
nodeIP net.IP
portMapper portOpener
recorder record.EventRecorder
healthChecker healthChecker
healthChecker healthcheck.Server
}
type localPort struct {
@@ -251,17 +251,6 @@ func (l *listenPortOpener) OpenLocalPort(lp *localPort) (closeable, error) {
return openLocalPort(lp)
}
type healthChecker interface {
UpdateEndpoints(serviceName types.NamespacedName, endpointUIDs sets.String)
}
// TODO: the healthcheck pkg should offer a type
type globalHealthChecker struct{}
func (globalHealthChecker) UpdateEndpoints(serviceName types.NamespacedName, endpointUIDs sets.String) {
healthcheck.UpdateEndpoints(serviceName, endpointUIDs)
}
// Proxier implements ProxyProvider
var _ proxy.ProxyProvider = &Proxier{}
@@ -315,8 +304,7 @@ func NewProxier(ipt utiliptables.Interface,
glog.Warningf("clusterCIDR not specified, unable to distinguish between internal and external traffic")
}
healthChecker := globalHealthChecker{}
go healthcheck.Run()
healthChecker := healthcheck.NewServer(hostname, recorder, nil, nil) // use default implementations of deps
var throttle flowcontrol.RateLimiter
// Defaulting back to not limit sync rate when minSyncPeriod is 0.
@@ -450,18 +438,12 @@ func (proxier *Proxier) SyncLoop() {
}
}
type healthCheckPort struct {
namespace types.NamespacedName
nodeport int
}
// Accepts a list of Services and the existing service map. Returns the new
// service map, a list of healthcheck ports to add to or remove from the health
// checking listener service, and a set of stale UDP services.
func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap) (proxyServiceMap, []healthCheckPort, []healthCheckPort, sets.String) {
// service map, a map of healthcheck ports, and a set of stale UDP
// services.
func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap) (proxyServiceMap, map[types.NamespacedName]uint16, sets.String) {
newServiceMap := make(proxyServiceMap)
healthCheckAdd := make([]healthCheckPort, 0)
healthCheckDel := make([]healthCheckPort, 0)
hcPorts := make(map[types.NamespacedName]uint16)
for _, service := range allServices {
svcName := types.NamespacedName{
@@ -497,12 +479,8 @@ func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMa
glog.V(1).Infof("Updating existing service %q at %s:%d/%s", serviceName, info.clusterIP, servicePort.Port, servicePort.Protocol)
}
if !exists || !equal {
if info.onlyNodeLocalEndpoints && info.healthCheckNodePort > 0 {
healthCheckAdd = append(healthCheckAdd, healthCheckPort{serviceName.NamespacedName, info.healthCheckNodePort})
} else {
healthCheckDel = append(healthCheckDel, healthCheckPort{serviceName.NamespacedName, 0})
}
if info.onlyNodeLocalEndpoints {
hcPorts[svcName] = uint16(info.healthCheckNodePort)
}
newServiceMap[serviceName] = info
@@ -510,6 +488,13 @@ func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMa
}
}
for nsn, port := range hcPorts {
if port == 0 {
glog.Errorf("Service %q has no healthcheck nodeport", nsn)
delete(hcPorts, nsn)
}
}
staleUDPServices := sets.NewString()
// Remove serviceports missing from the update.
for name, info := range oldServiceMap {
@@ -518,13 +503,10 @@ func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMa
if info.protocol == api.ProtocolUDP {
staleUDPServices.Insert(info.clusterIP.String())
}
if info.onlyNodeLocalEndpoints && info.healthCheckNodePort > 0 {
healthCheckDel = append(healthCheckDel, healthCheckPort{name.NamespacedName, info.healthCheckNodePort})
}
}
}
return newServiceMap, healthCheckAdd, healthCheckDel, staleUDPServices
return newServiceMap, hcPorts, staleUDPServices
}
// OnServiceUpdate tracks the active set of service proxies.
@@ -537,19 +519,11 @@ func (proxier *Proxier) OnServiceUpdate(allServices []*api.Service) {
}
proxier.allServices = allServices
newServiceMap, hcAdd, hcDel, staleUDPServices := buildNewServiceMap(allServices, proxier.serviceMap)
for _, hc := range hcAdd {
glog.V(4).Infof("Adding health check for %+v, port %v", hc.namespace, hc.nodeport)
// Turn on healthcheck responder to listen on the health check nodePort
// FIXME: handle failures from adding the service
healthcheck.AddServiceListener(hc.namespace, hc.nodeport)
}
for _, hc := range hcDel {
// Remove ServiceListener health check nodePorts from the health checker
// TODO - Stats
glog.V(4).Infof("Deleting health check for %+v, port %v", hc.namespace, hc.nodeport)
// FIXME: handle failures from deleting the service
healthcheck.DeleteServiceListener(hc.namespace, hc.nodeport)
newServiceMap, hcPorts, staleUDPServices := buildNewServiceMap(allServices, proxier.serviceMap)
// update healthcheck ports
if err := proxier.healthChecker.SyncServices(hcPorts); err != nil {
glog.Errorf("Error syncing healtcheck ports: %v", err)
}
if len(newServiceMap) != len(proxier.serviceMap) || !reflect.DeepEqual(newServiceMap, proxier.serviceMap) {
@@ -585,7 +559,7 @@ func (proxier *Proxier) OnEndpointsUpdate(allEndpoints []*api.Endpoints) {
// Convert a slice of api.Endpoints objects into a map of service-port -> endpoints.
func buildNewEndpointsMap(allEndpoints []*api.Endpoints, curMap proxyEndpointMap, hostname string,
healthChecker healthChecker) (newMap proxyEndpointMap, staleSet map[endpointServicePair]bool) {
healthChecker healthcheck.Server) (newMap proxyEndpointMap, staleSet map[endpointServicePair]bool) {
// return values
newMap = make(proxyEndpointMap)
@@ -618,41 +592,28 @@ func buildNewEndpointsMap(allEndpoints []*api.Endpoints, curMap proxyEndpointMap
return
}
// Update service health check. We include entries from the current map,
// with zero-length value, to trigger the healthchecker to stop reporting
// health for that service.
//
// This whole mechanism may be over-designed. It builds a list of endpoints
// per service, filters for local endpoints, builds a string that is the
// same as the name, and then passes each (name, list) pair over a channel.
//
// I am pretty sure that there's no way there can be more than one entry in
// the final list, and passing an empty list as a delete signal is weird.
// It could probably be simplified to a synchronous function call of a set
// of NamespacedNames. I am not making that simplification at this time.
//
// ServicePortName includes the port name, which doesn't matter for
// healthchecks. It's possible that a single update both added and removed
// ports on the same IP, so we need to make sure that removals are counted,
// with additions overriding them. Track all endpoints so we can find local
// ones.
epsBySvcName := map[types.NamespacedName][]*endpointsInfo{}
for svcPort := range curMap {
epsBySvcName[svcPort.NamespacedName] = nil
}
// accumulate local IPs per service, ignoring ports
localIPs := map[types.NamespacedName]sets.String{}
for svcPort := range newMap {
epsBySvcName[svcPort.NamespacedName] = append(epsBySvcName[svcPort.NamespacedName], newMap[svcPort]...)
}
for nsn, eps := range epsBySvcName {
// Use a set instead of a slice to provide deduplication
epSet := sets.NewString()
for _, ep := range eps {
for _, ep := range newMap[svcPort] {
if ep.isLocal {
// kube-proxy health check only needs local endpoints
epSet.Insert(fmt.Sprintf("%s/%s", nsn.Namespace, nsn.Name))
nsn := svcPort.NamespacedName
if localIPs[nsn] == nil {
localIPs[nsn] = sets.NewString()
}
ip := strings.Split(ep.endpoint, ":")[0] // just the IP part
localIPs[nsn].Insert(ip)
}
}
healthChecker.UpdateEndpoints(nsn, epSet)
}
// produce a count per service
localEndpointCounts := map[types.NamespacedName]int{}
for nsn, ips := range localIPs {
localEndpointCounts[nsn] = len(ips)
}
// update healthcheck endpoints
if err := healthChecker.SyncEndpoints(localEndpointCounts); err != nil {
glog.Errorf("Error syncing healthcheck endoints: %v", err)
}
return newMap, staleSet