overhaul proxy healthchecks
The existing healthcheck lib was pretty complicated and was hiding some bugs (like the count always being 1). This is a reboot of the interface and implementation to be significantly simpler and better tested.
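For orientation, the call sites in this diff (healthcheck.NewServer, SyncServices, SyncEndpoints) imply a server surface roughly like the sketch below. This is inferred from the diff, not the verbatim definition in the healthcheck package, and the import path is an assumption for this era of the tree:

    // Sketch of the new healthcheck.Server surface, as implied by the
    // call sites in this diff; details may differ from the real package.
    package healthcheck

    import "k8s.io/kubernetes/pkg/types" // assumed pre-apimachinery path

    type Server interface {
        // SyncServices takes the full map of service name -> healthcheck
        // nodePort; the server opens/closes listeners to match.
        SyncServices(newServices map[types.NamespacedName]uint16) error
        // SyncEndpoints takes the full map of service name -> number of
        // local endpoints; a service with no entry stops being reported.
        SyncEndpoints(newEndpoints map[types.NamespacedName]int) error
    }

Both calls are full-state syncs rather than incremental add/delete events, which is what lets the proxier drop the healthCheckAdd/healthCheckDel bookkeeping below.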
@@ -219,7 +219,7 @@ type Proxier struct {
 	nodeIP         net.IP
 	portMapper     portOpener
 	recorder       record.EventRecorder
-	healthChecker  healthChecker
+	healthChecker  healthcheck.Server
 }
 
 type localPort struct {
@@ -251,17 +251,6 @@ func (l *listenPortOpener) OpenLocalPort(lp *localPort) (closeable, error) {
 	return openLocalPort(lp)
 }
 
-type healthChecker interface {
-	UpdateEndpoints(serviceName types.NamespacedName, endpointUIDs sets.String)
-}
-
-// TODO: the healthcheck pkg should offer a type
-type globalHealthChecker struct{}
-
-func (globalHealthChecker) UpdateEndpoints(serviceName types.NamespacedName, endpointUIDs sets.String) {
-	healthcheck.UpdateEndpoints(serviceName, endpointUIDs)
-}
-
 // Proxier implements ProxyProvider
 var _ proxy.ProxyProvider = &Proxier{}
@@ -315,8 +304,7 @@ func NewProxier(ipt utiliptables.Interface,
 		glog.Warningf("clusterCIDR not specified, unable to distinguish between internal and external traffic")
 	}
 
-	healthChecker := globalHealthChecker{}
-	go healthcheck.Run()
+	healthChecker := healthcheck.NewServer(hostname, recorder, nil, nil) // use default implementations of deps
 
 	var throttle flowcontrol.RateLimiter
 	// Defaulting back to not limit sync rate when minSyncPeriod is 0.
@@ -450,18 +438,12 @@ func (proxier *Proxier) SyncLoop() {
 	}
 }
 
-type healthCheckPort struct {
-	namespace types.NamespacedName
-	nodeport  int
-}
-
 // Accepts a list of Services and the existing service map. Returns the new
-// service map, a list of healthcheck ports to add to or remove from the health
-// checking listener service, and a set of stale UDP services.
-func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap) (proxyServiceMap, []healthCheckPort, []healthCheckPort, sets.String) {
+// service map, a map of healthcheck ports, and a set of stale UDP
+// services.
+func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap) (proxyServiceMap, map[types.NamespacedName]uint16, sets.String) {
 	newServiceMap := make(proxyServiceMap)
-	healthCheckAdd := make([]healthCheckPort, 0)
-	healthCheckDel := make([]healthCheckPort, 0)
+	hcPorts := make(map[types.NamespacedName]uint16)
 
 	for _, service := range allServices {
 		svcName := types.NamespacedName{
@@ -497,12 +479,8 @@ func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap
 			glog.V(1).Infof("Updating existing service %q at %s:%d/%s", serviceName, info.clusterIP, servicePort.Port, servicePort.Protocol)
 		}
 
-		if !exists || !equal {
-			if info.onlyNodeLocalEndpoints && info.healthCheckNodePort > 0 {
-				healthCheckAdd = append(healthCheckAdd, healthCheckPort{serviceName.NamespacedName, info.healthCheckNodePort})
-			} else {
-				healthCheckDel = append(healthCheckDel, healthCheckPort{serviceName.NamespacedName, 0})
-			}
+		if info.onlyNodeLocalEndpoints {
+			hcPorts[svcName] = uint16(info.healthCheckNodePort)
 		}
 
 		newServiceMap[serviceName] = info
@@ -510,6 +488,13 @@ func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap
 		}
 	}
 
+	for nsn, port := range hcPorts {
+		if port == 0 {
+			glog.Errorf("Service %q has no healthcheck nodeport", nsn)
+			delete(hcPorts, nsn)
+		}
+	}
+
 	staleUDPServices := sets.NewString()
 	// Remove serviceports missing from the update.
 	for name, info := range oldServiceMap {
@@ -518,13 +503,10 @@ func buildNewServiceMap(allServices []*api.Service, oldServiceMap proxyServiceMap
 			if info.protocol == api.ProtocolUDP {
 				staleUDPServices.Insert(info.clusterIP.String())
 			}
-			if info.onlyNodeLocalEndpoints && info.healthCheckNodePort > 0 {
-				healthCheckDel = append(healthCheckDel, healthCheckPort{name.NamespacedName, info.healthCheckNodePort})
-			}
 		}
 	}
 
-	return newServiceMap, healthCheckAdd, healthCheckDel, staleUDPServices
+	return newServiceMap, hcPorts, staleUDPServices
 }
 
 // OnServiceUpdate tracks the active set of service proxies.
@@ -537,19 +519,11 @@ func (proxier *Proxier) OnServiceUpdate(allServices []*api.Service) {
 	}
 	proxier.allServices = allServices
 
-	newServiceMap, hcAdd, hcDel, staleUDPServices := buildNewServiceMap(allServices, proxier.serviceMap)
-	for _, hc := range hcAdd {
-		glog.V(4).Infof("Adding health check for %+v, port %v", hc.namespace, hc.nodeport)
-		// Turn on healthcheck responder to listen on the health check nodePort
-		// FIXME: handle failures from adding the service
-		healthcheck.AddServiceListener(hc.namespace, hc.nodeport)
-	}
-	for _, hc := range hcDel {
-		// Remove ServiceListener health check nodePorts from the health checker
-		// TODO - Stats
-		glog.V(4).Infof("Deleting health check for %+v, port %v", hc.namespace, hc.nodeport)
-		// FIXME: handle failures from deleting the service
-		healthcheck.DeleteServiceListener(hc.namespace, hc.nodeport)
-	}
+	newServiceMap, hcPorts, staleUDPServices := buildNewServiceMap(allServices, proxier.serviceMap)
+
+	// update healthcheck ports
+	if err := proxier.healthChecker.SyncServices(hcPorts); err != nil {
+		glog.Errorf("Error syncing healthcheck ports: %v", err)
+	}
 
 	if len(newServiceMap) != len(proxier.serviceMap) || !reflect.DeepEqual(newServiceMap, proxier.serviceMap) {
@@ -585,7 +559,7 @@ func (proxier *Proxier) OnEndpointsUpdate(allEndpoints []*api.Endpoints) {
 
 // Convert a slice of api.Endpoints objects into a map of service-port -> endpoints.
 func buildNewEndpointsMap(allEndpoints []*api.Endpoints, curMap proxyEndpointMap, hostname string,
-	healthChecker healthChecker) (newMap proxyEndpointMap, staleSet map[endpointServicePair]bool) {
+	healthChecker healthcheck.Server) (newMap proxyEndpointMap, staleSet map[endpointServicePair]bool) {
 
 	// return values
 	newMap = make(proxyEndpointMap)
@@ -618,41 +592,28 @@ func buildNewEndpointsMap(allEndpoints []*api.Endpoints, curMap proxyEndpointMap
 		return
 	}
 
-	// Update service health check.  We include entries from the current map,
-	// with zero-length value, to trigger the healthchecker to stop reporting
-	// health for that service.
-	//
-	// This whole mechanism may be over-designed.  It builds a list of endpoints
-	// per service, filters for local endpoints, builds a string that is the
-	// same as the name, and then passes each (name, list) pair over a channel.
-	//
-	// I am pretty sure that there's no way there can be more than one entry in
-	// the final list, and passing an empty list as a delete signal is weird.
-	// It could probably be simplified to a synchronous function call of a set
-	// of NamespacedNames.  I am not making that simplification at this time.
-	//
-	// ServicePortName includes the port name, which doesn't matter for
-	// healthchecks.  It's possible that a single update both added and removed
-	// ports on the same IP, so we need to make sure that removals are counted,
-	// with additions overriding them.  Track all endpoints so we can find local
-	// ones.
-	epsBySvcName := map[types.NamespacedName][]*endpointsInfo{}
-	for svcPort := range curMap {
-		epsBySvcName[svcPort.NamespacedName] = nil
-	}
-	for svcPort := range newMap {
-		epsBySvcName[svcPort.NamespacedName] = append(epsBySvcName[svcPort.NamespacedName], newMap[svcPort]...)
-	}
-	for nsn, eps := range epsBySvcName {
-		// Use a set instead of a slice to provide deduplication
-		epSet := sets.NewString()
-		for _, ep := range eps {
-			if ep.isLocal {
-				// kube-proxy health check only needs local endpoints
-				epSet.Insert(fmt.Sprintf("%s/%s", nsn.Namespace, nsn.Name))
-			}
-		}
-		healthChecker.UpdateEndpoints(nsn, epSet)
-	}
+	// accumulate local IPs per service, ignoring ports
+	localIPs := map[types.NamespacedName]sets.String{}
+	for svcPort := range newMap {
+		for _, ep := range newMap[svcPort] {
+			if ep.isLocal {
+				nsn := svcPort.NamespacedName
+				if localIPs[nsn] == nil {
+					localIPs[nsn] = sets.NewString()
+				}
+				ip := strings.Split(ep.endpoint, ":")[0] // just the IP part
+				localIPs[nsn].Insert(ip)
+			}
+		}
+	}
+	// produce a count per service
+	localEndpointCounts := map[types.NamespacedName]int{}
+	for nsn, ips := range localIPs {
+		localEndpointCounts[nsn] = len(ips)
+	}
+	// update healthcheck endpoints
+	if err := healthChecker.SyncEndpoints(localEndpointCounts); err != nil {
+		glog.Errorf("Error syncing healthcheck endpoints: %v", err)
+	}
 
 	return newMap, staleSet
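The local-endpoint counting in that last hunk is the heart of the endpoints side: deduplicate local endpoint IPs per service (ignoring ports, so one pod backing several ports counts once), then hand the per-service counts to SyncEndpoints. A minimal standalone sketch of that logic, with hypothetical types standing in for the proxier's internals:

    // Hypothetical stand-ins for the proxier's internal types; the real
    // code keys by ServicePortName and groups by NamespacedName.
    package main

    import (
        "fmt"
        "strings"
    )

    type nsName struct{ Namespace, Name string }

    type endpointInfo struct {
        endpoint string // "ip:port"
        isLocal  bool
    }

    // localEndpointCounts mirrors the diff's logic: collect local endpoint
    // IPs per service into a set, then count the distinct IPs.
    func localEndpointCounts(eps map[nsName][]endpointInfo) map[nsName]int {
        localIPs := map[nsName]map[string]bool{}
        for nsn, list := range eps {
            for _, ep := range list {
                if !ep.isLocal {
                    continue
                }
                if localIPs[nsn] == nil {
                    localIPs[nsn] = map[string]bool{}
                }
                ip := strings.Split(ep.endpoint, ":")[0] // just the IP part
                localIPs[nsn][ip] = true                 // set semantics dedupe ports
            }
        }
        counts := map[nsName]int{}
        for nsn, ips := range localIPs {
            counts[nsn] = len(ips)
        }
        return counts
    }

    func main() {
        eps := map[nsName][]endpointInfo{
            {"default", "svc"}: {
                {"10.0.0.1:80", true},
                {"10.0.0.1:443", true}, // same IP, different port: counted once
                {"10.0.0.2:80", false}, // not local: ignored
            },
        }
        fmt.Println(localEndpointCounts(eps)) // map[{default svc}:1]
    }

Services that drop out of the map entirely simply get no entry, which under the full-state SyncEndpoints contract replaces the old "empty list as a delete signal" channel mechanism.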