From 3ea26e743605d34d9eb55a6dfe38042acbccfb26 Mon Sep 17 00:00:00 2001 From: Nick Sardo Date: Tue, 13 Jun 2017 13:22:12 -0700 Subject: [PATCH] Annotation for opting into backend sharing; Use hash suffix for sharing; Fix resource GC --- .../providers/gce/gce_annotations.go | 16 +- .../gce/gce_loadbalancer_internal.go | 143 ++++++++++-------- .../providers/gce/gce_loadbalancer_naming.go | 23 +-- 3 files changed, 106 insertions(+), 76 deletions(-) diff --git a/pkg/cloudprovider/providers/gce/gce_annotations.go b/pkg/cloudprovider/providers/gce/gce_annotations.go index 98c72577a40..280bdbc7351 100644 --- a/pkg/cloudprovider/providers/gce/gce_annotations.go +++ b/pkg/cloudprovider/providers/gce/gce_annotations.go @@ -21,12 +21,17 @@ import "k8s.io/kubernetes/pkg/api/v1" type LoadBalancerType string const ( - // ServiceAnnotationLoadBalancerType is the annotation used on a service with type LoadBalancer + // ServiceAnnotationLoadBalancerType is annotated on a service with type LoadBalancer // dictates what specific kind of GCP LB should be assembled. // Currently, only "internal" is supported. ServiceAnnotationLoadBalancerType = "cloud.google.com/load-balancer-type" LBTypeInternal LoadBalancerType = "internal" + + // ServiceAnnotationInternalBackendShare is annotated on a service with "true" when users + // want to share GCP Backend Services for a set of internal load balancers. + // ALPHA feature - this may be removed in a future release. + ServiceAnnotationILBBackendShare = "cloud.google.com/load-balancer-backend-share" ) // GetLoadBalancerAnnotationType returns the type of GCP load balancer which should be assembled. @@ -49,3 +54,12 @@ func GetLoadBalancerAnnotationType(service *v1.Service) (LoadBalancerType, bool) return v, false } } + +func GetLoadBalancerAnnotationBackendShare(service *v1.Service) bool { + l, exists := service.Annotations[ServiceAnnotationILBBackendShare] + if exists && l == "true" { + return true + } + + return false +} diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go index 47b7ed90ee8..0d069610020 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go @@ -41,8 +41,8 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s ports, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal loadBalancerName := cloudprovider.GetLoadBalancerName(svc) - shared := !v1_service.RequestsOnlyLocalTraffic(svc) - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shared, scheme, protocol, svc.Spec.SessionAffinity) + sharedBackend := serviceIsShared(svc) + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) backendServiceLink := gce.getBackendServiceLink(backendServiceName) // Ensure instance groups exist and nodes are assigned to groups @@ -52,19 +52,26 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return nil, err } + existingBSName := getNameFromLink(existingFwdRule.BackendService) + existingBackendService, err := gce.GetRegionBackendService(existingBSName, gce.region) + if err != nil && !isNotFound(err) { + return nil, err + } + // Lock the sharedResourceLock to prevent any deletions of shared resources while assembling shared resources here gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() // Ensure health check for backend service is created - // By default, use the node health check endpoint - hcName := makeHealthCheckName(loadBalancerName, clusterID, shared) + // The health check is shared unless + sharedHealthCheck := !v1_service.RequestsOnlyLocalTraffic(svc) + hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedHealthCheck) hcPath, hcPort := GetNodesHealthCheckPath(), GetNodesHealthCheckPort() - if !shared { + if !sharedHealthCheck { // Service requires a special health check, retrieve the OnlyLocal port & path hcPath, hcPort = v1_service.GetServiceHealthCheckPathPort(svc) } - hc, err := gce.ensureInternalHealthCheck(hcName, nm, shared, hcPath, hcPort) + hc, err := gce.ensureInternalHealthCheck(hcName, nm, sharedHealthCheck, hcPath, hcPort) if err != nil { return nil, err } @@ -73,7 +80,7 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s if gce.OnXPN() { glog.V(2).Infof("ensureInternalLoadBalancer: cluster is on a cross-project network (XPN) network project %v, compute project %v - skipping firewall creation", gce.networkProjectID, gce.projectID) } else { - if err = gce.ensureInternalFirewalls(loadBalancerName, clusterID, nm, svc, strconv.Itoa(int(hcPort)), shared, nodes); err != nil { + if err = gce.ensureInternalFirewalls(loadBalancerName, clusterID, nm, svc, strconv.Itoa(int(hcPort)), sharedHealthCheck, nodes); err != nil { return nil, err } } @@ -95,13 +102,16 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s expectedFwdRule.Network = gce.networkURL } - // Delete the previous internal load balancer if necessary - fwdRuleDeleted, err := gce.clearExistingInternalLB(loadBalancerName, existingFwdRule, expectedFwdRule, backendServiceName) - if err != nil { - return nil, err + fwdRuleDeleted := false + if !fwdRuleEqual(existingFwdRule, expectedFwdRule) { + glog.V(2).Infof("ensureInternalLoadBalancer(%v: deleting existing forwarding rule with IP address %v", loadBalancerName, existingFwdRule.IPAddress) + if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { + return nil, err + } + fwdRuleDeleted = true } - bsDescription := makeBackendServiceDescription(nm, shared) + bsDescription := makeBackendServiceDescription(nm, sharedBackend) err = gce.ensureInternalBackendService(backendServiceName, bsDescription, svc.Spec.SessionAffinity, scheme, protocol, igLinks, hc.SelfLink) if err != nil { return nil, err @@ -115,6 +125,11 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s } } + // Delete the previous internal load balancer if necessary + if err = gce.clearPreviousInternalResources(loadBalancerName, existingBackendService, backendServiceName, hcName); err != nil { + return nil, err + } + // Get the most recent forwarding rule for the new address. existingFwdRule, err = gce.GetRegionForwardingRule(loadBalancerName, gce.region) if err != nil { @@ -126,39 +141,25 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return status, nil } -func (gce *GCECloud) clearExistingInternalLB(loadBalancerName string, existingFwdRule, expectedFwdRule *compute.ForwardingRule, expectedBSName string) (fwdRuleDeleted bool, err error) { - if existingFwdRule == nil { - return false, nil +func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) error { + if existingBackendService.Name != expectedBSName { + glog.V(2).Infof("clearPreviousInternalResources(%v): expected backend service %q does not match previous %q - deleting backend service", loadBalancerName, expectedBSName, existingBackendService.Name) + if err := gce.teardownInternalBackendService(existingBackendService.Name); err != nil && !isNotFound(err) { + glog.Warningf("clearPreviousInternalResources: could not delete old backend service: %v, err: %v", existingBackendService.Name, err) + } } - if !fwdRuleEqual(existingFwdRule, expectedFwdRule) { - glog.V(2).Infof("clearExistingInternalLB(%v: deleting existing forwarding rule with IP address %v", loadBalancerName, existingFwdRule.IPAddress) - if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { - return false, err + if len(existingBackendService.HealthChecks) == 1 { + existingHCName := getNameFromLink(existingBackendService.HealthChecks[0]) + if existingHCName != expectedHCName { + glog.V(2).Infof("clearPreviousInternalResources(%v): expected health check %q does not match previous %q - deleting health check", loadBalancerName, expectedHCName, existingHCName) + if err := gce.teardownInternalHealthCheckAndFirewall(existingHCName); err != nil { + glog.Warningf("clearPreviousInternalResources: could not delete existing healthcheck: %v, err: %v", existingHCName, err) + } } - fwdRuleDeleted = true } - existingBSName := getNameFromLink(existingFwdRule.BackendService) - bs, err := gce.GetRegionBackendService(existingBSName, gce.region) - if err == nil { - if bs.Name != expectedBSName { - glog.V(2).Infof("clearExistingInternalLB(%v): expected backend service %q does not match actual %q - deleting backend service & healthcheck & firewall", loadBalancerName, expectedBSName, bs.Name) - // Delete the backend service as well in case it's switching between shared, nonshared, tcp, udp. - var existingHCName string - if len(bs.HealthChecks) == 1 { - existingHCName = getNameFromLink(bs.HealthChecks[0]) - } - if err = gce.teardownInternalBackendResources(existingBSName, existingHCName); err != nil { - glog.Warningf("clearExistingInternalLB: could not delete old resources: %v", err) - } else { - glog.V(2).Infof("clearExistingInternalLB: done deleting old resources") - } - } - } else { - glog.Warningf("clearExistingInternalLB(%v): failed to retrieve existing backend service %v", loadBalancerName, existingBSName) - } - return fwdRuleDeleted, nil + return nil } func (gce *GCECloud) updateInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, nodes []*v1.Node) error { @@ -175,8 +176,8 @@ func (gce *GCECloud) updateInternalLoadBalancer(clusterName, clusterID string, s _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal loadBalancerName := cloudprovider.GetLoadBalancerName(svc) - shared := !v1_service.RequestsOnlyLocalTraffic(svc) - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shared, scheme, protocol, svc.Spec.SessionAffinity) + sharedBackend := serviceIsShared(svc) + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) // Ensure the backend service has the proper backend instance-group links return gce.ensureInternalBackendServiceGroups(backendServiceName, igLinks) } @@ -185,29 +186,28 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st loadBalancerName := cloudprovider.GetLoadBalancerName(svc) _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal - shared := !v1_service.RequestsOnlyLocalTraffic(svc) - var err error + sharedBackend := serviceIsShared(svc) gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting region internal forwarding rule", loadBalancerName) - if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { + if err := gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { return err } - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shared, scheme, protocol, svc.Spec.SessionAffinity) + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting region backend service %v", loadBalancerName, backendServiceName) - if err = gce.DeleteRegionBackendService(backendServiceName, gce.region); err != nil && !isNotFoundOrInUse(err) { + if err := gce.DeleteRegionBackendService(backendServiceName, gce.region); err != nil && !isNotFoundOrInUse(err) { return err } // Only delete the health check & health check firewall if they aren't being used by another LB. If we get // an ResourceInuseBy error, then we can skip deleting the firewall. hcInUse := false - hcName := makeHealthCheckName(loadBalancerName, clusterID, shared) + hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedBackend) glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting health check %v", loadBalancerName, hcName) - if err = gce.DeleteHealthCheck(hcName); err != nil && !isNotFoundOrInUse(err) { + if err := gce.DeleteHealthCheck(hcName); err != nil && !isNotFoundOrInUse(err) { return err } else if isInUsedByError(err) { glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): healthcheck %v still in use", loadBalancerName, hcName) @@ -215,7 +215,7 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st } glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for traffic", loadBalancerName) - if err = gce.DeleteFirewall(loadBalancerName); err != nil { + if err := gce.DeleteFirewall(loadBalancerName); err != nil { return err } @@ -223,48 +223,54 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): skipping firewall for healthcheck", loadBalancerName) } else { glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for healthcheck", loadBalancerName) - fwHCName := makeHealthCheckFirewallkName(loadBalancerName, clusterID, shared) - if err = gce.DeleteFirewall(fwHCName); err != nil && !isInUsedByError(err) { + fwHCName := makeHealthCheckFirewallName(loadBalancerName, clusterID, sharedBackend) + if err := gce.DeleteFirewall(fwHCName); err != nil && !isInUsedByError(err) { return err } } // Try deleting instance groups - expect ResourceInuse error if needed by other LBs igName := makeInstanceGroupName(clusterID) - if err = gce.ensureInternalInstanceGroupsDeleted(igName); err != nil && !isInUsedByError(err) { + if err := gce.ensureInternalInstanceGroupsDeleted(igName); err != nil && !isInUsedByError(err) { return err } return nil } -func (gce *GCECloud) teardownInternalBackendResources(bsName, hcName string) error { +func (gce *GCECloud) teardownInternalBackendService(bsName string) error { if err := gce.DeleteRegionBackendService(bsName, gce.region); err != nil { if isNotFound(err) { - glog.V(2).Infof("backend service already deleted: %v, err: %v", bsName, err) + glog.V(2).Infof("teardownInternalBackendService(%v): backend service already deleted. err: %v", bsName, err) + return nil } else if err != nil && isInUsedByError(err) { - glog.V(2).Infof("backend service in use: %v, err: %v", bsName, err) + glog.V(2).Infof("teardownInternalBackendService(%v): backend service in use.", bsName) + return nil } else { return fmt.Errorf("failed to delete backend service: %v, err: %v", bsName, err) } } + glog.V(2).Infof("teardownInternalBackendService(%v): backend service deleted", bsName) + return nil +} - if hcName == "" { - return nil - } +func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(hcName string) error { hcInUse := false if err := gce.DeleteHealthCheck(hcName); err != nil { if isNotFound(err) { - glog.V(2).Infof("health check already deleted: %v, err: %v", hcName, err) + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check does not exist.", hcName) + return nil } else if err != nil && isInUsedByError(err) { hcInUse = true - glog.V(2).Infof("health check in use: %v, err: %v", hcName, err) + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check in use.", hcName) + return nil } else { return fmt.Errorf("failed to delete health check: %v, err: %v", hcName, err) } } + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check deleted", hcName) - hcFirewallName := makeHealthCheckFirewallkNameFromHC(hcName) + hcFirewallName := makeHealthCheckFirewallNameFromHC(hcName) if hcInUse { glog.V(2).Infof("skipping deletion of health check firewall: %v", hcFirewallName) return nil @@ -273,6 +279,7 @@ func (gce *GCECloud) teardownInternalBackendResources(bsName, hcName string) err if err := gce.DeleteFirewall(hcFirewallName); err != nil && !isNotFound(err) { return fmt.Errorf("failed to delete health check firewall: %v, err: %v", hcFirewallName, err) } + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check firewall deleted", hcFirewallName) return nil } @@ -315,7 +322,7 @@ func (gce *GCECloud) ensureInternalFirewall(fwName, fwDesc string, sourceRanges return gce.UpdateFirewall(expectedFirewall) } -func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, clusterID string, nm types.NamespacedName, svc *v1.Service, healthCheckPort string, shared bool, nodes []*v1.Node) error { +func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, clusterID string, nm types.NamespacedName, svc *v1.Service, healthCheckPort string, sharedHealthCheck bool, nodes []*v1.Node) error { // First firewall is for ingress traffic fwDesc := makeFirewallDescription(nm.String(), svc.Spec.LoadBalancerIP) ports, protocol := getPortsAndProtocol(svc.Spec.Ports) @@ -329,7 +336,7 @@ func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, clusterID string, } // Second firewall is for health checking nodes / services - fwHCName := makeHealthCheckFirewallkName(loadBalancerName, clusterID, shared) + fwHCName := makeHealthCheckFirewallName(loadBalancerName, clusterID, sharedHealthCheck) hcSrcRanges := LoadBalancerSrcRanges() return gce.ensureInternalFirewall(fwHCName, "", hcSrcRanges, []string{healthCheckPort}, v1.ProtocolTCP, nodes) } @@ -496,6 +503,8 @@ func (gce *GCECloud) ensureInternalBackendService(name, description string, affi } glog.V(2).Infof("ensureInternalBackendService: updating backend service %v", name) + // Set fingerprint for optimistic locking + expectedBS.Fingerprint = bs.Fingerprint if err := gce.UpdateRegionBackendService(expectedBS, gce.region); err != nil { return err } @@ -503,6 +512,7 @@ func (gce *GCECloud) ensureInternalBackendService(name, description string, affi return nil } +// ensureInternalBackendServiceGroups updates backend services if their list of backend instance groups is incorrect. func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []string) error { glog.V(2).Infof("ensureInternalBackendServiceGroups(%v): checking existing backend service's groups", name) bs, err := gce.GetRegionBackendService(name, gce.region) @@ -516,7 +526,6 @@ func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []s } glog.V(2).Infof("ensureInternalBackendServiceGroups: updating backend service %v", name) - bs.Backends = backends if err := gce.UpdateRegionBackendService(bs, gce.region); err != nil { return err } @@ -524,6 +533,10 @@ func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []s return nil } +func serviceIsShared(svc *v1.Service) bool { + return GetLoadBalancerAnnotationBackendShare(svc) && !v1_service.RequestsOnlyLocalTraffic(svc) +} + func backendsFromGroupLinks(igLinks []string) []*compute.Backend { var backends []*compute.Backend for _, igLink := range igLinks { diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go index 7279be63ffe..01f0488abb8 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go @@ -17,6 +17,8 @@ limitations under the License. package gce import ( + "crypto/sha1" + "encoding/hex" "fmt" "strings" @@ -33,15 +35,16 @@ func makeInstanceGroupName(clusterID string) string { func makeBackendServiceName(loadBalancerName, clusterID string, shared bool, scheme lbScheme, protocol v1.Protocol, svcAffinity v1.ServiceAffinity) string { if shared { - affinity := "" - switch svcAffinity { - case v1.ServiceAffinityClientIP: - affinity = "clientip" - default: - affinity = "noaffinity" - } + hash := sha1.New() - return fmt.Sprintf("k8s-%s-%s-%s-%s", clusterID, strings.ToLower(string(scheme)), strings.ToLower(string(protocol)), affinity) + // For every non-nil option, hash its value. Currently, only service affinity is relevant. + hash.Write([]byte(string(svcAffinity))) + + hashed := hex.EncodeToString(hash.Sum(nil)) + hashed = hashed[:16] + + // 3 + 1 + 16 + 1 + 8 + 1 + 3 + 16 + return fmt.Sprintf("k8s-%s-%s-%s-nmv1-%s", clusterID, strings.ToLower(string(scheme)), strings.ToLower(string(protocol)), hashed) } return loadBalancerName } @@ -54,11 +57,11 @@ func makeHealthCheckName(loadBalancerName, clusterID string, shared bool) string return loadBalancerName } -func makeHealthCheckFirewallkNameFromHC(healthCheckName string) string { +func makeHealthCheckFirewallNameFromHC(healthCheckName string) string { return healthCheckName + "-hc" } -func makeHealthCheckFirewallkName(loadBalancerName, clusterID string, shared bool) string { +func makeHealthCheckFirewallName(loadBalancerName, clusterID string, shared bool) string { if shared { return fmt.Sprintf("k8s-%s-node-hc", clusterID) }