From 3ea26e743605d34d9eb55a6dfe38042acbccfb26 Mon Sep 17 00:00:00 2001 From: Nick Sardo Date: Tue, 13 Jun 2017 13:22:12 -0700 Subject: [PATCH 1/2] Annotation for opting into backend sharing; Use hash suffix for sharing; Fix resource GC --- .../providers/gce/gce_annotations.go | 16 +- .../gce/gce_loadbalancer_internal.go | 143 ++++++++++-------- .../providers/gce/gce_loadbalancer_naming.go | 23 +-- 3 files changed, 106 insertions(+), 76 deletions(-) diff --git a/pkg/cloudprovider/providers/gce/gce_annotations.go b/pkg/cloudprovider/providers/gce/gce_annotations.go index 98c72577a40..280bdbc7351 100644 --- a/pkg/cloudprovider/providers/gce/gce_annotations.go +++ b/pkg/cloudprovider/providers/gce/gce_annotations.go @@ -21,12 +21,17 @@ import "k8s.io/kubernetes/pkg/api/v1" type LoadBalancerType string const ( - // ServiceAnnotationLoadBalancerType is the annotation used on a service with type LoadBalancer + // ServiceAnnotationLoadBalancerType is annotated on a service with type LoadBalancer // dictates what specific kind of GCP LB should be assembled. // Currently, only "internal" is supported. ServiceAnnotationLoadBalancerType = "cloud.google.com/load-balancer-type" LBTypeInternal LoadBalancerType = "internal" + + // ServiceAnnotationInternalBackendShare is annotated on a service with "true" when users + // want to share GCP Backend Services for a set of internal load balancers. + // ALPHA feature - this may be removed in a future release. + ServiceAnnotationILBBackendShare = "cloud.google.com/load-balancer-backend-share" ) // GetLoadBalancerAnnotationType returns the type of GCP load balancer which should be assembled. @@ -49,3 +54,12 @@ func GetLoadBalancerAnnotationType(service *v1.Service) (LoadBalancerType, bool) return v, false } } + +func GetLoadBalancerAnnotationBackendShare(service *v1.Service) bool { + l, exists := service.Annotations[ServiceAnnotationILBBackendShare] + if exists && l == "true" { + return true + } + + return false +} diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go index 47b7ed90ee8..0d069610020 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go @@ -41,8 +41,8 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s ports, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal loadBalancerName := cloudprovider.GetLoadBalancerName(svc) - shared := !v1_service.RequestsOnlyLocalTraffic(svc) - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shared, scheme, protocol, svc.Spec.SessionAffinity) + sharedBackend := serviceIsShared(svc) + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) backendServiceLink := gce.getBackendServiceLink(backendServiceName) // Ensure instance groups exist and nodes are assigned to groups @@ -52,19 +52,26 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return nil, err } + existingBSName := getNameFromLink(existingFwdRule.BackendService) + existingBackendService, err := gce.GetRegionBackendService(existingBSName, gce.region) + if err != nil && !isNotFound(err) { + return nil, err + } + // Lock the sharedResourceLock to prevent any deletions of shared resources while assembling shared resources here gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() // Ensure health check for backend service is created - // By default, use the node health check endpoint - hcName := makeHealthCheckName(loadBalancerName, clusterID, shared) + // The health check is shared unless + sharedHealthCheck := !v1_service.RequestsOnlyLocalTraffic(svc) + hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedHealthCheck) hcPath, hcPort := GetNodesHealthCheckPath(), GetNodesHealthCheckPort() - if !shared { + if !sharedHealthCheck { // Service requires a special health check, retrieve the OnlyLocal port & path hcPath, hcPort = v1_service.GetServiceHealthCheckPathPort(svc) } - hc, err := gce.ensureInternalHealthCheck(hcName, nm, shared, hcPath, hcPort) + hc, err := gce.ensureInternalHealthCheck(hcName, nm, sharedHealthCheck, hcPath, hcPort) if err != nil { return nil, err } @@ -73,7 +80,7 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s if gce.OnXPN() { glog.V(2).Infof("ensureInternalLoadBalancer: cluster is on a cross-project network (XPN) network project %v, compute project %v - skipping firewall creation", gce.networkProjectID, gce.projectID) } else { - if err = gce.ensureInternalFirewalls(loadBalancerName, clusterID, nm, svc, strconv.Itoa(int(hcPort)), shared, nodes); err != nil { + if err = gce.ensureInternalFirewalls(loadBalancerName, clusterID, nm, svc, strconv.Itoa(int(hcPort)), sharedHealthCheck, nodes); err != nil { return nil, err } } @@ -95,13 +102,16 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s expectedFwdRule.Network = gce.networkURL } - // Delete the previous internal load balancer if necessary - fwdRuleDeleted, err := gce.clearExistingInternalLB(loadBalancerName, existingFwdRule, expectedFwdRule, backendServiceName) - if err != nil { - return nil, err + fwdRuleDeleted := false + if !fwdRuleEqual(existingFwdRule, expectedFwdRule) { + glog.V(2).Infof("ensureInternalLoadBalancer(%v: deleting existing forwarding rule with IP address %v", loadBalancerName, existingFwdRule.IPAddress) + if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { + return nil, err + } + fwdRuleDeleted = true } - bsDescription := makeBackendServiceDescription(nm, shared) + bsDescription := makeBackendServiceDescription(nm, sharedBackend) err = gce.ensureInternalBackendService(backendServiceName, bsDescription, svc.Spec.SessionAffinity, scheme, protocol, igLinks, hc.SelfLink) if err != nil { return nil, err @@ -115,6 +125,11 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s } } + // Delete the previous internal load balancer if necessary + if err = gce.clearPreviousInternalResources(loadBalancerName, existingBackendService, backendServiceName, hcName); err != nil { + return nil, err + } + // Get the most recent forwarding rule for the new address. existingFwdRule, err = gce.GetRegionForwardingRule(loadBalancerName, gce.region) if err != nil { @@ -126,39 +141,25 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return status, nil } -func (gce *GCECloud) clearExistingInternalLB(loadBalancerName string, existingFwdRule, expectedFwdRule *compute.ForwardingRule, expectedBSName string) (fwdRuleDeleted bool, err error) { - if existingFwdRule == nil { - return false, nil +func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) error { + if existingBackendService.Name != expectedBSName { + glog.V(2).Infof("clearPreviousInternalResources(%v): expected backend service %q does not match previous %q - deleting backend service", loadBalancerName, expectedBSName, existingBackendService.Name) + if err := gce.teardownInternalBackendService(existingBackendService.Name); err != nil && !isNotFound(err) { + glog.Warningf("clearPreviousInternalResources: could not delete old backend service: %v, err: %v", existingBackendService.Name, err) + } } - if !fwdRuleEqual(existingFwdRule, expectedFwdRule) { - glog.V(2).Infof("clearExistingInternalLB(%v: deleting existing forwarding rule with IP address %v", loadBalancerName, existingFwdRule.IPAddress) - if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { - return false, err + if len(existingBackendService.HealthChecks) == 1 { + existingHCName := getNameFromLink(existingBackendService.HealthChecks[0]) + if existingHCName != expectedHCName { + glog.V(2).Infof("clearPreviousInternalResources(%v): expected health check %q does not match previous %q - deleting health check", loadBalancerName, expectedHCName, existingHCName) + if err := gce.teardownInternalHealthCheckAndFirewall(existingHCName); err != nil { + glog.Warningf("clearPreviousInternalResources: could not delete existing healthcheck: %v, err: %v", existingHCName, err) + } } - fwdRuleDeleted = true } - existingBSName := getNameFromLink(existingFwdRule.BackendService) - bs, err := gce.GetRegionBackendService(existingBSName, gce.region) - if err == nil { - if bs.Name != expectedBSName { - glog.V(2).Infof("clearExistingInternalLB(%v): expected backend service %q does not match actual %q - deleting backend service & healthcheck & firewall", loadBalancerName, expectedBSName, bs.Name) - // Delete the backend service as well in case it's switching between shared, nonshared, tcp, udp. - var existingHCName string - if len(bs.HealthChecks) == 1 { - existingHCName = getNameFromLink(bs.HealthChecks[0]) - } - if err = gce.teardownInternalBackendResources(existingBSName, existingHCName); err != nil { - glog.Warningf("clearExistingInternalLB: could not delete old resources: %v", err) - } else { - glog.V(2).Infof("clearExistingInternalLB: done deleting old resources") - } - } - } else { - glog.Warningf("clearExistingInternalLB(%v): failed to retrieve existing backend service %v", loadBalancerName, existingBSName) - } - return fwdRuleDeleted, nil + return nil } func (gce *GCECloud) updateInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, nodes []*v1.Node) error { @@ -175,8 +176,8 @@ func (gce *GCECloud) updateInternalLoadBalancer(clusterName, clusterID string, s _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal loadBalancerName := cloudprovider.GetLoadBalancerName(svc) - shared := !v1_service.RequestsOnlyLocalTraffic(svc) - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shared, scheme, protocol, svc.Spec.SessionAffinity) + sharedBackend := serviceIsShared(svc) + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) // Ensure the backend service has the proper backend instance-group links return gce.ensureInternalBackendServiceGroups(backendServiceName, igLinks) } @@ -185,29 +186,28 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st loadBalancerName := cloudprovider.GetLoadBalancerName(svc) _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal - shared := !v1_service.RequestsOnlyLocalTraffic(svc) - var err error + sharedBackend := serviceIsShared(svc) gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting region internal forwarding rule", loadBalancerName) - if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { + if err := gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { return err } - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shared, scheme, protocol, svc.Spec.SessionAffinity) + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting region backend service %v", loadBalancerName, backendServiceName) - if err = gce.DeleteRegionBackendService(backendServiceName, gce.region); err != nil && !isNotFoundOrInUse(err) { + if err := gce.DeleteRegionBackendService(backendServiceName, gce.region); err != nil && !isNotFoundOrInUse(err) { return err } // Only delete the health check & health check firewall if they aren't being used by another LB. If we get // an ResourceInuseBy error, then we can skip deleting the firewall. hcInUse := false - hcName := makeHealthCheckName(loadBalancerName, clusterID, shared) + hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedBackend) glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting health check %v", loadBalancerName, hcName) - if err = gce.DeleteHealthCheck(hcName); err != nil && !isNotFoundOrInUse(err) { + if err := gce.DeleteHealthCheck(hcName); err != nil && !isNotFoundOrInUse(err) { return err } else if isInUsedByError(err) { glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): healthcheck %v still in use", loadBalancerName, hcName) @@ -215,7 +215,7 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st } glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for traffic", loadBalancerName) - if err = gce.DeleteFirewall(loadBalancerName); err != nil { + if err := gce.DeleteFirewall(loadBalancerName); err != nil { return err } @@ -223,48 +223,54 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): skipping firewall for healthcheck", loadBalancerName) } else { glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for healthcheck", loadBalancerName) - fwHCName := makeHealthCheckFirewallkName(loadBalancerName, clusterID, shared) - if err = gce.DeleteFirewall(fwHCName); err != nil && !isInUsedByError(err) { + fwHCName := makeHealthCheckFirewallName(loadBalancerName, clusterID, sharedBackend) + if err := gce.DeleteFirewall(fwHCName); err != nil && !isInUsedByError(err) { return err } } // Try deleting instance groups - expect ResourceInuse error if needed by other LBs igName := makeInstanceGroupName(clusterID) - if err = gce.ensureInternalInstanceGroupsDeleted(igName); err != nil && !isInUsedByError(err) { + if err := gce.ensureInternalInstanceGroupsDeleted(igName); err != nil && !isInUsedByError(err) { return err } return nil } -func (gce *GCECloud) teardownInternalBackendResources(bsName, hcName string) error { +func (gce *GCECloud) teardownInternalBackendService(bsName string) error { if err := gce.DeleteRegionBackendService(bsName, gce.region); err != nil { if isNotFound(err) { - glog.V(2).Infof("backend service already deleted: %v, err: %v", bsName, err) + glog.V(2).Infof("teardownInternalBackendService(%v): backend service already deleted. err: %v", bsName, err) + return nil } else if err != nil && isInUsedByError(err) { - glog.V(2).Infof("backend service in use: %v, err: %v", bsName, err) + glog.V(2).Infof("teardownInternalBackendService(%v): backend service in use.", bsName) + return nil } else { return fmt.Errorf("failed to delete backend service: %v, err: %v", bsName, err) } } + glog.V(2).Infof("teardownInternalBackendService(%v): backend service deleted", bsName) + return nil +} - if hcName == "" { - return nil - } +func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(hcName string) error { hcInUse := false if err := gce.DeleteHealthCheck(hcName); err != nil { if isNotFound(err) { - glog.V(2).Infof("health check already deleted: %v, err: %v", hcName, err) + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check does not exist.", hcName) + return nil } else if err != nil && isInUsedByError(err) { hcInUse = true - glog.V(2).Infof("health check in use: %v, err: %v", hcName, err) + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check in use.", hcName) + return nil } else { return fmt.Errorf("failed to delete health check: %v, err: %v", hcName, err) } } + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check deleted", hcName) - hcFirewallName := makeHealthCheckFirewallkNameFromHC(hcName) + hcFirewallName := makeHealthCheckFirewallNameFromHC(hcName) if hcInUse { glog.V(2).Infof("skipping deletion of health check firewall: %v", hcFirewallName) return nil @@ -273,6 +279,7 @@ func (gce *GCECloud) teardownInternalBackendResources(bsName, hcName string) err if err := gce.DeleteFirewall(hcFirewallName); err != nil && !isNotFound(err) { return fmt.Errorf("failed to delete health check firewall: %v, err: %v", hcFirewallName, err) } + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check firewall deleted", hcFirewallName) return nil } @@ -315,7 +322,7 @@ func (gce *GCECloud) ensureInternalFirewall(fwName, fwDesc string, sourceRanges return gce.UpdateFirewall(expectedFirewall) } -func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, clusterID string, nm types.NamespacedName, svc *v1.Service, healthCheckPort string, shared bool, nodes []*v1.Node) error { +func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, clusterID string, nm types.NamespacedName, svc *v1.Service, healthCheckPort string, sharedHealthCheck bool, nodes []*v1.Node) error { // First firewall is for ingress traffic fwDesc := makeFirewallDescription(nm.String(), svc.Spec.LoadBalancerIP) ports, protocol := getPortsAndProtocol(svc.Spec.Ports) @@ -329,7 +336,7 @@ func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, clusterID string, } // Second firewall is for health checking nodes / services - fwHCName := makeHealthCheckFirewallkName(loadBalancerName, clusterID, shared) + fwHCName := makeHealthCheckFirewallName(loadBalancerName, clusterID, sharedHealthCheck) hcSrcRanges := LoadBalancerSrcRanges() return gce.ensureInternalFirewall(fwHCName, "", hcSrcRanges, []string{healthCheckPort}, v1.ProtocolTCP, nodes) } @@ -496,6 +503,8 @@ func (gce *GCECloud) ensureInternalBackendService(name, description string, affi } glog.V(2).Infof("ensureInternalBackendService: updating backend service %v", name) + // Set fingerprint for optimistic locking + expectedBS.Fingerprint = bs.Fingerprint if err := gce.UpdateRegionBackendService(expectedBS, gce.region); err != nil { return err } @@ -503,6 +512,7 @@ func (gce *GCECloud) ensureInternalBackendService(name, description string, affi return nil } +// ensureInternalBackendServiceGroups updates backend services if their list of backend instance groups is incorrect. func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []string) error { glog.V(2).Infof("ensureInternalBackendServiceGroups(%v): checking existing backend service's groups", name) bs, err := gce.GetRegionBackendService(name, gce.region) @@ -516,7 +526,6 @@ func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []s } glog.V(2).Infof("ensureInternalBackendServiceGroups: updating backend service %v", name) - bs.Backends = backends if err := gce.UpdateRegionBackendService(bs, gce.region); err != nil { return err } @@ -524,6 +533,10 @@ func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []s return nil } +func serviceIsShared(svc *v1.Service) bool { + return GetLoadBalancerAnnotationBackendShare(svc) && !v1_service.RequestsOnlyLocalTraffic(svc) +} + func backendsFromGroupLinks(igLinks []string) []*compute.Backend { var backends []*compute.Backend for _, igLink := range igLinks { diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go index 7279be63ffe..01f0488abb8 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go @@ -17,6 +17,8 @@ limitations under the License. package gce import ( + "crypto/sha1" + "encoding/hex" "fmt" "strings" @@ -33,15 +35,16 @@ func makeInstanceGroupName(clusterID string) string { func makeBackendServiceName(loadBalancerName, clusterID string, shared bool, scheme lbScheme, protocol v1.Protocol, svcAffinity v1.ServiceAffinity) string { if shared { - affinity := "" - switch svcAffinity { - case v1.ServiceAffinityClientIP: - affinity = "clientip" - default: - affinity = "noaffinity" - } + hash := sha1.New() - return fmt.Sprintf("k8s-%s-%s-%s-%s", clusterID, strings.ToLower(string(scheme)), strings.ToLower(string(protocol)), affinity) + // For every non-nil option, hash its value. Currently, only service affinity is relevant. + hash.Write([]byte(string(svcAffinity))) + + hashed := hex.EncodeToString(hash.Sum(nil)) + hashed = hashed[:16] + + // 3 + 1 + 16 + 1 + 8 + 1 + 3 + 16 + return fmt.Sprintf("k8s-%s-%s-%s-nmv1-%s", clusterID, strings.ToLower(string(scheme)), strings.ToLower(string(protocol)), hashed) } return loadBalancerName } @@ -54,11 +57,11 @@ func makeHealthCheckName(loadBalancerName, clusterID string, shared bool) string return loadBalancerName } -func makeHealthCheckFirewallkNameFromHC(healthCheckName string) string { +func makeHealthCheckFirewallNameFromHC(healthCheckName string) string { return healthCheckName + "-hc" } -func makeHealthCheckFirewallkName(loadBalancerName, clusterID string, shared bool) string { +func makeHealthCheckFirewallName(loadBalancerName, clusterID string, shared bool) string { if shared { return fmt.Sprintf("k8s-%s-node-hc", clusterID) } From efc2989dde1d80c4da65e6129a5c47f1dccf1f6d Mon Sep 17 00:00:00 2001 From: Nick Sardo Date: Tue, 13 Jun 2017 15:39:41 -0700 Subject: [PATCH 2/2] Final fixes --- .../providers/gce/gce_annotations.go | 2 + .../gce/gce_loadbalancer_internal.go | 96 ++++++++----------- .../providers/gce/gce_loadbalancer_naming.go | 11 ++- 3 files changed, 53 insertions(+), 56 deletions(-) diff --git a/pkg/cloudprovider/providers/gce/gce_annotations.go b/pkg/cloudprovider/providers/gce/gce_annotations.go index 280bdbc7351..a7f6683fb62 100644 --- a/pkg/cloudprovider/providers/gce/gce_annotations.go +++ b/pkg/cloudprovider/providers/gce/gce_annotations.go @@ -55,6 +55,8 @@ func GetLoadBalancerAnnotationType(service *v1.Service) (LoadBalancerType, bool) } } +// GetLoadBalancerAnnotationBackendShare returns whether this service's backend service should be +// shared with other load balancers. Health checks and the healthcheck firewall will be shared regardless. func GetLoadBalancerAnnotationBackendShare(service *v1.Service) bool { l, exists := service.Annotations[ServiceAnnotationILBBackendShare] if exists && l == "true" { diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go index 0d069610020..cd981080f75 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go @@ -41,7 +41,7 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s ports, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal loadBalancerName := cloudprovider.GetLoadBalancerName(svc) - sharedBackend := serviceIsShared(svc) + sharedBackend := shareBackendService(svc) backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) backendServiceLink := gce.getBackendServiceLink(backendServiceName) @@ -52,18 +52,21 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return nil, err } - existingBSName := getNameFromLink(existingFwdRule.BackendService) - existingBackendService, err := gce.GetRegionBackendService(existingBSName, gce.region) - if err != nil && !isNotFound(err) { - return nil, err + // Get existing backend service (if exists) + var existingBackendService *compute.BackendService + if existingFwdRule != nil && existingFwdRule.BackendService != "" { + existingBSName := getNameFromLink(existingFwdRule.BackendService) + if existingBackendService, err = gce.GetRegionBackendService(existingBSName, gce.region); err != nil && !isNotFound(err) { + return nil, err + } } // Lock the sharedResourceLock to prevent any deletions of shared resources while assembling shared resources here gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() - // Ensure health check for backend service is created - // The health check is shared unless + // Ensure health check exists before creating the backend service. The health check is shared + // if externalTrafficPolicy=Cluster. sharedHealthCheck := !v1_service.RequestsOnlyLocalTraffic(svc) hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedHealthCheck) hcPath, hcPort := GetNodesHealthCheckPath(), GetNodesHealthCheckPort() @@ -103,8 +106,8 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s } fwdRuleDeleted := false - if !fwdRuleEqual(existingFwdRule, expectedFwdRule) { - glog.V(2).Infof("ensureInternalLoadBalancer(%v: deleting existing forwarding rule with IP address %v", loadBalancerName, existingFwdRule.IPAddress) + if existingFwdRule != nil && !fwdRuleEqual(existingFwdRule, expectedFwdRule) { + glog.V(2).Infof("ensureInternalLoadBalancer(%v): deleting existing forwarding rule with IP address %v", loadBalancerName, existingFwdRule.IPAddress) if err = gce.DeleteRegionForwardingRule(loadBalancerName, gce.region); err != nil && !isNotFound(err) { return nil, err } @@ -119,15 +122,15 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s // If we previously deleted the forwarding rule or it never existed, finally create it. if fwdRuleDeleted || existingFwdRule == nil { - glog.V(2).Infof("ensureInternalLoadBalancer(%v(%v)): creating forwarding rule", loadBalancerName, svc.Name) + glog.V(2).Infof("ensureInternalLoadBalancer(%v): creating forwarding rule", loadBalancerName) if err = gce.CreateRegionForwardingRule(expectedFwdRule, gce.region); err != nil { return nil, err } } - // Delete the previous internal load balancer if necessary - if err = gce.clearPreviousInternalResources(loadBalancerName, existingBackendService, backendServiceName, hcName); err != nil { - return nil, err + // Delete the previous internal load balancer resources if necessary + if existingBackendService != nil { + gce.clearPreviousInternalResources(loadBalancerName, existingBackendService, backendServiceName, hcName) } // Get the most recent forwarding rule for the new address. @@ -141,7 +144,8 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return status, nil } -func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) error { +func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) { + // If a new backend service was created, delete the old one. if existingBackendService.Name != expectedBSName { glog.V(2).Infof("clearPreviousInternalResources(%v): expected backend service %q does not match previous %q - deleting backend service", loadBalancerName, expectedBSName, existingBackendService.Name) if err := gce.teardownInternalBackendService(existingBackendService.Name); err != nil && !isNotFound(err) { @@ -149,6 +153,7 @@ func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, exi } } + // If a new health check was created, delete the old one. if len(existingBackendService.HealthChecks) == 1 { existingHCName := getNameFromLink(existingBackendService.HealthChecks[0]) if existingHCName != expectedHCName { @@ -157,11 +162,13 @@ func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, exi glog.Warningf("clearPreviousInternalResources: could not delete existing healthcheck: %v, err: %v", existingHCName, err) } } + } else if len(existingBackendService.HealthChecks) > 1 { + glog.Warningf("clearPreviousInternalResources(%v): more than one health check on the backend service %v, %v", loadBalancerName, existingBackendService.Name, existingBackendService.HealthChecks) } - - return nil } +// updateInternalLoadBalancer is called when the list of nodes has changed. Therefore, only the instance groups +// and possibly the backend service need to be updated. func (gce *GCECloud) updateInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, nodes []*v1.Node) error { gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() @@ -176,9 +183,8 @@ func (gce *GCECloud) updateInternalLoadBalancer(clusterName, clusterID string, s _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal loadBalancerName := cloudprovider.GetLoadBalancerName(svc) - sharedBackend := serviceIsShared(svc) - backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) - // Ensure the backend service has the proper backend instance-group links + backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, shareBackendService(svc), scheme, protocol, svc.Spec.SessionAffinity) + // Ensure the backend service has the proper backend/instance-group links return gce.ensureInternalBackendServiceGroups(backendServiceName, igLinks) } @@ -186,7 +192,8 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st loadBalancerName := cloudprovider.GetLoadBalancerName(svc) _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := schemeInternal - sharedBackend := serviceIsShared(svc) + sharedBackend := shareBackendService(svc) + sharedHealthCheck := !v1_service.RequestsOnlyLocalTraffic(svc) gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() @@ -198,35 +205,19 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st backendServiceName := makeBackendServiceName(loadBalancerName, clusterID, sharedBackend, scheme, protocol, svc.Spec.SessionAffinity) glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting region backend service %v", loadBalancerName, backendServiceName) - if err := gce.DeleteRegionBackendService(backendServiceName, gce.region); err != nil && !isNotFoundOrInUse(err) { + if err := gce.teardownInternalBackendService(backendServiceName); err != nil { return err } - // Only delete the health check & health check firewall if they aren't being used by another LB. If we get - // an ResourceInuseBy error, then we can skip deleting the firewall. - hcInUse := false - hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedBackend) - glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting health check %v", loadBalancerName, hcName) - if err := gce.DeleteHealthCheck(hcName); err != nil && !isNotFoundOrInUse(err) { - return err - } else if isInUsedByError(err) { - glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): healthcheck %v still in use", loadBalancerName, hcName) - hcInUse = true - } - glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for traffic", loadBalancerName) if err := gce.DeleteFirewall(loadBalancerName); err != nil { return err } - if hcInUse { - glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): skipping firewall for healthcheck", loadBalancerName) - } else { - glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for healthcheck", loadBalancerName) - fwHCName := makeHealthCheckFirewallName(loadBalancerName, clusterID, sharedBackend) - if err := gce.DeleteFirewall(fwHCName); err != nil && !isInUsedByError(err) { - return err - } + hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedHealthCheck) + glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting health check %v and its firewall", loadBalancerName, hcName) + if err := gce.teardownInternalHealthCheckAndFirewall(hcName); err != nil { + return err } // Try deleting instance groups - expect ResourceInuse error if needed by other LBs @@ -243,7 +234,7 @@ func (gce *GCECloud) teardownInternalBackendService(bsName string) error { if isNotFound(err) { glog.V(2).Infof("teardownInternalBackendService(%v): backend service already deleted. err: %v", bsName, err) return nil - } else if err != nil && isInUsedByError(err) { + } else if isInUsedByError(err) { glog.V(2).Infof("teardownInternalBackendService(%v): backend service in use.", bsName) return nil } else { @@ -255,13 +246,11 @@ func (gce *GCECloud) teardownInternalBackendService(bsName string) error { } func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(hcName string) error { - hcInUse := false if err := gce.DeleteHealthCheck(hcName); err != nil { if isNotFound(err) { glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check does not exist.", hcName) - return nil - } else if err != nil && isInUsedByError(err) { - hcInUse = true + // Purposely do not early return - double check the firewall does not exist + } else if isInUsedByError(err) { glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check in use.", hcName) return nil } else { @@ -271,11 +260,6 @@ func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(hcName string) error glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check deleted", hcName) hcFirewallName := makeHealthCheckFirewallNameFromHC(hcName) - if hcInUse { - glog.V(2).Infof("skipping deletion of health check firewall: %v", hcFirewallName) - return nil - } - if err := gce.DeleteFirewall(hcFirewallName); err != nil && !isNotFound(err) { return fmt.Errorf("failed to delete health check firewall: %v, err: %v", hcFirewallName, err) } @@ -455,9 +439,9 @@ func (gce *GCECloud) ensureInternalInstanceGroupsDeleted(name string) error { return err } - glog.V(2).Infof("ensureInternalInstanceGroupsDeleted(%v): deleting instance group in all %d zones", name, len(zones)) + glog.V(2).Infof("ensureInternalInstanceGroupsDeleted(%v): attempting delete instance group in all %d zones", name, len(zones)) for _, z := range zones { - if err := gce.DeleteInstanceGroup(name, z.Name); err != nil && !isNotFound(err) { + if err := gce.DeleteInstanceGroup(name, z.Name); err != nil && !isNotFoundOrInUse(err) { return err } } @@ -533,7 +517,7 @@ func (gce *GCECloud) ensureInternalBackendServiceGroups(name string, igLinks []s return nil } -func serviceIsShared(svc *v1.Service) bool { +func shareBackendService(svc *v1.Service) bool { return GetLoadBalancerAnnotationBackendShare(svc) && !v1_service.RequestsOnlyLocalTraffic(svc) } @@ -644,6 +628,10 @@ func (gce *GCECloud) getBackendServiceLink(name string) string { } func getNameFromLink(link string) string { + if link == "" { + return "" + } + fields := strings.Split(link, "/") return fields[len(fields)-1] } diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go index 01f0488abb8..9a51a244a58 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_naming.go @@ -43,7 +43,14 @@ func makeBackendServiceName(loadBalancerName, clusterID string, shared bool, sch hashed := hex.EncodeToString(hash.Sum(nil)) hashed = hashed[:16] - // 3 + 1 + 16 + 1 + 8 + 1 + 3 + 16 + // k8s- 4 + // {clusterid}- 17 + // {scheme}- 9 (internal/external) + // {protocol}- 4 (tcp/udp) + // nmv1- 5 (naming convention version) + // {suffix} 16 (hash of settings) + // ----------------- + // 55 characters used return fmt.Sprintf("k8s-%s-%s-%s-nmv1-%s", clusterID, strings.ToLower(string(scheme)), strings.ToLower(string(protocol)), hashed) } return loadBalancerName @@ -72,7 +79,7 @@ func makeBackendServiceDescription(nm types.NamespacedName, shared bool) string if shared { return "" } - return fmt.Sprintf(`{"kubernetes.io/service-name":"%s"`, nm.String()) + return fmt.Sprintf(`{"kubernetes.io/service-name":"%s"}`, nm.String()) } // External Load Balancer