From 676b95e09761f22d53dcd6259a6feb0dc9a4ffcf Mon Sep 17 00:00:00 2001 From: Nick Sardo Date: Fri, 1 Sep 2017 16:30:17 -0700 Subject: [PATCH] Gracefully handle permission errors when attempting to create firewall rules --- pkg/cloudprovider/providers/gce/BUILD | 3 + pkg/cloudprovider/providers/gce/gce.go | 16 +++++ .../providers/gce/gce_clusterid.go | 2 +- .../gce/gce_loadbalancer_external.go | 72 ++++++++++++------- .../gce/gce_loadbalancer_internal.go | 55 +++++++++----- pkg/cloudprovider/providers/gce/gce_util.go | 46 ++++++++++-- test/e2e/framework/util.go | 2 +- 7 files changed, 144 insertions(+), 52 deletions(-) diff --git a/pkg/cloudprovider/providers/gce/BUILD b/pkg/cloudprovider/providers/gce/BUILD index 65f7a5e7d62..5921c4d62ed 100644 --- a/pkg/cloudprovider/providers/gce/BUILD +++ b/pkg/cloudprovider/providers/gce/BUILD @@ -76,7 +76,10 @@ go_library( "//vendor/k8s.io/apiserver/pkg/server/options/encryptionconfig:go_default_library", "//vendor/k8s.io/apiserver/pkg/storage/value/encrypt/envelope:go_default_library", "//vendor/k8s.io/client-go/kubernetes:go_default_library", + "//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library", + "//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library", "//vendor/k8s.io/client-go/tools/cache:go_default_library", + "//vendor/k8s.io/client-go/tools/record:go_default_library", "//vendor/k8s.io/client-go/util/flowcontrol:go_default_library", ], ) diff --git a/pkg/cloudprovider/providers/gce/gce.go b/pkg/cloudprovider/providers/gce/gce.go index b28f61c6bcc..fb15109c693 100644 --- a/pkg/cloudprovider/providers/gce/gce.go +++ b/pkg/cloudprovider/providers/gce/gce.go @@ -30,10 +30,15 @@ import ( "cloud.google.com/go/compute/metadata" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/server/options/encryptionconfig" "k8s.io/apiserver/pkg/storage/value/encrypt/envelope" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/tools/record" "k8s.io/client-go/util/flowcontrol" "k8s.io/kubernetes/pkg/cloudprovider" "k8s.io/kubernetes/pkg/controller" @@ -99,7 +104,10 @@ type GCECloud struct { serviceAlpha *computealpha.Service containerService *container.Service cloudkmsService *cloudkms.Service + client clientset.Interface clientBuilder controller.ControllerClientBuilder + eventBroadcaster record.EventBroadcaster + eventRecorder record.EventRecorder projectID string region string localZone string // The zone in which we are running @@ -531,6 +539,14 @@ func tryConvertToProjectNames(configProject, configNetworkProject string, servic // This must be called before utilizing the funcs of gce.ClusterID func (gce *GCECloud) Initialize(clientBuilder controller.ControllerClientBuilder) { gce.clientBuilder = clientBuilder + gce.client = clientBuilder.ClientOrDie("cloud-provider") + + if gce.OnXPN() { + gce.eventBroadcaster = record.NewBroadcaster() + gce.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(gce.client.Core().RESTClient()).Events("")}) + gce.eventRecorder = gce.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "gce-cloudprovider"}) + } + go gce.watchClusterID() } diff --git a/pkg/cloudprovider/providers/gce/gce_clusterid.go b/pkg/cloudprovider/providers/gce/gce_clusterid.go index efcadc1ca25..6f1b667c8ef 100644 --- a/pkg/cloudprovider/providers/gce/gce_clusterid.go +++ b/pkg/cloudprovider/providers/gce/gce_clusterid.go @@ -62,7 +62,7 @@ type ClusterID struct { func (gce *GCECloud) watchClusterID() { gce.ClusterID = ClusterID{ cfgMapKey: fmt.Sprintf("%v/%v", UIDNamespace, UIDConfigMapName), - client: gce.clientBuilder.ClientOrDie("cloud-provider"), + client: gce.client, } mapEventHandler := cache.ResourceEventHandlerFuncs{ diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go index 0b8d6e03830..c4f3beccb67 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_external.go @@ -181,13 +181,13 @@ func (gce *GCECloud) ensureExternalLoadBalancer(clusterName, clusterID string, a // without needing to be deleted and recreated. if firewallExists { glog.Infof("EnsureLoadBalancer(%v(%v)): updating firewall", loadBalancerName, serviceName) - if err := gce.updateFirewall(makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil { + if err := gce.updateFirewall(apiService, makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil { return nil, err } glog.Infof("EnsureLoadBalancer(%v(%v)): updated firewall", loadBalancerName, serviceName) } else { glog.Infof("EnsureLoadBalancer(%v(%v)): creating firewall", loadBalancerName, serviceName) - if err := gce.createFirewall(makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil { + if err := gce.createFirewall(apiService, makeFirewallName(loadBalancerName), gce.region, desc, sourceRanges, ports, hosts); err != nil { return nil, err } glog.Infof("EnsureLoadBalancer(%v(%v)): created firewall", loadBalancerName, serviceName) @@ -259,7 +259,7 @@ func (gce *GCECloud) ensureExternalLoadBalancer(clusterName, clusterID string, a if hcToDelete != nil { hcNames = append(hcNames, hcToDelete.Name) } - if err := gce.DeleteExternalTargetPoolAndChecks(loadBalancerName, gce.region, clusterID, hcNames...); err != nil { + if err := gce.DeleteExternalTargetPoolAndChecks(apiService, loadBalancerName, gce.region, clusterID, hcNames...); err != nil { return nil, fmt.Errorf("failed to delete existing target pool %s for load balancer update: %v", loadBalancerName, err) } glog.Infof("EnsureLoadBalancer(%v(%v)): deleted target pool", loadBalancerName, serviceName) @@ -273,7 +273,7 @@ func (gce *GCECloud) ensureExternalLoadBalancer(clusterName, clusterID string, a createInstances = createInstances[:maxTargetPoolCreateInstances] } // Pass healthchecks to createTargetPool which needs them as health check links in the target pool - if err := gce.createTargetPool(loadBalancerName, serviceName.String(), ipAddressToUse, gce.region, clusterID, createInstances, affinityType, hcToCreate); err != nil { + if err := gce.createTargetPool(apiService, loadBalancerName, serviceName.String(), ipAddressToUse, gce.region, clusterID, createInstances, affinityType, hcToCreate); err != nil { return nil, fmt.Errorf("failed to create target pool %s: %v", loadBalancerName, err) } if hcToCreate != nil { @@ -355,7 +355,16 @@ func (gce *GCECloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID st } errs := utilerrors.AggregateGoroutines( - func() error { return ignoreNotFound(gce.DeleteFirewall(makeFirewallName(loadBalancerName))) }, + func() error { + fwName := makeFirewallName(loadBalancerName) + err := ignoreNotFound(gce.DeleteFirewall(fwName)) + if isForbidden(err) && gce.OnXPN() { + glog.V(4).Infof("ensureExternalLoadBalancerDeleted(%v): do not have permission to delete firewall rule (on XPN). Raising event.", loadBalancerName) + gce.raiseFirewallChangeNeededEvent(service, FirewallToGCloudDeleteCmd(fwName, gce.NetworkProjectID())) + return nil + } + return err + }, // Even though we don't hold on to static IPs for load balancers, it's // possible that EnsureLoadBalancer left one around in a failed // creation/update attempt, so make sure we clean it up here just in case. @@ -366,7 +375,7 @@ func (gce *GCECloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID st if err := ignoreNotFound(gce.DeleteRegionForwardingRule(loadBalancerName, gce.region)); err != nil { return err } - if err := gce.DeleteExternalTargetPoolAndChecks(loadBalancerName, gce.region, clusterID, hcNames...); err != nil { + if err := gce.DeleteExternalTargetPoolAndChecks(service, loadBalancerName, gce.region, clusterID, hcNames...); err != nil { return err } return nil @@ -378,7 +387,7 @@ func (gce *GCECloud) ensureExternalLoadBalancerDeleted(clusterName, clusterID st return nil } -func (gce *GCECloud) DeleteExternalTargetPoolAndChecks(name, region, clusterID string, hcNames ...string) error { +func (gce *GCECloud) DeleteExternalTargetPoolAndChecks(service *v1.Service, name, region, clusterID string, hcNames ...string) error { if err := gce.DeleteTargetPool(name, region); err != nil && isHTTPErrorCode(err, http.StatusNotFound) { glog.Infof("Target pool %s already deleted. Continuing to delete other resources.", name) } else if err != nil { @@ -420,9 +429,10 @@ func (gce *GCECloud) DeleteExternalTargetPoolAndChecks(name, region, clusterID s // So we should delete the health check firewall as well. fwName := MakeHealthCheckFirewallName(clusterID, hcName, isNodesHealthCheck) glog.Infof("Deleting firewall %v.", fwName) - if err := gce.DeleteFirewall(fwName); err != nil { - if isHTTPErrorCode(err, http.StatusNotFound) { - glog.V(4).Infof("Firewall %v is already deleted.", fwName) + if err := ignoreNotFound(gce.DeleteFirewall(fwName)); err != nil { + if isForbidden(err) && gce.OnXPN() { + glog.V(4).Infof("DeleteExternalTargetPoolAndChecks(%v): do not have permission to delete firewall rule (on XPN). Raising event.", hcName) + gce.raiseFirewallChangeNeededEvent(service, FirewallToGCloudDeleteCmd(fwName, gce.NetworkProjectID())) return nil } return err @@ -486,7 +496,7 @@ func verifyUserRequestedIP(s CloudAddressService, region, requestedIP, fwdRuleIP return false, fmt.Errorf("requested ip %q is neither static nor assigned to the LB", requestedIP) } -func (gce *GCECloud) createTargetPool(name, serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, affinityType v1.ServiceAffinity, hc *compute.HttpHealthCheck) error { +func (gce *GCECloud) createTargetPool(svc *v1.Service, name, serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, affinityType v1.ServiceAffinity, hc *compute.HttpHealthCheck) error { // health check management is coupled with targetPools to prevent leaks. A // target pool is the only thing that requires a health check, so we delete // associated checks on teardown, and ensure checks on setup. @@ -499,10 +509,9 @@ func (gce *GCECloud) createTargetPool(name, serviceName, ipAddress, region, clus gce.sharedResourceLock.Lock() defer gce.sharedResourceLock.Unlock() } - if !gce.OnXPN() { - if err := gce.ensureHttpHealthCheckFirewall(serviceName, ipAddress, region, clusterID, hosts, hc.Name, int32(hc.Port), isNodesHealthCheck); err != nil { - return err - } + + if err := gce.ensureHttpHealthCheckFirewall(svc, serviceName, ipAddress, region, clusterID, hosts, hc.Name, int32(hc.Port), isNodesHealthCheck); err != nil { + return err } var err error if hc, err = gce.ensureHttpHealthCheck(hc.Name, hc.RequestPath, int32(hc.Port)); err != nil || hc == nil { @@ -751,11 +760,6 @@ func translateAffinityType(affinityType v1.ServiceAffinity) string { } func (gce *GCECloud) firewallNeedsUpdate(name, serviceName, region, ipAddress string, ports []v1.ServicePort, sourceRanges netsets.IPNet) (exists bool, needsUpdate bool, err error) { - if gce.OnXPN() { - glog.V(2).Infoln("firewallNeedsUpdate: Cluster is on XPN network - skipping firewall creation") - return false, false, nil - } - fw, err := gce.service.Firewalls.Get(gce.NetworkProjectID(), makeFirewallName(name)).Do() if err != nil { if isHTTPErrorCode(err, http.StatusNotFound) { @@ -793,7 +797,7 @@ func (gce *GCECloud) firewallNeedsUpdate(name, serviceName, region, ipAddress st return true, false, nil } -func (gce *GCECloud) ensureHttpHealthCheckFirewall(serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, hcName string, hcPort int32, isNodesHealthCheck bool) error { +func (gce *GCECloud) ensureHttpHealthCheckFirewall(svc *v1.Service, serviceName, ipAddress, region, clusterID string, hosts []*gceInstance, hcName string, hcPort int32, isNodesHealthCheck bool) error { // Prepare the firewall params for creating / checking. desc := fmt.Sprintf(`{"kubernetes.io/cluster-id":"%s"}`, clusterID) if !isNodesHealthCheck { @@ -809,7 +813,7 @@ func (gce *GCECloud) ensureHttpHealthCheckFirewall(serviceName, ipAddress, regio return fmt.Errorf("error getting firewall for health checks: %v", err) } glog.Infof("Creating firewall %v for health checks.", fwName) - if err := gce.createFirewall(fwName, region, desc, sourceRanges, ports, hosts); err != nil { + if err := gce.createFirewall(svc, fwName, region, desc, sourceRanges, ports, hosts); err != nil { return err } glog.Infof("Created firewall %v for health checks.", fwName) @@ -822,7 +826,7 @@ func (gce *GCECloud) ensureHttpHealthCheckFirewall(serviceName, ipAddress, regio !equalStringSets(fw.Allowed[0].Ports, []string{strconv.Itoa(int(ports[0].Port))}) || !equalStringSets(fw.SourceRanges, sourceRanges.StringSlice()) { glog.Warningf("Firewall %v exists but parameters have drifted - updating...", fwName) - if err := gce.updateFirewall(fwName, region, desc, sourceRanges, ports, hosts); err != nil { + if err := gce.updateFirewall(svc, fwName, region, desc, sourceRanges, ports, hosts); err != nil { glog.Warningf("Failed to reconcile firewall %v parameters.", fwName) return err } @@ -870,24 +874,38 @@ func createForwardingRule(s CloudForwardingRuleService, name, serviceName, regio return nil } -func (gce *GCECloud) createFirewall(name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error { +func (gce *GCECloud) createFirewall(svc *v1.Service, name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error { firewall, err := gce.firewallObject(name, region, desc, sourceRanges, ports, hosts) if err != nil { return err } - if err = gce.CreateFirewall(firewall); err != nil && !isHTTPErrorCode(err, http.StatusConflict) { + if err = gce.CreateFirewall(firewall); err != nil { + if isHTTPErrorCode(err, http.StatusConflict) { + return nil + } else if isForbidden(err) && gce.OnXPN() { + glog.V(4).Infof("createFirewall(%v): do not have permission to create firewall rule (on XPN). Raising event.", firewall.Name) + gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudCreateCmd(firewall, gce.NetworkProjectID())) + return nil + } return err } return nil } -func (gce *GCECloud) updateFirewall(name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error { +func (gce *GCECloud) updateFirewall(svc *v1.Service, name, region, desc string, sourceRanges netsets.IPNet, ports []v1.ServicePort, hosts []*gceInstance) error { firewall, err := gce.firewallObject(name, region, desc, sourceRanges, ports, hosts) if err != nil { return err } - if err = gce.UpdateFirewall(firewall); err != nil && !isHTTPErrorCode(err, http.StatusConflict) { + if err = gce.UpdateFirewall(firewall); err != nil { + if isHTTPErrorCode(err, http.StatusConflict) { + return nil + } else if isForbidden(err) && gce.OnXPN() { + glog.V(4).Infof("updateFirewall(%v): do not have permission to update firewall rule (on XPN). Raising event.", firewall.Name) + gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudUpdateCmd(firewall, gce.NetworkProjectID())) + return nil + } return err } return nil diff --git a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go index 60b8cfbc9d4..31d770c849a 100644 --- a/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go +++ b/pkg/cloudprovider/providers/gce/gce_loadbalancer_internal.go @@ -34,8 +34,6 @@ const ( allInstances = "ALL" ) -type lbBalancingMode string - func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v1.Service, existingFwdRule *compute.ForwardingRule, nodes []*v1.Node) (*v1.LoadBalancerStatus, error) { nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} ports, protocol := getPortsAndProtocol(svc.Spec.Ports) @@ -90,12 +88,8 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s glog.V(2).Infof("ensureInternalLoadBalancer(%v): reserved IP %q for the forwarding rule", loadBalancerName, ipToUse) // Ensure firewall rules if necessary - if gce.OnXPN() { - glog.V(2).Infof("ensureInternalLoadBalancer: cluster is on a cross-project network (XPN) network project %v, compute project %v - skipping firewall creation", gce.networkProjectID, gce.projectID) - } else { - if err = gce.ensureInternalFirewalls(loadBalancerName, ipToUse, clusterID, nm, svc, strconv.Itoa(int(hcPort)), sharedHealthCheck, nodes); err != nil { - return nil, err - } + if err = gce.ensureInternalFirewalls(loadBalancerName, ipToUse, clusterID, nm, svc, strconv.Itoa(int(hcPort)), sharedHealthCheck, nodes); err != nil { + return nil, err } expectedFwdRule := &compute.ForwardingRule{ @@ -141,7 +135,7 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s // Delete the previous internal load balancer resources if necessary if existingBackendService != nil { - gce.clearPreviousInternalResources(loadBalancerName, existingBackendService, backendServiceName, hcName) + gce.clearPreviousInternalResources(svc, loadBalancerName, existingBackendService, backendServiceName, hcName) } // Now that the controller knows the forwarding rule exists, we can release the address. @@ -154,7 +148,7 @@ func (gce *GCECloud) ensureInternalLoadBalancer(clusterName, clusterID string, s return status, nil } -func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) { +func (gce *GCECloud) clearPreviousInternalResources(svc *v1.Service, loadBalancerName string, existingBackendService *compute.BackendService, expectedBSName, expectedHCName string) { // If a new backend service was created, delete the old one. if existingBackendService.Name != expectedBSName { glog.V(2).Infof("clearPreviousInternalResources(%v): expected backend service %q does not match previous %q - deleting backend service", loadBalancerName, expectedBSName, existingBackendService.Name) @@ -168,7 +162,7 @@ func (gce *GCECloud) clearPreviousInternalResources(loadBalancerName string, exi existingHCName := getNameFromLink(existingBackendService.HealthChecks[0]) if existingHCName != expectedHCName { glog.V(2).Infof("clearPreviousInternalResources(%v): expected health check %q does not match previous %q - deleting health check", loadBalancerName, expectedHCName, existingHCName) - if err := gce.teardownInternalHealthCheckAndFirewall(existingHCName); err != nil { + if err := gce.teardownInternalHealthCheckAndFirewall(svc, existingHCName); err != nil { glog.Warningf("clearPreviousInternalResources: could not delete existing healthcheck: %v, err: %v", existingHCName, err) } } @@ -224,12 +218,17 @@ func (gce *GCECloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID st glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting firewall for traffic", loadBalancerName) if err := gce.DeleteFirewall(loadBalancerName); err != nil { - return err + if isForbidden(err) && gce.OnXPN() { + glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): could not delete traffic firewall on XPN cluster. Raising event.", loadBalancerName) + gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudDeleteCmd(loadBalancerName, gce.NetworkProjectID())) + } else { + return err + } } hcName := makeHealthCheckName(loadBalancerName, clusterID, sharedHealthCheck) glog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): deleting health check %v and its firewall", loadBalancerName, hcName) - if err := gce.teardownInternalHealthCheckAndFirewall(hcName); err != nil { + if err := gce.teardownInternalHealthCheckAndFirewall(svc, hcName); err != nil { return err } @@ -258,7 +257,7 @@ func (gce *GCECloud) teardownInternalBackendService(bsName string) error { return nil } -func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(hcName string) error { +func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(svc *v1.Service, hcName string) error { if err := gce.DeleteHealthCheck(hcName); err != nil { if isNotFound(err) { glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check does not exist.", hcName) @@ -274,13 +273,19 @@ func (gce *GCECloud) teardownInternalHealthCheckAndFirewall(hcName string) error hcFirewallName := makeHealthCheckFirewallNameFromHC(hcName) if err := gce.DeleteFirewall(hcFirewallName); err != nil && !isNotFound(err) { + if isForbidden(err) && gce.OnXPN() { + glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): could not delete health check traffic firewall on XPN cluster. Raising Event.", hcName) + gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudDeleteCmd(hcFirewallName, gce.NetworkProjectID())) + return nil + } + return fmt.Errorf("failed to delete health check firewall: %v, err: %v", hcFirewallName, err) } glog.V(2).Infof("teardownInternalHealthCheckAndFirewall(%v): health check firewall deleted", hcFirewallName) return nil } -func (gce *GCECloud) ensureInternalFirewall(fwName, fwDesc string, sourceRanges []string, ports []string, protocol v1.Protocol, nodes []*v1.Node) error { +func (gce *GCECloud) ensureInternalFirewall(svc *v1.Service, fwName, fwDesc string, sourceRanges []string, ports []string, protocol v1.Protocol, nodes []*v1.Node) error { glog.V(2).Infof("ensureInternalFirewall(%v): checking existing firewall", fwName) targetTags, err := gce.GetNodeTags(nodeNames(nodes)) if err != nil { @@ -308,7 +313,13 @@ func (gce *GCECloud) ensureInternalFirewall(fwName, fwDesc string, sourceRanges if existingFirewall == nil { glog.V(2).Infof("ensureInternalFirewall(%v): creating firewall", fwName) - return gce.CreateFirewall(expectedFirewall) + err = gce.CreateFirewall(expectedFirewall) + if err != nil && isForbidden(err) && gce.OnXPN() { + glog.V(2).Infof("ensureInternalFirewall(%v): do not have permission to create firewall rule (on XPN). Raising event.", fwName) + gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudCreateCmd(expectedFirewall, gce.NetworkProjectID())) + return nil + } + return err } if firewallRuleEqual(expectedFirewall, existingFirewall) { @@ -316,7 +327,13 @@ func (gce *GCECloud) ensureInternalFirewall(fwName, fwDesc string, sourceRanges } glog.V(2).Infof("ensureInternalFirewall(%v): updating firewall", fwName) - return gce.UpdateFirewall(expectedFirewall) + err = gce.UpdateFirewall(expectedFirewall) + if err != nil && isForbidden(err) && gce.OnXPN() { + glog.V(2).Infof("ensureInternalFirewall(%v): do not have permission to update firewall rule (on XPN). Raising event.", fwName) + gce.raiseFirewallChangeNeededEvent(svc, FirewallToGCloudUpdateCmd(expectedFirewall, gce.NetworkProjectID())) + return nil + } + return err } func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, ipAddress, clusterID string, nm types.NamespacedName, svc *v1.Service, healthCheckPort string, sharedHealthCheck bool, nodes []*v1.Node) error { @@ -327,7 +344,7 @@ func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, ipAddress, cluste if err != nil { return err } - err = gce.ensureInternalFirewall(loadBalancerName, fwDesc, sourceRanges.StringSlice(), ports, protocol, nodes) + err = gce.ensureInternalFirewall(svc, loadBalancerName, fwDesc, sourceRanges.StringSlice(), ports, protocol, nodes) if err != nil { return err } @@ -335,7 +352,7 @@ func (gce *GCECloud) ensureInternalFirewalls(loadBalancerName, ipAddress, cluste // Second firewall is for health checking nodes / services fwHCName := makeHealthCheckFirewallName(loadBalancerName, clusterID, sharedHealthCheck) hcSrcRanges := LoadBalancerSrcRanges() - return gce.ensureInternalFirewall(fwHCName, "", hcSrcRanges, []string{healthCheckPort}, v1.ProtocolTCP, nodes) + return gce.ensureInternalFirewall(svc, fwHCName, "", hcSrcRanges, []string{healthCheckPort}, v1.ProtocolTCP, nodes) } func (gce *GCECloud) ensureInternalHealthCheck(name string, svcName types.NamespacedName, shared bool, path string, port int32) (*compute.HealthCheck, error) { diff --git a/pkg/cloudprovider/providers/gce/gce_util.go b/pkg/cloudprovider/providers/gce/gce_util.go index f21d5113375..484a67e0b37 100644 --- a/pkg/cloudprovider/providers/gce/gce_util.go +++ b/pkg/cloudprovider/providers/gce/gce_util.go @@ -23,6 +23,7 @@ import ( "regexp" "strings" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" @@ -58,6 +59,43 @@ func getProjectAndZone() (string, string, error) { return projectID, zone, nil } +func (gce *GCECloud) raiseFirewallChangeNeededEvent(svc *v1.Service, cmd string) { + msg := fmt.Sprintf("Firewall change required by network admin: `%v`", cmd) + if gce.eventRecorder != nil && svc != nil { + gce.eventRecorder.Event(svc, v1.EventTypeNormal, "LoadBalancerManualChange", msg) + } +} + +// FirewallToGCloudCreateCmd generates a gcloud command to create a firewall with specified params +func FirewallToGCloudCreateCmd(fw *compute.Firewall, projectID string) string { + args := firewallToGcloudArgs(fw, projectID) + return fmt.Sprintf("gcloud compute firewall-rules create %v --network %v %v", fw.Name, getNameFromLink(fw.Network), args) +} + +// FirewallToGCloudCreateCmd generates a gcloud command to update a firewall to specified params +func FirewallToGCloudUpdateCmd(fw *compute.Firewall, projectID string) string { + args := firewallToGcloudArgs(fw, projectID) + return fmt.Sprintf("gcloud compute firewall-rules update %v %v", fw.Name, args) +} + +// FirewallToGCloudCreateCmd generates a gcloud command to delete a firewall to specified params +func FirewallToGCloudDeleteCmd(fwName, projectID string) string { + return fmt.Sprintf("gcloud compute firewall-rules delete %v --project %v", fwName, projectID) +} + +func firewallToGcloudArgs(fw *compute.Firewall, projectID string) string { + var allPorts []string + for _, a := range fw.Allowed { + for _, p := range a.Ports { + allPorts = append(allPorts, fmt.Sprintf("%v:%v", a.IPProtocol, p)) + } + } + allow := strings.Join(allPorts, ",") + srcRngs := strings.Join(fw.SourceRanges, ",") + targets := strings.Join(fw.TargetTags, ",") + return fmt.Sprintf("--description %q --allow %v --source-ranges %v --target-tags %v --project %v", fw.Description, allow, srcRngs, targets, projectID) +} + // Take a GCE instance 'hostname' and break it down to something that can be fed // to the GCE API client library. Basically this means reducing 'kubernetes- // node-2.c.my-proj.internal' to 'kubernetes-node-2' if necessary. @@ -150,6 +188,10 @@ func isNotFoundOrInUse(err error) bool { return isNotFound(err) || isInUsedByError(err) } +func isForbidden(err error) bool { + return isHTTPErrorCode(err, http.StatusForbidden) +} + func makeGoogleAPINotFoundError(message string) error { return &googleapi.Error{Code: http.StatusNotFound, Message: message} } @@ -158,10 +200,6 @@ func makeGoogleAPIError(code int, message string) error { return &googleapi.Error{Code: code, Message: message} } -func isForbidden(err error) bool { - return isHTTPErrorCode(err, http.StatusForbidden) -} - // TODO(#51665): Remove this once Network Tiers becomes Beta in GCP. func handleAlphaNetworkTierGetError(err error) (string, error) { if isForbidden(err) { diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index cbe8d94e710..5138aaed214 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -4780,7 +4780,7 @@ func CleanupGCEResources(c clientset.Interface, loadBalancerName, zone string) ( retErr = fmt.Errorf("%v\n%v", retErr, err) return } - if err := gceCloud.DeleteExternalTargetPoolAndChecks(loadBalancerName, region, clusterID, hcNames...); err != nil && + if err := gceCloud.DeleteExternalTargetPoolAndChecks(nil, loadBalancerName, region, clusterID, hcNames...); err != nil && !IsGoogleAPIHTTPErrorCode(err, http.StatusNotFound) { retErr = fmt.Errorf("%v\n%v", retErr, err) }