diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/BUILD b/staging/src/k8s.io/legacy-cloud-providers/gce/BUILD index 33621e890b0..aa96aaf32b2 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/BUILD +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/BUILD @@ -24,6 +24,7 @@ go_library( "gce_loadbalancer.go", "gce_loadbalancer_external.go", "gce_loadbalancer_internal.go", + "gce_loadbalancer_metrics.go", "gce_loadbalancer_naming.go", "gce_networkendpointgroup.go", "gce_routes.go", @@ -98,6 +99,7 @@ go_test( "gce_healthchecks_test.go", "gce_loadbalancer_external_test.go", "gce_loadbalancer_internal_test.go", + "gce_loadbalancer_metrics_test.go", "gce_loadbalancer_test.go", "gce_loadbalancer_utils_test.go", "gce_test.go", @@ -116,6 +118,7 @@ go_test( "//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud:go_default_library", "//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta:go_default_library", "//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/mock:go_default_library", + "//vendor/github.com/google/go-cmp/cmp:go_default_library", "//vendor/github.com/stretchr/testify/assert:go_default_library", "//vendor/github.com/stretchr/testify/require:go_default_library", "//vendor/golang.org/x/oauth2/google:go_default_library", diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go index 6ba69811b6b..e3c26c1ef8c 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go @@ -163,6 +163,8 @@ type Cloud struct { // Keep a reference of this around so we can inject a new cloud.RateLimiter implementation. s *cloud.Service + + metricsCollector loadbalancerMetricsCollector } // ConfigGlobal is the in memory representation of the gce.conf config data @@ -518,6 +520,7 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) { operationPollRateLimiter: operationPollRateLimiter, AlphaFeatureGate: config.AlphaFeatureGate, nodeZones: map[string]sets.String{}, + metricsCollector: newLoadBalancerMetrics(), } gce.manager = &gceServiceManager{gce} @@ -643,6 +646,7 @@ func (g *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, g.eventRecorder = g.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "g-cloudprovider"}) go g.watchClusterID(stop) + go g.metricsCollector.Run(stop) } // LoadBalancer returns an implementation of LoadBalancer for Google Compute Engine. diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_fake.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_fake.go index 72936211fdc..4f9a8d080fa 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_fake.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_fake.go @@ -76,6 +76,7 @@ func NewFakeGCECloud(vals TestClusterValues) *Cloud { networkProjectID: vals.ProjectID, ClusterID: fakeClusterID(vals.ClusterID), onXPN: vals.OnXPN, + metricsCollector: newLoadBalancerMetrics(), } c := cloud.NewMockGCE(&gceProjectRouter{gce}) gce.c = c diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go index c3062448dd1..ef285b308ec 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go @@ -63,6 +63,16 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v return nil, cloudprovider.ImplementedElsewhere } + nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} + + var serviceState L4ILBServiceState + // Mark the service InSuccess state as false to begin with. + // This will be updated to true if the VIP is configured successfully. + serviceState.InSuccess = false + defer func() { + g.metricsCollector.SetL4ILBService(nm.String(), serviceState) + }() + loadBalancerName := g.GetLoadBalancerName(context.TODO(), clusterName, svc) klog.V(2).Infof("ensureInternalLoadBalancer(%v): Attaching %q finalizer", loadBalancerName, ILBFinalizerV1) if err := addFinalizer(svc, g.client.CoreV1(), ILBFinalizerV1); err != nil { @@ -70,7 +80,6 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v return nil, err } - nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} ports, _, protocol := getPortsAndProtocol(svc.Spec.Ports) if protocol != v1.ProtocolTCP && protocol != v1.ProtocolUDP { return nil, fmt.Errorf("Invalid protocol %s, only TCP and UDP are supported", string(protocol)) @@ -213,6 +222,18 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v return nil, err } + serviceState.InSuccess = true + if options.AllowGlobalAccess { + serviceState.EnabledGlobalAccess = true + } + // SubnetName is overridden to nil value if Alpha feature gate for custom subnet + // is not enabled. So, a non empty subnet name at this point implies that the + // feature is in use. + if options.SubnetName != "" { + serviceState.EnabledCustomSubnet = true + } + klog.V(6).Infof("Internal Loadbalancer for Service %s ensured, updating its state %v in metrics cache", nm, serviceState) + status := &v1.LoadBalancerStatus{} status.Ingress = []v1.LoadBalancerIngress{{IP: updatedFwdRule.IPAddress}} return status, nil @@ -267,6 +288,7 @@ func (g *Cloud) updateInternalLoadBalancer(clusterName, clusterID string, svc *v func (g *Cloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID string, svc *v1.Service) error { loadBalancerName := g.GetLoadBalancerName(context.TODO(), clusterName, svc) + svcNamespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace} _, _, protocol := getPortsAndProtocol(svc.Spec.Ports) scheme := cloud.SchemeInternal sharedBackend := shareBackendService(svc) @@ -326,10 +348,12 @@ func (g *Cloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID string, klog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): Removing %q finalizer", loadBalancerName, ILBFinalizerV1) if err := removeFinalizer(svc, g.client.CoreV1(), ILBFinalizerV1); err != nil { - klog.Errorf("Failed to remove finalizer '%s' on service %s/%s - %v", ILBFinalizerV1, svc.Namespace, svc.Name, err) + klog.Errorf("Failed to remove finalizer '%s' on service %s - %v", ILBFinalizerV1, svcNamespacedName, err) return err } + klog.V(6).Infof("Internal Loadbalancer for Service %s deleted, removing its state from metrics cache", svcNamespacedName) + g.metricsCollector.DeleteL4ILBService(svcNamespacedName.String()) return nil } diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_metrics.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_metrics.go new file mode 100644 index 00000000000..b2d223b2636 --- /dev/null +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_metrics.go @@ -0,0 +1,181 @@ +// +build !providerless + +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gce + +import ( + "sync" + "time" + + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/klog/v2" +) + +const ( + label = "feature" +) + +var ( + metricsInterval = 10 * time.Minute + l4ILBCount = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Name: "number_of_l4_ilbs", + Help: "Number of L4 ILBs", + StabilityLevel: metrics.ALPHA, + }, + []string{label}, + ) +) + +// init registers L4 internal loadbalancer usage metrics. +func init() { + klog.V(3).Infof("Registering Service Controller loadbalancer usage metrics %v", l4ILBCount) + legacyregistry.MustRegister(l4ILBCount) +} + +// LoadBalancerMetrics is a cache that contains loadbalancer service resource +// states for computing usage metrics. +type LoadBalancerMetrics struct { + // l4ILBServiceMap is a map of service key and L4 ILB service state. + l4ILBServiceMap map[string]L4ILBServiceState + + sync.Mutex +} + +type feature string + +func (f feature) String() string { + return string(f) +} + +const ( + l4ILBService = feature("L4ILBService") + l4ILBGlobalAccess = feature("L4ILBGlobalAccess") + l4ILBCustomSubnet = feature("L4ILBCustomSubnet") + // l4ILBInSuccess feature specifies that ILB VIP is configured. + l4ILBInSuccess = feature("L4ILBInSuccess") + // l4ILBInInError feature specifies that an error had occurred for this service + // in ensureInternalLoadbalancer method. + l4ILBInError = feature("L4ILBInError") +) + +// L4ILBServiceState contains Internal Loadbalancer feature states as specified +// in k8s Service. +type L4ILBServiceState struct { + // EnabledGlobalAccess specifies if Global Access is enabled. + EnabledGlobalAccess bool + // EnabledCustomSubNet specifies if Custom Subnet is enabled. + EnabledCustomSubnet bool + // InSuccess specifies if the ILB service VIP is configured. + InSuccess bool +} + +// loadbalancerMetricsCollector is an interface to update/delete L4 loadbalancer +// states in the cache that is used for computing L4 Loadbalancer usage metrics. +type loadbalancerMetricsCollector interface { + // Run starts a goroutine to compute and export metrics a periodic interval. + Run(stopCh <-chan struct{}) + // SetL4ILBService adds/updates L4 ILB service state for given service key. + SetL4ILBService(svcKey string, state L4ILBServiceState) + // DeleteL4ILBService removes the given L4 ILB service key. + DeleteL4ILBService(svcKey string) +} + +// newLoadBalancerMetrics initializes LoadBalancerMetrics and starts a goroutine +// to compute and export metrics periodically. +func newLoadBalancerMetrics() loadbalancerMetricsCollector { + return &LoadBalancerMetrics{ + l4ILBServiceMap: make(map[string]L4ILBServiceState), + } +} + +// Run implements loadbalancerMetricsCollector. +func (lm *LoadBalancerMetrics) Run(stopCh <-chan struct{}) { + klog.V(3).Infof("Loadbalancer Metrics initialized. Metrics will be exported at an interval of %v", metricsInterval) + // Compute and export metrics periodically. + go func() { + // Wait for service states to be populated in the cache before computing metrics. + time.Sleep(metricsInterval) + wait.Until(lm.export, metricsInterval, stopCh) + }() + <-stopCh +} + +// SetL4ILBService implements loadbalancerMetricsCollector. +func (lm *LoadBalancerMetrics) SetL4ILBService(svcKey string, state L4ILBServiceState) { + lm.Lock() + defer lm.Unlock() + + if lm.l4ILBServiceMap == nil { + klog.Fatalf("Loadbalancer Metrics failed to initialize correctly.") + } + lm.l4ILBServiceMap[svcKey] = state +} + +// DeleteL4ILBService implements loadbalancerMetricsCollector. +func (lm *LoadBalancerMetrics) DeleteL4ILBService(svcKey string) { + lm.Lock() + defer lm.Unlock() + + delete(lm.l4ILBServiceMap, svcKey) +} + +// export computes and exports loadbalancer usage metrics. +func (lm *LoadBalancerMetrics) export() { + ilbCount := lm.computeL4ILBMetrics() + klog.V(5).Infof("Exporting L4 ILB usage metrics: %#v", ilbCount) + for feature, count := range ilbCount { + l4ILBCount.With(map[string]string{label: feature.String()}).Set(float64(count)) + } + klog.V(5).Infof("L4 ILB usage metrics exported.") +} + +// computeL4ILBMetrics aggregates L4 ILB metrics in the cache. +func (lm *LoadBalancerMetrics) computeL4ILBMetrics() map[feature]int { + lm.Lock() + defer lm.Unlock() + klog.V(4).Infof("Computing L4 ILB usage metrics from service state map: %#v", lm.l4ILBServiceMap) + counts := map[feature]int{ + l4ILBService: 0, + l4ILBGlobalAccess: 0, + l4ILBCustomSubnet: 0, + l4ILBInSuccess: 0, + l4ILBInError: 0, + } + + for key, state := range lm.l4ILBServiceMap { + klog.V(6).Infof("ILB Service %s has EnabledGlobalAccess: %t, EnabledCustomSubnet: %t, InSuccess: %t", key, state.EnabledGlobalAccess, state.EnabledCustomSubnet, state.InSuccess) + counts[l4ILBService]++ + if !state.InSuccess { + counts[l4ILBInError]++ + // Skip counting other features if the service is in error state. + continue + } + counts[l4ILBInSuccess]++ + if state.EnabledGlobalAccess { + counts[l4ILBGlobalAccess]++ + } + if state.EnabledCustomSubnet { + counts[l4ILBCustomSubnet]++ + } + } + klog.V(4).Info("L4 ILB usage metrics computed.") + return counts +} diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_metrics_test.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_metrics_test.go new file mode 100644 index 00000000000..106b8273251 --- /dev/null +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_metrics_test.go @@ -0,0 +1,168 @@ +// +build !providerless + +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gce + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestComputeL4ILBMetrics(t *testing.T) { + t.Parallel() + for _, tc := range []struct { + desc string + serviceStates []L4ILBServiceState + expectL4ILBCount map[feature]int + }{ + { + desc: "empty input", + serviceStates: []L4ILBServiceState{}, + expectL4ILBCount: map[feature]int{ + l4ILBService: 0, + l4ILBGlobalAccess: 0, + l4ILBCustomSubnet: 0, + l4ILBInSuccess: 0, + l4ILBInError: 0, + }, + }, + { + desc: "one l4 ilb service", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(false, false, true), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 1, + l4ILBGlobalAccess: 0, + l4ILBCustomSubnet: 0, + l4ILBInSuccess: 1, + l4ILBInError: 0, + }, + }, + { + desc: "l4 ilb service in error state", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(false, true, false), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 1, + l4ILBGlobalAccess: 0, + l4ILBCustomSubnet: 0, + l4ILBInSuccess: 0, + l4ILBInError: 1, + }, + }, + { + desc: "global access for l4 ilb service enabled", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(true, false, true), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 1, + l4ILBGlobalAccess: 1, + l4ILBCustomSubnet: 0, + l4ILBInSuccess: 1, + l4ILBInError: 0, + }, + }, + { + desc: "custom subnet for l4 ilb service enabled", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(false, true, true), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 1, + l4ILBGlobalAccess: 0, + l4ILBCustomSubnet: 1, + l4ILBInSuccess: 1, + l4ILBInError: 0, + }, + }, + { + desc: "both global access and custom subnet for l4 ilb service enabled", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(true, true, true), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 1, + l4ILBGlobalAccess: 1, + l4ILBCustomSubnet: 1, + l4ILBInSuccess: 1, + l4ILBInError: 0, + }, + }, + { + desc: "many l4 ilb services", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(false, false, true), + newL4ILBServiceState(false, true, true), + newL4ILBServiceState(true, false, true), + newL4ILBServiceState(true, true, true), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 4, + l4ILBGlobalAccess: 2, + l4ILBCustomSubnet: 2, + l4ILBInSuccess: 4, + l4ILBInError: 0, + }, + }, + { + desc: "many l4 ilb services with some in error state", + serviceStates: []L4ILBServiceState{ + newL4ILBServiceState(false, false, true), + newL4ILBServiceState(false, true, false), + newL4ILBServiceState(false, true, true), + newL4ILBServiceState(true, false, true), + newL4ILBServiceState(true, false, false), + newL4ILBServiceState(true, true, true), + }, + expectL4ILBCount: map[feature]int{ + l4ILBService: 6, + l4ILBGlobalAccess: 2, + l4ILBCustomSubnet: 2, + l4ILBInSuccess: 4, + l4ILBInError: 2, + }, + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + newMetrics := LoadBalancerMetrics{ + l4ILBServiceMap: make(map[string]L4ILBServiceState), + } + for i, serviceState := range tc.serviceStates { + newMetrics.SetL4ILBService(string(i), serviceState) + } + got := newMetrics.computeL4ILBMetrics() + if diff := cmp.Diff(tc.expectL4ILBCount, got); diff != "" { + t.Fatalf("Got diff for L4 ILB service counts (-want +got):\n%s", diff) + } + }) + } +} + +func newL4ILBServiceState(globalAccess, customSubnet, inSuccess bool) L4ILBServiceState { + return L4ILBServiceState{ + EnabledGlobalAccess: globalAccess, + EnabledCustomSubnet: customSubnet, + InSuccess: inSuccess, + } +} diff --git a/staging/src/k8s.io/legacy-cloud-providers/go.mod b/staging/src/k8s.io/legacy-cloud-providers/go.mod index 52e25c8cc00..9781c3bd8e6 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/go.mod +++ b/staging/src/k8s.io/legacy-cloud-providers/go.mod @@ -16,6 +16,7 @@ require ( github.com/aws/aws-sdk-go v1.28.2 github.com/dnaeon/go-vcr v1.0.1 // indirect github.com/golang/mock v1.3.1 + github.com/google/go-cmp v0.4.0 github.com/gophercloud/gophercloud v0.1.0 github.com/mitchellh/mapstructure v1.1.2 github.com/rubiojr/go-vhd v0.0.0-20160810183302-0bfd3b39853c