Merge pull request #91700 from skmatti/ilb-metrics

Add usage metrics for GCE Internal Loadbalancers
This commit is contained in:
Kubernetes Prow Robot 2020-06-10 17:12:56 -07:00 committed by GitHub
commit 42797adafc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 384 additions and 2 deletions

View File

@ -24,6 +24,7 @@ go_library(
"gce_loadbalancer.go",
"gce_loadbalancer_external.go",
"gce_loadbalancer_internal.go",
"gce_loadbalancer_metrics.go",
"gce_loadbalancer_naming.go",
"gce_networkendpointgroup.go",
"gce_routes.go",
@ -98,6 +99,7 @@ go_test(
"gce_healthchecks_test.go",
"gce_loadbalancer_external_test.go",
"gce_loadbalancer_internal_test.go",
"gce_loadbalancer_metrics_test.go",
"gce_loadbalancer_test.go",
"gce_loadbalancer_utils_test.go",
"gce_test.go",
@ -116,6 +118,7 @@ go_test(
"//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud:go_default_library",
"//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta:go_default_library",
"//vendor/github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/mock:go_default_library",
"//vendor/github.com/google/go-cmp/cmp:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/github.com/stretchr/testify/require:go_default_library",
"//vendor/golang.org/x/oauth2/google:go_default_library",

View File

@ -163,6 +163,8 @@ type Cloud struct {
// Keep a reference of this around so we can inject a new cloud.RateLimiter implementation.
s *cloud.Service
metricsCollector loadbalancerMetricsCollector
}
// ConfigGlobal is the in memory representation of the gce.conf config data
@ -518,6 +520,7 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
operationPollRateLimiter: operationPollRateLimiter,
AlphaFeatureGate: config.AlphaFeatureGate,
nodeZones: map[string]sets.String{},
metricsCollector: newLoadBalancerMetrics(),
}
gce.manager = &gceServiceManager{gce}
@ -643,6 +646,7 @@ func (g *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder,
g.eventRecorder = g.eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "g-cloudprovider"})
go g.watchClusterID(stop)
go g.metricsCollector.Run(stop)
}
// LoadBalancer returns an implementation of LoadBalancer for Google Compute Engine.

View File

@ -76,6 +76,7 @@ func NewFakeGCECloud(vals TestClusterValues) *Cloud {
networkProjectID: vals.ProjectID,
ClusterID: fakeClusterID(vals.ClusterID),
onXPN: vals.OnXPN,
metricsCollector: newLoadBalancerMetrics(),
}
c := cloud.NewMockGCE(&gceProjectRouter{gce})
gce.c = c

View File

@ -63,6 +63,16 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
return nil, cloudprovider.ImplementedElsewhere
}
nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}
var serviceState L4ILBServiceState
// Mark the service InSuccess state as false to begin with.
// This will be updated to true if the VIP is configured successfully.
serviceState.InSuccess = false
defer func() {
g.metricsCollector.SetL4ILBService(nm.String(), serviceState)
}()
loadBalancerName := g.GetLoadBalancerName(context.TODO(), clusterName, svc)
klog.V(2).Infof("ensureInternalLoadBalancer(%v): Attaching %q finalizer", loadBalancerName, ILBFinalizerV1)
if err := addFinalizer(svc, g.client.CoreV1(), ILBFinalizerV1); err != nil {
@ -70,7 +80,6 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
return nil, err
}
nm := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}
ports, _, protocol := getPortsAndProtocol(svc.Spec.Ports)
if protocol != v1.ProtocolTCP && protocol != v1.ProtocolUDP {
return nil, fmt.Errorf("Invalid protocol %s, only TCP and UDP are supported", string(protocol))
@ -213,6 +222,18 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
return nil, err
}
serviceState.InSuccess = true
if options.AllowGlobalAccess {
serviceState.EnabledGlobalAccess = true
}
// SubnetName is overridden to nil value if Alpha feature gate for custom subnet
// is not enabled. So, a non empty subnet name at this point implies that the
// feature is in use.
if options.SubnetName != "" {
serviceState.EnabledCustomSubnet = true
}
klog.V(6).Infof("Internal Loadbalancer for Service %s ensured, updating its state %v in metrics cache", nm, serviceState)
status := &v1.LoadBalancerStatus{}
status.Ingress = []v1.LoadBalancerIngress{{IP: updatedFwdRule.IPAddress}}
return status, nil
@ -267,6 +288,7 @@ func (g *Cloud) updateInternalLoadBalancer(clusterName, clusterID string, svc *v
func (g *Cloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID string, svc *v1.Service) error {
loadBalancerName := g.GetLoadBalancerName(context.TODO(), clusterName, svc)
svcNamespacedName := types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}
_, _, protocol := getPortsAndProtocol(svc.Spec.Ports)
scheme := cloud.SchemeInternal
sharedBackend := shareBackendService(svc)
@ -326,10 +348,12 @@ func (g *Cloud) ensureInternalLoadBalancerDeleted(clusterName, clusterID string,
klog.V(2).Infof("ensureInternalLoadBalancerDeleted(%v): Removing %q finalizer", loadBalancerName, ILBFinalizerV1)
if err := removeFinalizer(svc, g.client.CoreV1(), ILBFinalizerV1); err != nil {
klog.Errorf("Failed to remove finalizer '%s' on service %s/%s - %v", ILBFinalizerV1, svc.Namespace, svc.Name, err)
klog.Errorf("Failed to remove finalizer '%s' on service %s - %v", ILBFinalizerV1, svcNamespacedName, err)
return err
}
klog.V(6).Infof("Internal Loadbalancer for Service %s deleted, removing its state from metrics cache", svcNamespacedName)
g.metricsCollector.DeleteL4ILBService(svcNamespacedName.String())
return nil
}

View File

@ -0,0 +1,181 @@
// +build !providerless
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gce
import (
"sync"
"time"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/klog/v2"
)
const (
label = "feature"
)
var (
metricsInterval = 10 * time.Minute
l4ILBCount = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Name: "number_of_l4_ilbs",
Help: "Number of L4 ILBs",
StabilityLevel: metrics.ALPHA,
},
[]string{label},
)
)
// init registers L4 internal loadbalancer usage metrics.
func init() {
klog.V(3).Infof("Registering Service Controller loadbalancer usage metrics %v", l4ILBCount)
legacyregistry.MustRegister(l4ILBCount)
}
// LoadBalancerMetrics is a cache that contains loadbalancer service resource
// states for computing usage metrics.
type LoadBalancerMetrics struct {
// l4ILBServiceMap is a map of service key and L4 ILB service state.
l4ILBServiceMap map[string]L4ILBServiceState
sync.Mutex
}
type feature string
func (f feature) String() string {
return string(f)
}
const (
l4ILBService = feature("L4ILBService")
l4ILBGlobalAccess = feature("L4ILBGlobalAccess")
l4ILBCustomSubnet = feature("L4ILBCustomSubnet")
// l4ILBInSuccess feature specifies that ILB VIP is configured.
l4ILBInSuccess = feature("L4ILBInSuccess")
// l4ILBInInError feature specifies that an error had occurred for this service
// in ensureInternalLoadbalancer method.
l4ILBInError = feature("L4ILBInError")
)
// L4ILBServiceState contains Internal Loadbalancer feature states as specified
// in k8s Service.
type L4ILBServiceState struct {
// EnabledGlobalAccess specifies if Global Access is enabled.
EnabledGlobalAccess bool
// EnabledCustomSubNet specifies if Custom Subnet is enabled.
EnabledCustomSubnet bool
// InSuccess specifies if the ILB service VIP is configured.
InSuccess bool
}
// loadbalancerMetricsCollector is an interface to update/delete L4 loadbalancer
// states in the cache that is used for computing L4 Loadbalancer usage metrics.
type loadbalancerMetricsCollector interface {
// Run starts a goroutine to compute and export metrics a periodic interval.
Run(stopCh <-chan struct{})
// SetL4ILBService adds/updates L4 ILB service state for given service key.
SetL4ILBService(svcKey string, state L4ILBServiceState)
// DeleteL4ILBService removes the given L4 ILB service key.
DeleteL4ILBService(svcKey string)
}
// newLoadBalancerMetrics initializes LoadBalancerMetrics and starts a goroutine
// to compute and export metrics periodically.
func newLoadBalancerMetrics() loadbalancerMetricsCollector {
return &LoadBalancerMetrics{
l4ILBServiceMap: make(map[string]L4ILBServiceState),
}
}
// Run implements loadbalancerMetricsCollector.
func (lm *LoadBalancerMetrics) Run(stopCh <-chan struct{}) {
klog.V(3).Infof("Loadbalancer Metrics initialized. Metrics will be exported at an interval of %v", metricsInterval)
// Compute and export metrics periodically.
go func() {
// Wait for service states to be populated in the cache before computing metrics.
time.Sleep(metricsInterval)
wait.Until(lm.export, metricsInterval, stopCh)
}()
<-stopCh
}
// SetL4ILBService implements loadbalancerMetricsCollector.
func (lm *LoadBalancerMetrics) SetL4ILBService(svcKey string, state L4ILBServiceState) {
lm.Lock()
defer lm.Unlock()
if lm.l4ILBServiceMap == nil {
klog.Fatalf("Loadbalancer Metrics failed to initialize correctly.")
}
lm.l4ILBServiceMap[svcKey] = state
}
// DeleteL4ILBService implements loadbalancerMetricsCollector.
func (lm *LoadBalancerMetrics) DeleteL4ILBService(svcKey string) {
lm.Lock()
defer lm.Unlock()
delete(lm.l4ILBServiceMap, svcKey)
}
// export computes and exports loadbalancer usage metrics.
func (lm *LoadBalancerMetrics) export() {
ilbCount := lm.computeL4ILBMetrics()
klog.V(5).Infof("Exporting L4 ILB usage metrics: %#v", ilbCount)
for feature, count := range ilbCount {
l4ILBCount.With(map[string]string{label: feature.String()}).Set(float64(count))
}
klog.V(5).Infof("L4 ILB usage metrics exported.")
}
// computeL4ILBMetrics aggregates L4 ILB metrics in the cache.
func (lm *LoadBalancerMetrics) computeL4ILBMetrics() map[feature]int {
lm.Lock()
defer lm.Unlock()
klog.V(4).Infof("Computing L4 ILB usage metrics from service state map: %#v", lm.l4ILBServiceMap)
counts := map[feature]int{
l4ILBService: 0,
l4ILBGlobalAccess: 0,
l4ILBCustomSubnet: 0,
l4ILBInSuccess: 0,
l4ILBInError: 0,
}
for key, state := range lm.l4ILBServiceMap {
klog.V(6).Infof("ILB Service %s has EnabledGlobalAccess: %t, EnabledCustomSubnet: %t, InSuccess: %t", key, state.EnabledGlobalAccess, state.EnabledCustomSubnet, state.InSuccess)
counts[l4ILBService]++
if !state.InSuccess {
counts[l4ILBInError]++
// Skip counting other features if the service is in error state.
continue
}
counts[l4ILBInSuccess]++
if state.EnabledGlobalAccess {
counts[l4ILBGlobalAccess]++
}
if state.EnabledCustomSubnet {
counts[l4ILBCustomSubnet]++
}
}
klog.V(4).Info("L4 ILB usage metrics computed.")
return counts
}

View File

@ -0,0 +1,168 @@
// +build !providerless
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gce
import (
"testing"
"github.com/google/go-cmp/cmp"
)
func TestComputeL4ILBMetrics(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
desc string
serviceStates []L4ILBServiceState
expectL4ILBCount map[feature]int
}{
{
desc: "empty input",
serviceStates: []L4ILBServiceState{},
expectL4ILBCount: map[feature]int{
l4ILBService: 0,
l4ILBGlobalAccess: 0,
l4ILBCustomSubnet: 0,
l4ILBInSuccess: 0,
l4ILBInError: 0,
},
},
{
desc: "one l4 ilb service",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(false, false, true),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 1,
l4ILBGlobalAccess: 0,
l4ILBCustomSubnet: 0,
l4ILBInSuccess: 1,
l4ILBInError: 0,
},
},
{
desc: "l4 ilb service in error state",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(false, true, false),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 1,
l4ILBGlobalAccess: 0,
l4ILBCustomSubnet: 0,
l4ILBInSuccess: 0,
l4ILBInError: 1,
},
},
{
desc: "global access for l4 ilb service enabled",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(true, false, true),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 1,
l4ILBGlobalAccess: 1,
l4ILBCustomSubnet: 0,
l4ILBInSuccess: 1,
l4ILBInError: 0,
},
},
{
desc: "custom subnet for l4 ilb service enabled",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(false, true, true),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 1,
l4ILBGlobalAccess: 0,
l4ILBCustomSubnet: 1,
l4ILBInSuccess: 1,
l4ILBInError: 0,
},
},
{
desc: "both global access and custom subnet for l4 ilb service enabled",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(true, true, true),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 1,
l4ILBGlobalAccess: 1,
l4ILBCustomSubnet: 1,
l4ILBInSuccess: 1,
l4ILBInError: 0,
},
},
{
desc: "many l4 ilb services",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(false, false, true),
newL4ILBServiceState(false, true, true),
newL4ILBServiceState(true, false, true),
newL4ILBServiceState(true, true, true),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 4,
l4ILBGlobalAccess: 2,
l4ILBCustomSubnet: 2,
l4ILBInSuccess: 4,
l4ILBInError: 0,
},
},
{
desc: "many l4 ilb services with some in error state",
serviceStates: []L4ILBServiceState{
newL4ILBServiceState(false, false, true),
newL4ILBServiceState(false, true, false),
newL4ILBServiceState(false, true, true),
newL4ILBServiceState(true, false, true),
newL4ILBServiceState(true, false, false),
newL4ILBServiceState(true, true, true),
},
expectL4ILBCount: map[feature]int{
l4ILBService: 6,
l4ILBGlobalAccess: 2,
l4ILBCustomSubnet: 2,
l4ILBInSuccess: 4,
l4ILBInError: 2,
},
},
} {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
newMetrics := LoadBalancerMetrics{
l4ILBServiceMap: make(map[string]L4ILBServiceState),
}
for i, serviceState := range tc.serviceStates {
newMetrics.SetL4ILBService(string(i), serviceState)
}
got := newMetrics.computeL4ILBMetrics()
if diff := cmp.Diff(tc.expectL4ILBCount, got); diff != "" {
t.Fatalf("Got diff for L4 ILB service counts (-want +got):\n%s", diff)
}
})
}
}
func newL4ILBServiceState(globalAccess, customSubnet, inSuccess bool) L4ILBServiceState {
return L4ILBServiceState{
EnabledGlobalAccess: globalAccess,
EnabledCustomSubnet: customSubnet,
InSuccess: inSuccess,
}
}

View File

@ -16,6 +16,7 @@ require (
github.com/aws/aws-sdk-go v1.28.2
github.com/dnaeon/go-vcr v1.0.1 // indirect
github.com/golang/mock v1.3.1
github.com/google/go-cmp v0.4.0
github.com/gophercloud/gophercloud v0.1.0
github.com/mitchellh/mapstructure v1.1.2
github.com/rubiojr/go-vhd v0.0.0-20160810183302-0bfd3b39853c