diff --git a/pkg/registry/core/service/ipallocator/allocator.go b/pkg/registry/core/service/ipallocator/allocator.go index 7e57dc23d5b..4a705b5286e 100644 --- a/pkg/registry/core/service/ipallocator/allocator.go +++ b/pkg/registry/core/service/ipallocator/allocator.go @@ -83,6 +83,8 @@ type Range struct { // NewAllocatorCIDRRange creates a Range over a net.IPNet, calling allocatorFactory to construct the backing store. func NewAllocatorCIDRRange(cidr *net.IPNet, allocatorFactory allocator.AllocatorFactory) (*Range, error) { + registerMetrics() + max := utilnet.RangeSize(cidr) base := utilnet.BigForIP(cidr.IP) rangeSpec := cidr.String() @@ -165,31 +167,58 @@ func (r *Range) CIDR() net.IPNet { // or has already been reserved. ErrFull will be returned if there // are no addresses left. func (r *Range) Allocate(ip net.IP) error { + label := r.CIDR() ok, offset := r.contains(ip) if !ok { + // update metrics + clusterIPAllocationErrors.WithLabelValues(label.String()).Inc() + return &ErrNotInRange{r.net.String()} } allocated, err := r.alloc.Allocate(offset) if err != nil { + // update metrics + clusterIPAllocationErrors.WithLabelValues(label.String()).Inc() + return err } if !allocated { + // update metrics + clusterIPAllocationErrors.WithLabelValues(label.String()).Inc() + return ErrAllocated } + // update metrics + clusterIPAllocations.WithLabelValues(label.String()).Inc() + clusterIPAllocated.WithLabelValues(label.String()).Set(float64(r.Used())) + clusterIPAvailable.WithLabelValues(label.String()).Set(float64(r.Free())) + return nil } // AllocateNext reserves one of the IPs from the pool. ErrFull may // be returned if there are no addresses left. 
func (r *Range) AllocateNext() (net.IP, error) { + label := r.CIDR() offset, ok, err := r.alloc.AllocateNext() if err != nil { + // update metrics + clusterIPAllocationErrors.WithLabelValues(label.String()).Inc() + return nil, err } if !ok { + // update metrics + clusterIPAllocationErrors.WithLabelValues(label.String()).Inc() + return nil, ErrFull } + // update metrics + clusterIPAllocations.WithLabelValues(label.String()).Inc() + clusterIPAllocated.WithLabelValues(label.String()).Set(float64(r.Used())) + clusterIPAvailable.WithLabelValues(label.String()).Set(float64(r.Free())) + return utilnet.AddIPOffset(r.base, offset), nil } @@ -202,7 +231,14 @@ func (r *Range) Release(ip net.IP) error { return nil } - return r.alloc.Release(offset) + err := r.alloc.Release(offset) + if err == nil { + // update metrics + label := r.CIDR() + clusterIPAllocated.WithLabelValues(label.String()).Set(float64(r.Used())) + clusterIPAvailable.WithLabelValues(label.String()).Set(float64(r.Free())) + } + return err } // ForEach calls the provided function for each allocated IP. 
diff --git a/pkg/registry/core/service/ipallocator/allocator_test.go b/pkg/registry/core/service/ipallocator/allocator_test.go index db43f65ef5f..af00c805bee 100644 --- a/pkg/registry/core/service/ipallocator/allocator_test.go +++ b/pkg/registry/core/service/ipallocator/allocator_test.go @@ -21,6 +21,7 @@ import ( "testing" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/component-base/metrics/testutil" api "k8s.io/kubernetes/pkg/apis/core" ) @@ -361,3 +362,154 @@ func TestNewFromSnapshot(t *testing.T) { } } } + +func TestClusterIPMetrics(t *testing.T) { + // create IPv4 allocator + cidrIPv4 := "10.0.0.0/24" + _, clusterCIDRv4, _ := net.ParseCIDR(cidrIPv4) + a, err := NewCIDRRange(clusterCIDRv4) + if err != nil { + t.Fatalf("unexpected error creating CidrSet: %v", err) + } + clearMetrics(map[string]string{"cidr": cidrIPv4}) + // create IPv6 allocator + cidrIPv6 := "2001:db8::/112" + _, clusterCIDRv6, _ := net.ParseCIDR(cidrIPv6) + b, err := NewCIDRRange(clusterCIDRv6) + if err != nil { + t.Fatalf("unexpected error creating CidrSet: %v", err) + } + clearMetrics(map[string]string{"cidr": cidrIPv6}) + + // Check initial state + em := testMetrics{ + free: 0, + used: 0, + allocated: 0, + errors: 0, + } + expectMetrics(t, cidrIPv4, em) + em = testMetrics{ + free: 0, + used: 0, + allocated: 0, + errors: 0, + } + expectMetrics(t, cidrIPv6, em) + + // allocate 2 IPv4 addresses + found := sets.NewString() + for i := 0; i < 2; i++ { + ip, err := a.AllocateNext() + if err != nil { + t.Fatal(err) + } + if found.Has(ip.String()) { + t.Fatalf("already reserved: %s", ip) + } + found.Insert(ip.String()) + } + + em = testMetrics{ + free: 252, + used: 2, + allocated: 2, + errors: 0, + } + expectMetrics(t, cidrIPv4, em) + + // try to allocate the same IP addresses + for s := range found { + if !a.Has(net.ParseIP(s)) { + t.Fatalf("missing: %s", s) + } + if err := a.Allocate(net.ParseIP(s)); err != ErrAllocated { + t.Fatal(err) + } + } + em = testMetrics{ + free: 252, + used: 2, + 
allocated: 2, + errors: 2, + } + expectMetrics(t, cidrIPv4, em) + + // release the addresses allocated + for s := range found { + if !a.Has(net.ParseIP(s)) { + t.Fatalf("missing: %s", s) + } + if err := a.Release(net.ParseIP(s)); err != nil { + t.Fatal(err) + } + } + em = testMetrics{ + free: 254, + used: 0, + allocated: 2, + errors: 2, + } + expectMetrics(t, cidrIPv4, em) + + // allocate 264 addresses for each allocator + // the full range and 10 more (254 + 10 = 264) for IPv4 + for i := 0; i < 264; i++ { + a.AllocateNext() + b.AllocateNext() + } + em = testMetrics{ + free: 0, + used: 254, + allocated: 256, // this is a counter, we already had 2 allocations and we did 254 more + errors: 12, + } + expectMetrics(t, cidrIPv4, em) + em = testMetrics{ + free: 65271, // IPv6 clusterIP range is capped to 2^16 and consider the broadcast address as valid + used: 264, + allocated: 264, + errors: 0, + } + expectMetrics(t, cidrIPv6, em) +} + +// Metrics helpers +func clearMetrics(labels map[string]string) { + clusterIPAllocated.Delete(labels) + clusterIPAvailable.Delete(labels) + clusterIPAllocations.Delete(labels) + clusterIPAllocationErrors.Delete(labels) +} + +type testMetrics struct { + free float64 + used float64 + allocated float64 + errors float64 +} + +func expectMetrics(t *testing.T, label string, em testMetrics) { + var m testMetrics + var err error + m.free, err = testutil.GetGaugeMetricValue(clusterIPAvailable.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", clusterIPAvailable.Name, err) + } + m.used, err = testutil.GetGaugeMetricValue(clusterIPAllocated.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", clusterIPAllocated.Name, err) + } + m.allocated, err = testutil.GetCounterMetricValue(clusterIPAllocations.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", clusterIPAllocations.Name, err) + } + m.errors, err = 
testutil.GetCounterMetricValue(clusterIPAllocationErrors.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", clusterIPAllocationErrors.Name, err) + } + + if m != em { + t.Fatalf("metrics error: expected %v, received %v", em, m) + } +} diff --git a/pkg/registry/core/service/ipallocator/metrics.go b/pkg/registry/core/service/ipallocator/metrics.go new file mode 100644 index 00000000000..ffd62f32e3b --- /dev/null +++ b/pkg/registry/core/service/ipallocator/metrics.go @@ -0,0 +1,87 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ipallocator + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const ( + namespace = "kube_apiserver" + subsystem = "clusterip_allocator" +) + +var ( + // clusterIPAllocated indicates the amount of cluster IP allocated by Service CIDR. + clusterIPAllocated = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "allocated_ips", + Help: "Gauge measuring the number of allocated IPs for Services", + StabilityLevel: metrics.ALPHA, + }, + []string{"cidr"}, + ) + // clusterIPAvailable indicates the amount of cluster IP available by Service CIDR. 
+ clusterIPAvailable = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "available_ips", + Help: "Gauge measuring the number of available IPs for Services", + StabilityLevel: metrics.ALPHA, + }, + []string{"cidr"}, + ) + // clusterIPAllocations counts the total number of ClusterIP allocations. + clusterIPAllocations = metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "allocation_total", + Help: "Number of Cluster IPs allocations", + StabilityLevel: metrics.ALPHA, + }, + []string{"cidr"}, + ) + // clusterIPAllocationErrors counts the number of errors trying to allocate a ClusterIP. + clusterIPAllocationErrors = metrics.NewCounterVec( + &metrics.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "allocation_errors_total", + Help: "Number of errors trying to allocate Cluster IPs", + StabilityLevel: metrics.ALPHA, + }, + []string{"cidr"}, + ) +) + +var registerMetricsOnce sync.Once + +func registerMetrics() { + registerMetricsOnce.Do(func() { + legacyregistry.MustRegister(clusterIPAllocated) + legacyregistry.MustRegister(clusterIPAvailable) + legacyregistry.MustRegister(clusterIPAllocations) + legacyregistry.MustRegister(clusterIPAllocationErrors) + }) +}