Adding new logging, event, and metric to better capture when mirroring addresses is skipped

This commit is contained in:
Rob Scott 2020-07-01 18:30:24 -07:00
parent e701cb0205
commit fa59370e93
No known key found for this signature in database
GPG Key ID: 90C19B2D4A99C91B
6 changed files with 88 additions and 21 deletions

View File

@ -161,6 +161,7 @@ type Controller struct {
// has been synced at least once. Added as a member to the struct to allow
// injection for testing.
endpointSlicesSynced cache.InformerSynced
// endpointSliceTracker tracks the list of EndpointSlices and associated
// resource versions expected for each Endpoints resource. It can help
// determine if a cached EndpointSlice is out of date.

View File

@ -172,7 +172,7 @@ func TestSyncEndpoints(t *testing.T) {
}},
},
endpointSlices: []*discovery.EndpointSlice{},
expectedNumActions: 1,
expectedNumActions: 2, // extra action for creating warning event
expectedNumSlices: 1,
}}

View File

@ -26,4 +26,8 @@ const (
// InvalidIPAddress indicates that an IP address found in an Endpoints
// resource is invalid.
InvalidIPAddress = "InvalidIPAddress"
// TooManyAddressesToMirror indicates that some addresses were not mirrored
// due to an EndpointSubset containing more addresses to mirror than
// MaxEndpointsPerSubset allows.
TooManyAddressesToMirror = "TooManyAddressesToMirror"
)

View File

@ -64,6 +64,18 @@ var (
},
[]string{},
)
// AddressesSkippedPerSync tracks the number of addresses skipped on each
// Endpoints sync due to being invalid or exceeding MaxEndpointsPerSubset.
AddressesSkippedPerSync = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: EndpointSliceMirroringSubsystem,
Name: "addresses_skipped_per_sync",
Help: "Number of addresses skipped on each Endpoints sync due to being invalid or exceeding MaxEndpointsPerSubset",
StabilityLevel: metrics.ALPHA,
Buckets: metrics.ExponentialBuckets(2, 2, 15),
},
[]string{},
)
// EndpointsSyncDuration tracks how long syncEndpoints() takes in a number
// of Seconds.
EndpointsSyncDuration = metrics.NewHistogramVec(
@ -127,6 +139,7 @@ func RegisterMetrics() {
legacyregistry.MustRegister(EndpointsAddedPerSync)
legacyregistry.MustRegister(EndpointsUpdatedPerSync)
legacyregistry.MustRegister(EndpointsRemovedPerSync)
legacyregistry.MustRegister(AddressesSkippedPerSync)
legacyregistry.MustRegister(EndpointsSyncDuration)
legacyregistry.MustRegister(EndpointsDesired)
legacyregistry.MustRegister(NumEndpointSlices)

View File

@ -36,11 +36,25 @@ import (
// reconciler is responsible for transforming current EndpointSlice state into
// desired state
type reconciler struct {
client clientset.Interface
client clientset.Interface
// endpointSliceTracker tracks the list of EndpointSlices and associated
// resource versions expected for each Endpoints resource. It can help
// determine if a cached EndpointSlice is out of date.
endpointSliceTracker *endpointSliceTracker
// eventRecorder allows reconciler to record an event if it finds an invalid
// IP address in an Endpoints resource.
eventRecorder record.EventRecorder
// maxEndpointsPerSubset references the maximum number of endpoints that
// should be added to an EndpointSlice for an EndpointSubset. This allows
// for a simple 1:1 mapping between EndpointSubset and EndpointSlice.
maxEndpointsPerSubset int32
endpointSliceTracker *endpointSliceTracker
metricsCache *metrics.Cache
eventRecorder record.EventRecorder
// metricsCache tracks values for total numbers of desired endpoints as well
// as the efficiency of EndpointSlice endpoints distribution
metricsCache *metrics.Cache
}
// reconcile takes an Endpoints resource and ensures that corresponding
@ -50,38 +64,65 @@ func (r *reconciler) reconcile(endpoints *corev1.Endpoints, existingSlices []*di
// Calculate desired state.
d := newDesiredCalc()
numInvalidAddresses := 0
addressesSkipped := 0
for _, subset := range endpoints.Subsets {
multiKey := d.initPorts(subset.Ports)
totalAddresses := 0
numInvalidAddresses := 0
totalAddresses := len(subset.Addresses) + len(subset.NotReadyAddresses)
totalAddressesAdded := 0
for _, address := range subset.Addresses {
totalAddresses++
if totalAddresses > int(r.maxEndpointsPerSubset) {
// Break if we've reached the max number of addresses to mirror
// per EndpointSubset. This allows for a simple 1:1 mapping between
// EndpointSubset and EndpointSlice.
if totalAddressesAdded >= int(r.maxEndpointsPerSubset) {
break
}
if ok := d.addAddress(address, multiKey, true); !ok {
if ok := d.addAddress(address, multiKey, true); ok {
totalAddressesAdded++
} else {
numInvalidAddresses++
klog.Warningf("Address in %s/%s Endpoints is not a valid IP, it will not be mirrored to an EndpointSlice: %s", endpoints.Namespace, endpoints.Name, address.IP)
}
}
for _, address := range subset.NotReadyAddresses {
totalAddresses++
if totalAddresses > int(r.maxEndpointsPerSubset) {
// Break if we've reached the max number of addresses to mirror
// per EndpointSubset. This allows for a simple 1:1 mapping between
// EndpointSubset and EndpointSlice.
if totalAddressesAdded >= int(r.maxEndpointsPerSubset) {
break
}
if ok := d.addAddress(address, multiKey, false); !ok {
if ok := d.addAddress(address, multiKey, true); ok {
totalAddressesAdded++
} else {
numInvalidAddresses++
klog.Warningf("Address in %s/%s Endpoints is not a valid IP, it will not be mirrored to an EndpointSlice: %s", endpoints.Namespace, endpoints.Name, address.IP)
}
}
if numInvalidAddresses > 0 {
r.eventRecorder.Eventf(endpoints, corev1.EventTypeWarning, InvalidIPAddress,
"Skipped %d invalid IP addresses when mirroring to EndpointSlices", numInvalidAddresses)
}
addressesSkipped += totalAddresses - totalAddressesAdded
}
// This metric includes addresses skipped for being invalid or exceeding
// MaxEndpointsPerSubset.
metrics.AddressesSkippedPerSync.WithLabelValues().Observe(float64(addressesSkipped))
// Record an event on the Endpoints resource if we skipped mirroring for any
// invalid IP addresses.
if numInvalidAddresses > 0 {
r.eventRecorder.Eventf(endpoints, corev1.EventTypeWarning, InvalidIPAddress,
"Skipped %d invalid IP addresses when mirroring to EndpointSlices", numInvalidAddresses)
}
// Record a separate event if we skipped mirroring due to the number of
// addresses exceeding MaxEndpointsPerSubset.
if addressesSkipped > numInvalidAddresses {
klog.Warningf("%d addresses in %s/%s Endpoints were skipped due to exceeding MaxEndpointsPerSubset", addressesSkipped, endpoints.Namespace, endpoints.Name)
r.eventRecorder.Eventf(endpoints, corev1.EventTypeWarning, TooManyAddressesToMirror,
"A max of %d addresses can be mirrored to EndpointSlices per Endpoints subset. %d addresses were skipped", r.maxEndpointsPerSubset, addressesSkipped)
}
// Build data structures for existing state.

View File

@ -533,7 +533,7 @@ func TestReconcile(t *testing.T) {
existingEndpointSlices: []*discovery.EndpointSlice{},
expectedNumSlices: 2,
expectedClientActions: 2,
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 3, addedPerSync: 3, numCreated: 2},
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 3, addedPerSync: 3, skippedPerSync: 2, numCreated: 2},
}, {
testName: "Endpoints with 2 subsets, multiple ports, all invalid addresses",
subsets: []corev1.EndpointSubset{{
@ -582,9 +582,9 @@ func TestReconcile(t *testing.T) {
existingEndpointSlices: []*discovery.EndpointSlice{},
expectedNumSlices: 0,
expectedClientActions: 0,
expectedMetrics: &expectedMetrics{desiredSlices: 0, actualSlices: 0, desiredEndpoints: 0, addedPerSync: 0, numCreated: 0},
expectedMetrics: &expectedMetrics{desiredSlices: 0, actualSlices: 0, desiredEndpoints: 0, addedPerSync: 0, skippedPerSync: 5, numCreated: 0},
}, {
testName: "Endpoints with 2 subsets, multiple ports and addresses, existing EndpointSlice with some addresses",
testName: "Endpoints with 2 subsets, 1 exceeding maxEndpointsPerSubset",
subsets: []corev1.EndpointSubset{{
Ports: []corev1.EndpointPort{{
Name: "http",
@ -632,7 +632,7 @@ func TestReconcile(t *testing.T) {
expectedNumSlices: 2,
expectedClientActions: 2,
maxEndpointsPerSubset: 2,
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 4, addedPerSync: 4, updatedPerSync: 0, removedPerSync: 0, numCreated: 2, numUpdated: 0},
expectedMetrics: &expectedMetrics{desiredSlices: 2, actualSlices: 2, desiredEndpoints: 4, addedPerSync: 4, updatedPerSync: 0, removedPerSync: 0, skippedPerSync: 1, numCreated: 2, numUpdated: 0},
}}
for _, tc := range testCases {
@ -885,6 +885,7 @@ type expectedMetrics struct {
addedPerSync int
updatedPerSync int
removedPerSync int
skippedPerSync int
numCreated int
numUpdated int
numDeleted int
@ -929,6 +930,12 @@ func expectMetrics(t *testing.T, em expectedMetrics) {
t.Errorf("Expected endpointsRemovedPerSync to be %d, got %v", em.removedPerSync, actualRemovedPerSync)
}
actualSkippedPerSync, err := testutil.GetHistogramMetricValue(metrics.AddressesSkippedPerSync.WithLabelValues())
handleErr(t, err, "addressesSkippedPerSync")
if actualSkippedPerSync != float64(em.skippedPerSync) {
t.Errorf("Expected addressesSkippedPerSync to be %d, got %v", em.skippedPerSync, actualSkippedPerSync)
}
actualCreated, err := testutil.GetCounterMetricValue(metrics.EndpointSliceChanges.WithLabelValues("create"))
handleErr(t, err, "endpointSliceChangesCreated")
if actualCreated != float64(em.numCreated) {
@ -962,6 +969,7 @@ func setupMetrics() {
metrics.EndpointsAddedPerSync.Delete(map[string]string{})
metrics.EndpointsUpdatedPerSync.Delete(map[string]string{})
metrics.EndpointsRemovedPerSync.Delete(map[string]string{})
metrics.AddressesSkippedPerSync.Delete(map[string]string{})
metrics.EndpointSliceChanges.Delete(map[string]string{"operation": "create"})
metrics.EndpointSliceChanges.Delete(map[string]string{"operation": "update"})
metrics.EndpointSliceChanges.Delete(map[string]string{"operation": "delete"})