From 599597ca65a1a4aa3359b248b497f61848911e0f Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Sun, 29 Oct 2023 17:04:52 +0000 Subject: [PATCH] fix race on ServiceCIDR deletion When a ServiceCIDR is deleted, the service CIDR controller on the controller manager verifies that is safe to be deleted before removing the finalizer, howerver, since the information of deletion takes time to propragate, there can be a race where the apiserver allocators didn't receive the information of deletion and assign an IP address that will be orphan. To avoid this race, the service cidr controller waits a grace period before removing the finalizer to ensure the allocators do not assign any new IP Address from that range before is completely deleted. Change-Id: Ib34d32c0bdde91c6e84f1d056db9374589b25c0b --- .../servicecidrs/servicecidrs_controller.go | 42 +++++++++----- .../servicecidrs_controller_test.go | 56 ++++++++++++------- 2 files changed, 64 insertions(+), 34 deletions(-) diff --git a/pkg/controller/servicecidrs/servicecidrs_controller.go b/pkg/controller/servicecidrs/servicecidrs_controller.go index 24191adcf6b..0d00f30dac2 100644 --- a/pkg/controller/servicecidrs/servicecidrs_controller.go +++ b/pkg/controller/servicecidrs/servicecidrs_controller.go @@ -57,6 +57,11 @@ const ( controllerName = "service-cidr-controller" ServiceCIDRProtectionFinalizer = "networking.k8s.io/service-cidr-finalizer" + + // deletionGracePeriod is the time in seconds to wait to remove the finalizer from a ServiceCIDR to ensure the + // deletion informations has been propagated to the apiserver allocators to avoid allocating any IP address + // before we complete delete the ServiceCIDR + deletionGracePeriod = 10 * time.Second ) // NewController returns a new *Controller. @@ -358,21 +363,30 @@ func (c *Controller) sync(ctx context.Context, key string) error { if err != nil { return err } - // if there are no IPAddress depending on this ServiceCIDR is safe to remove it - if ok { - return c.removeServiceCIDRFinalizerIfNeeded(ctx, cidr) + if !ok { + // update the status to indicate why the ServiceCIDR can not be deleted + svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions( + metav1apply.Condition(). + WithType(networkingapiv1alpha1.ServiceCIDRConditionReady). + WithStatus(metav1.ConditionFalse). + WithReason(networkingapiv1alpha1.ServiceCIDRReasonTerminating). + WithMessage("There are still IPAddresses referencing the ServiceCIDR, please remove them or create a new ServiceCIDR"). + WithLastTransitionTime(metav1.Now())) + svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus) + _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + return err } - // update the status to indicate why the ServiceCIDR can not be deleted - svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions( - metav1apply.Condition(). - WithType(networkingapiv1alpha1.ServiceCIDRConditionReady). - WithStatus(metav1.ConditionFalse). - WithReason(networkingapiv1alpha1.ServiceCIDRReasonTerminating). - WithMessage("There are still IPAddresses referencing the ServiceCIDR, please remove them or create a new ServiceCIDR"). - WithLastTransitionTime(metav1.Now())) - svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus) - _, err = c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true}) - return err + // If there are no IPAddress depending on this ServiceCIDR is safe to remove it, + // however, there can be a race when the allocators still consider the ServiceCIDR + // ready and allocate a new IPAddress from them, to avoid that, we wait during a + // a grace period to be sure the deletion change has been propagated to the allocators + // and no new IPAddress is going to be allocated. + timeUntilDeleted := deletionGracePeriod - time.Since(cidr.GetDeletionTimestamp().Time) + if timeUntilDeleted > 0 { + c.queue.AddAfter(key, timeUntilDeleted) + return nil + } + return c.removeServiceCIDRFinalizerIfNeeded(ctx, cidr) } // Created or Updated, the ServiceCIDR must have a finalizer. diff --git a/pkg/controller/servicecidrs/servicecidrs_controller_test.go b/pkg/controller/servicecidrs/servicecidrs_controller_test.go index 5a3d9a03355..9c5e759234c 100644 --- a/pkg/controller/servicecidrs/servicecidrs_controller_test.go +++ b/pkg/controller/servicecidrs/servicecidrs_controller_test.go @@ -19,6 +19,7 @@ package servicecidrs import ( "context" "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -33,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/controlplane/controller/defaultservicecidr" "k8s.io/kubernetes/pkg/registry/core/service/ipallocator" netutils "k8s.io/utils/net" + "k8s.io/utils/ptr" ) type testController struct { @@ -79,10 +81,17 @@ func newController(t *testing.T, cidrs []*networkingapiv1alpha1.ServiceCIDR, ips } func TestControllerSync(t *testing.T) { + now := time.Now() + + // ServiceCIDR that is just being deleted deletingServiceCIDR := makeServiceCIDR("deleting-cidr", "192.168.0.0/24", "2001:db2::/64") - now := metav1.Now() deletingServiceCIDR.Finalizers = []string{ServiceCIDRProtectionFinalizer} - deletingServiceCIDR.DeletionTimestamp = &now + deletingServiceCIDR.DeletionTimestamp = ptr.To[metav1.Time](metav1.Now()) + + // ServiceCIDR that has been deleted for longer than the deletionGracePeriod + deletedServiceCIDR := makeServiceCIDR("deleted-cidr", "192.168.0.0/24", "2001:db2::/64") + deletedServiceCIDR.Finalizers = []string{ServiceCIDRProtectionFinalizer} + deletedServiceCIDR.DeletionTimestamp = ptr.To[metav1.Time](metav1.NewTime(now.Add(-deletionGracePeriod - 1*time.Second))) testCases := []struct { name string @@ -112,105 +121,112 @@ func TestControllerSync(t *testing.T) { }, { name: "service CIDR being deleted must remove the finalizer", + cidrs: []*networkingapiv1alpha1.ServiceCIDR{ + deletedServiceCIDR, + }, + cidrSynced: deletedServiceCIDR.Name, + actions: [][]string{{"patch", "servicecidrs", ""}}, + }, + { + name: "service CIDR being deleted but withing the grace period must be requeued not remove the finalizer", // TODO: assert is actually requeued cidrs: []*networkingapiv1alpha1.ServiceCIDR{ deletingServiceCIDR, }, cidrSynced: deletingServiceCIDR.Name, - actions: [][]string{{"patch", "servicecidrs", ""}}, + actions: [][]string{}, }, { name: "service CIDR being deleted with IPv4 addresses should update the status", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("192.168.0.1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", "status"}}, }, { name: "service CIDR being deleted and overlapping same range and IPv4 addresses should remove the finalizer", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, makeServiceCIDR("overlapping", "192.168.0.0/24", "2001:db2::/64"), }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("192.168.0.1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", ""}}, }, { name: "service CIDR being deleted and overlapping and IPv4 addresses should remove the finalizer", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, makeServiceCIDR("overlapping", "192.168.0.0/16", "2001:db2::/64"), }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("192.168.0.1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", ""}}, }, { name: "service CIDR being deleted and not overlapping and IPv4 addresses should update the status", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, makeServiceCIDR("overlapping", "192.168.255.0/26", "2001:db2::/64"), }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("192.168.0.1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", "status"}}, }, - { name: "service CIDR being deleted with IPv6 addresses should update the status", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("2001:db2::1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", "status"}}, }, { name: "service CIDR being deleted and overlapping same range and IPv6 addresses should remove the finalizer", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, makeServiceCIDR("overlapping", "192.168.0.0/24", "2001:db2::/64"), }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("2001:db2::1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", ""}}, }, { name: "service CIDR being deleted and overlapping and IPv6 addresses should remove the finalizer", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, makeServiceCIDR("overlapping", "192.168.0.0/16", "2001:db2::/48"), }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("2001:db2::1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", ""}}, }, { name: "service CIDR being deleted and not overlapping and IPv6 addresses should update the status", cidrs: []*networkingapiv1alpha1.ServiceCIDR{ - deletingServiceCIDR, + deletedServiceCIDR, makeServiceCIDR("overlapping", "192.168.255.0/26", "2001:db2:a:b::/64"), }, ips: []*networkingapiv1alpha1.IPAddress{ makeIPAddress("2001:db2::1"), }, - cidrSynced: deletingServiceCIDR.Name, + cidrSynced: deletedServiceCIDR.Name, actions: [][]string{{"patch", "servicecidrs", "status"}}, }, }