fix race on ServiceCIDR deletion

When a ServiceCIDR is deleted, the service CIDR controller on the
controller manager verifies that is safe to be deleted before removing
the finalizer, howerver, since the information of deletion takes time to
propragate, there can be a race where the apiserver allocators didn't
receive the information of deletion and assign an IP address that will
be orphan.

To avoid this race, the service cidr controller waits a grace period
before removing the finalizer to ensure the allocators do not assign any
new IP Address from that range before is completely deleted.

Change-Id: Ib34d32c0bdde91c6e84f1d056db9374589b25c0b
This commit is contained in:
Antonio Ojea 2023-10-29 17:04:52 +00:00
parent e3a0df26a8
commit 599597ca65
2 changed files with 64 additions and 34 deletions

View File

@ -57,6 +57,11 @@ const (
controllerName = "service-cidr-controller"
ServiceCIDRProtectionFinalizer = "networking.k8s.io/service-cidr-finalizer"
// deletionGracePeriod is the time in seconds to wait to remove the finalizer from a ServiceCIDR to ensure the
// deletion informations has been propagated to the apiserver allocators to avoid allocating any IP address
// before we complete delete the ServiceCIDR
deletionGracePeriod = 10 * time.Second
)
// NewController returns a new *Controller.
@ -358,21 +363,30 @@ func (c *Controller) sync(ctx context.Context, key string) error {
if err != nil {
return err
}
// if there are no IPAddress depending on this ServiceCIDR is safe to remove it
if ok {
return c.removeServiceCIDRFinalizerIfNeeded(ctx, cidr)
if !ok {
// update the status to indicate why the ServiceCIDR can not be deleted
svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions(
metav1apply.Condition().
WithType(networkingapiv1alpha1.ServiceCIDRConditionReady).
WithStatus(metav1.ConditionFalse).
WithReason(networkingapiv1alpha1.ServiceCIDRReasonTerminating).
WithMessage("There are still IPAddresses referencing the ServiceCIDR, please remove them or create a new ServiceCIDR").
WithLastTransitionTime(metav1.Now()))
svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus)
_, err = c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true})
return err
}
// update the status to indicate why the ServiceCIDR can not be deleted
svcApplyStatus := networkingapiv1alpha1apply.ServiceCIDRStatus().WithConditions(
metav1apply.Condition().
WithType(networkingapiv1alpha1.ServiceCIDRConditionReady).
WithStatus(metav1.ConditionFalse).
WithReason(networkingapiv1alpha1.ServiceCIDRReasonTerminating).
WithMessage("There are still IPAddresses referencing the ServiceCIDR, please remove them or create a new ServiceCIDR").
WithLastTransitionTime(metav1.Now()))
svcApply := networkingapiv1alpha1apply.ServiceCIDR(cidr.Name).WithStatus(svcApplyStatus)
_, err = c.client.NetworkingV1alpha1().ServiceCIDRs().ApplyStatus(ctx, svcApply, metav1.ApplyOptions{FieldManager: controllerName, Force: true})
return err
// If there are no IPAddress depending on this ServiceCIDR is safe to remove it,
// however, there can be a race when the allocators still consider the ServiceCIDR
// ready and allocate a new IPAddress from them, to avoid that, we wait during a
// a grace period to be sure the deletion change has been propagated to the allocators
// and no new IPAddress is going to be allocated.
timeUntilDeleted := deletionGracePeriod - time.Since(cidr.GetDeletionTimestamp().Time)
if timeUntilDeleted > 0 {
c.queue.AddAfter(key, timeUntilDeleted)
return nil
}
return c.removeServiceCIDRFinalizerIfNeeded(ctx, cidr)
}
// Created or Updated, the ServiceCIDR must have a finalizer.

View File

@ -19,6 +19,7 @@ package servicecidrs
import (
"context"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
@ -33,6 +34,7 @@ import (
"k8s.io/kubernetes/pkg/controlplane/controller/defaultservicecidr"
"k8s.io/kubernetes/pkg/registry/core/service/ipallocator"
netutils "k8s.io/utils/net"
"k8s.io/utils/ptr"
)
type testController struct {
@ -79,10 +81,17 @@ func newController(t *testing.T, cidrs []*networkingapiv1alpha1.ServiceCIDR, ips
}
func TestControllerSync(t *testing.T) {
now := time.Now()
// ServiceCIDR that is just being deleted
deletingServiceCIDR := makeServiceCIDR("deleting-cidr", "192.168.0.0/24", "2001:db2::/64")
now := metav1.Now()
deletingServiceCIDR.Finalizers = []string{ServiceCIDRProtectionFinalizer}
deletingServiceCIDR.DeletionTimestamp = &now
deletingServiceCIDR.DeletionTimestamp = ptr.To[metav1.Time](metav1.Now())
// ServiceCIDR that has been deleted for longer than the deletionGracePeriod
deletedServiceCIDR := makeServiceCIDR("deleted-cidr", "192.168.0.0/24", "2001:db2::/64")
deletedServiceCIDR.Finalizers = []string{ServiceCIDRProtectionFinalizer}
deletedServiceCIDR.DeletionTimestamp = ptr.To[metav1.Time](metav1.NewTime(now.Add(-deletionGracePeriod - 1*time.Second)))
testCases := []struct {
name string
@ -112,105 +121,112 @@ func TestControllerSync(t *testing.T) {
},
{
name: "service CIDR being deleted must remove the finalizer",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletedServiceCIDR,
},
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", ""}},
},
{
name: "service CIDR being deleted but withing the grace period must be requeued not remove the finalizer", // TODO: assert is actually requeued
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
},
cidrSynced: deletingServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", ""}},
actions: [][]string{},
},
{
name: "service CIDR being deleted with IPv4 addresses should update the status",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("192.168.0.1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", "status"}},
},
{
name: "service CIDR being deleted and overlapping same range and IPv4 addresses should remove the finalizer",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
makeServiceCIDR("overlapping", "192.168.0.0/24", "2001:db2::/64"),
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("192.168.0.1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", ""}},
},
{
name: "service CIDR being deleted and overlapping and IPv4 addresses should remove the finalizer",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
makeServiceCIDR("overlapping", "192.168.0.0/16", "2001:db2::/64"),
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("192.168.0.1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", ""}},
},
{
name: "service CIDR being deleted and not overlapping and IPv4 addresses should update the status",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
makeServiceCIDR("overlapping", "192.168.255.0/26", "2001:db2::/64"),
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("192.168.0.1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", "status"}},
},
{
name: "service CIDR being deleted with IPv6 addresses should update the status",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("2001:db2::1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", "status"}},
},
{
name: "service CIDR being deleted and overlapping same range and IPv6 addresses should remove the finalizer",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
makeServiceCIDR("overlapping", "192.168.0.0/24", "2001:db2::/64"),
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("2001:db2::1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", ""}},
},
{
name: "service CIDR being deleted and overlapping and IPv6 addresses should remove the finalizer",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
makeServiceCIDR("overlapping", "192.168.0.0/16", "2001:db2::/48"),
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("2001:db2::1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", ""}},
},
{
name: "service CIDR being deleted and not overlapping and IPv6 addresses should update the status",
cidrs: []*networkingapiv1alpha1.ServiceCIDR{
deletingServiceCIDR,
deletedServiceCIDR,
makeServiceCIDR("overlapping", "192.168.255.0/26", "2001:db2:a:b::/64"),
},
ips: []*networkingapiv1alpha1.IPAddress{
makeIPAddress("2001:db2::1"),
},
cidrSynced: deletingServiceCIDR.Name,
cidrSynced: deletedServiceCIDR.Name,
actions: [][]string{{"patch", "servicecidrs", "status"}},
},
}