From 75f6c249235b40b24e9ea1efdb1ff81dd76a8d68 Mon Sep 17 00:00:00 2001 From: Rob Scott Date: Tue, 30 Jul 2019 15:42:01 -0700 Subject: [PATCH] Adding EndpointSlice controller --- api/api-rules/violation_exceptions.list | 3 + cmd/kube-controller-manager/app/BUILD | 2 + .../app/controllermanager.go | 2 + cmd/kube-controller-manager/app/discovery.go | 44 ++ cmd/kube-controller-manager/app/options/BUILD | 3 + .../app/options/endpointslicecontroller.go | 81 +++ .../app/options/options.go | 9 + .../app/options/options_test.go | 9 + hack/.golint_failures | 1 + pkg/controller/.import-restrictions | 2 + pkg/controller/BUILD | 2 + pkg/controller/apis/config/BUILD | 1 + pkg/controller/apis/config/types.go | 4 + pkg/controller/apis/config/v1alpha1/BUILD | 1 + .../apis/config/v1alpha1/defaults.go | 3 + pkg/controller/apis/config/v1alpha1/doc.go | 1 + .../v1alpha1/zz_generated.conversion.go | 7 + .../apis/config/zz_generated.deepcopy.go | 1 + pkg/controller/endpoint/BUILD | 19 +- .../endpoint/endpoints_controller.go | 128 +--- .../endpoint/endpoints_controller_test.go | 87 +-- .../endpoint/trigger_time_tracker.go | 163 ----- .../endpoint/trigger_time_tracker_test.go | 204 ------ pkg/controller/endpointslice/BUILD | 89 +++ pkg/controller/endpointslice/config/BUILD | 29 + pkg/controller/endpointslice/config/doc.go | 19 + pkg/controller/endpointslice/config/types.go | 31 + .../endpointslice/config/v1alpha1/BUILD | 36 ++ .../config/v1alpha1/conversion.go | 40 ++ .../endpointslice/config/v1alpha1/defaults.go | 41 ++ .../endpointslice/config/v1alpha1/doc.go | 21 + .../endpointslice/config/v1alpha1/register.go | 34 + .../v1alpha1/zz_generated.conversion.go | 103 +++ .../config/v1alpha1/zz_generated.deepcopy.go | 21 + .../config/zz_generated.deepcopy.go | 37 ++ pkg/controller/endpointslice/endpointset.go | 96 +++ .../endpointslice/endpointslice_controller.go | 343 ++++++++++ .../endpointslice_controller_test.go | 326 ++++++++++ pkg/controller/endpointslice/reconciler.go | 301 +++++++++ .../endpointslice/reconciler_test.go | 607 ++++++++++++++++++ pkg/controller/endpointslice/utils.go | 261 ++++++++ pkg/controller/endpointslice/utils_test.go | 335 ++++++++++ pkg/controller/util/endpoint/BUILD | 49 ++ .../util/endpoint/controller_utils.go | 174 +++++ .../util/endpoint/controller_utils_test.go | 226 +++++++ .../util/endpoint/trigger_time_tracker.go | 161 +++++ .../endpoint/trigger_time_tracker_test.go | 204 ++++++ pkg/features/kube_features.go | 7 + .../rbac/bootstrappolicy/controller_policy.go | 11 + .../authorizer/rbac/bootstrappolicy/policy.go | 1 + .../config/v1alpha1/types.go | 17 + .../config/v1alpha1/zz_generated.deepcopy.go | 17 + 52 files changed, 3852 insertions(+), 562 deletions(-) create mode 100644 cmd/kube-controller-manager/app/discovery.go create mode 100644 cmd/kube-controller-manager/app/options/endpointslicecontroller.go delete mode 100644 pkg/controller/endpoint/trigger_time_tracker.go delete mode 100644 pkg/controller/endpoint/trigger_time_tracker_test.go create mode 100644 pkg/controller/endpointslice/BUILD create mode 100644 pkg/controller/endpointslice/config/BUILD create mode 100644 pkg/controller/endpointslice/config/doc.go create mode 100644 pkg/controller/endpointslice/config/types.go create mode 100644 pkg/controller/endpointslice/config/v1alpha1/BUILD create mode 100644 pkg/controller/endpointslice/config/v1alpha1/conversion.go create mode 100644 pkg/controller/endpointslice/config/v1alpha1/defaults.go create mode 100644 pkg/controller/endpointslice/config/v1alpha1/doc.go create mode 
100644 pkg/controller/endpointslice/config/v1alpha1/register.go create mode 100644 pkg/controller/endpointslice/config/v1alpha1/zz_generated.conversion.go create mode 100644 pkg/controller/endpointslice/config/v1alpha1/zz_generated.deepcopy.go create mode 100644 pkg/controller/endpointslice/config/zz_generated.deepcopy.go create mode 100644 pkg/controller/endpointslice/endpointset.go create mode 100644 pkg/controller/endpointslice/endpointslice_controller.go create mode 100644 pkg/controller/endpointslice/endpointslice_controller_test.go create mode 100644 pkg/controller/endpointslice/reconciler.go create mode 100644 pkg/controller/endpointslice/reconciler_test.go create mode 100644 pkg/controller/endpointslice/utils.go create mode 100644 pkg/controller/endpointslice/utils_test.go create mode 100644 pkg/controller/util/endpoint/BUILD create mode 100644 pkg/controller/util/endpoint/controller_utils.go create mode 100644 pkg/controller/util/endpoint/controller_utils_test.go create mode 100644 pkg/controller/util/endpoint/trigger_time_tracker.go create mode 100644 pkg/controller/util/endpoint/trigger_time_tracker_test.go diff --git a/api/api-rules/violation_exceptions.list b/api/api-rules/violation_exceptions.list index c6b9c863a36..ff5a0fff58c 100644 --- a/api/api-rules/violation_exceptions.list +++ b/api/api-rules/violation_exceptions.list @@ -576,6 +576,8 @@ API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,D API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,DeprecatedControllerConfiguration,RegisterRetryCount API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointControllerConfiguration,ConcurrentEndpointSyncs API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointControllerConfiguration,EndpointUpdatesBatchPeriod +API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointSliceControllerConfiguration,ConcurrentServiceEndpointSyncs +API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointSliceControllerConfiguration,MaxEndpointsPerSlice API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,GarbageCollectorControllerConfiguration,ConcurrentGCSyncs API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,GarbageCollectorControllerConfiguration,EnableGarbageCollector API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,GarbageCollectorControllerConfiguration,GCIgnoredResources @@ -616,6 +618,7 @@ API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,K API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,DeploymentController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,DeprecatedController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,EndpointController +API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,EndpointSliceController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,GarbageCollectorController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,Generic API rule violation: 
names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,HPAController diff --git a/cmd/kube-controller-manager/app/BUILD b/cmd/kube-controller-manager/app/BUILD index b476d322778..0070c5c4603 100644 --- a/cmd/kube-controller-manager/app/BUILD +++ b/cmd/kube-controller-manager/app/BUILD @@ -11,6 +11,7 @@ go_library( "cloudproviders.go", "controllermanager.go", "core.go", + "discovery.go", "flags_providers.go", "import_known_versions.go", "plugins.go", @@ -54,6 +55,7 @@ go_library( "//pkg/controller/deployment:go_default_library", "//pkg/controller/disruption:go_default_library", "//pkg/controller/endpoint:go_default_library", + "//pkg/controller/endpointslice:go_default_library", "//pkg/controller/garbagecollector:go_default_library", "//pkg/controller/job:go_default_library", "//pkg/controller/namespace:go_default_library", diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 2ff0c41f6b5..18620607016 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -360,6 +360,7 @@ func KnownControllers() []string { // ControllersDisabledByDefault is the set of controllers which is disabled by default var ControllersDisabledByDefault = sets.NewString( "bootstrapsigner", + "endpointslice", "tokencleaner", ) @@ -372,6 +373,7 @@ const ( func NewControllerInitializers(loopMode ControllerLoopMode) map[string]InitFunc { controllers := map[string]InitFunc{} controllers["endpoint"] = startEndpointController + controllers["endpointslice"] = startEndpointSliceController controllers["replicationcontroller"] = startReplicationController controllers["podgc"] = startPodGCController controllers["resourcequota"] = startResourceQuotaController diff --git a/cmd/kube-controller-manager/app/discovery.go b/cmd/kube-controller-manager/app/discovery.go new file mode 100644 index 00000000000..c4b0ef5c5a6 --- /dev/null +++ b/cmd/kube-controller-manager/app/discovery.go @@ -0,0 +1,44 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package app implements a server that runs a set of active +// components. This includes replication controllers, service endpoints and +// nodes. 
+// +package app + +import ( + "net/http" + + "k8s.io/apimachinery/pkg/runtime/schema" + endpointslicecontroller "k8s.io/kubernetes/pkg/controller/endpointslice" +) + +func startEndpointSliceController(ctx ControllerContext) (http.Handler, bool, error) { + if !ctx.AvailableResources[schema.GroupVersionResource{Group: "discovery", Version: "v1alpha1", Resource: "endpointslices"}] { + return nil, false, nil + } + + go endpointslicecontroller.NewController( + ctx.InformerFactory.Core().V1().Pods(), + ctx.InformerFactory.Core().V1().Services(), + ctx.InformerFactory.Core().V1().Nodes(), + ctx.InformerFactory.Discovery().V1alpha1().EndpointSlices(), + ctx.ComponentConfig.EndpointSliceController.MaxEndpointsPerSlice, + ctx.ClientBuilder.ClientOrDie("endpointslice-controller"), + ).Run(int(ctx.ComponentConfig.EndpointSliceController.ConcurrentServiceEndpointSyncs), ctx.Stop) + return nil, true, nil +} diff --git a/cmd/kube-controller-manager/app/options/BUILD b/cmd/kube-controller-manager/app/options/BUILD index 5d6a80c5e1e..2b6aafdca56 100644 --- a/cmd/kube-controller-manager/app/options/BUILD +++ b/cmd/kube-controller-manager/app/options/BUILD @@ -15,6 +15,7 @@ go_library( "deploymentcontroller.go", "deprecatedcontroller.go", "endpointcontroller.go", + "endpointslicecontroller.go", "garbagecollectorcontroller.go", "hpacontroller.go", "jobcontroller.go", @@ -41,6 +42,7 @@ go_library( "//pkg/controller/daemon/config:go_default_library", "//pkg/controller/deployment/config:go_default_library", "//pkg/controller/endpoint/config:go_default_library", + "//pkg/controller/endpointslice/config:go_default_library", "//pkg/controller/garbagecollector:go_default_library", "//pkg/controller/garbagecollector/config:go_default_library", "//pkg/controller/job/config:go_default_library", @@ -100,6 +102,7 @@ go_test( "//pkg/controller/daemon/config:go_default_library", "//pkg/controller/deployment/config:go_default_library", "//pkg/controller/endpoint/config:go_default_library", + "//pkg/controller/endpointslice/config:go_default_library", "//pkg/controller/garbagecollector/config:go_default_library", "//pkg/controller/job/config:go_default_library", "//pkg/controller/namespace/config:go_default_library", diff --git a/cmd/kube-controller-manager/app/options/endpointslicecontroller.go b/cmd/kube-controller-manager/app/options/endpointslicecontroller.go new file mode 100644 index 00000000000..cc3c88d08e9 --- /dev/null +++ b/cmd/kube-controller-manager/app/options/endpointslicecontroller.go @@ -0,0 +1,81 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package options + +import ( + "fmt" + "github.com/spf13/pflag" + + endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config" +) + +const ( + minConcurrentServiceEndpointSyncs = 1 + maxConcurrentServiceEndpointSyncs = 50 + minMaxEndpointsPerSlice = 1 + maxMaxEndpointsPerSlice = 1000 +) + +// EndpointSliceControllerOptions holds the EndpointSliceController options. 
+type EndpointSliceControllerOptions struct { + *endpointsliceconfig.EndpointSliceControllerConfiguration +} + +// AddFlags adds flags related to EndpointSliceController for controller manager to the specified FlagSet. +func (o *EndpointSliceControllerOptions) AddFlags(fs *pflag.FlagSet) { + if o == nil { + return + } + + fs.Int32Var(&o.ConcurrentServiceEndpointSyncs, "concurrent-service-endpoint-syncs", o.ConcurrentServiceEndpointSyncs, "The number of service endpoint syncing operations that will be done concurrently. Larger number = faster endpoint slice updating, but more CPU (and network) load. Defaults to 5.") + fs.Int32Var(&o.MaxEndpointsPerSlice, "max-endpoints-per-slice", o.MaxEndpointsPerSlice, "The maximum number of endpoints that will be added to an EndpointSlice. More endpoints per slice will result in less endpoint slices, but larger resources. Defaults to 100.") +} + +// ApplyTo fills up EndpointSliceController config with options. +func (o *EndpointSliceControllerOptions) ApplyTo(cfg *endpointsliceconfig.EndpointSliceControllerConfiguration) error { + if o == nil { + return nil + } + + cfg.ConcurrentServiceEndpointSyncs = o.ConcurrentServiceEndpointSyncs + cfg.MaxEndpointsPerSlice = o.MaxEndpointsPerSlice + + return nil +} + +// Validate checks validation of EndpointSliceControllerOptions. +func (o *EndpointSliceControllerOptions) Validate() []error { + if o == nil { + return nil + } + + errs := []error{} + + if o.ConcurrentServiceEndpointSyncs < minConcurrentServiceEndpointSyncs { + errs = append(errs, fmt.Errorf("concurrent-service-endpoint-syncs must not be less than %d, but got %d", minConcurrentServiceEndpointSyncs, o.ConcurrentServiceEndpointSyncs)) + } else if o.ConcurrentServiceEndpointSyncs > maxConcurrentServiceEndpointSyncs { + errs = append(errs, fmt.Errorf("concurrent-service-endpoint-syncs must not be more than %d, but got %d", maxConcurrentServiceEndpointSyncs, o.ConcurrentServiceEndpointSyncs)) + } + + if o.MaxEndpointsPerSlice < minMaxEndpointsPerSlice { + errs = append(errs, fmt.Errorf("max-endpoints-per-slice must not be less than %d, but got %d", minMaxEndpointsPerSlice, o.MaxEndpointsPerSlice)) + } else if o.MaxEndpointsPerSlice > maxMaxEndpointsPerSlice { + errs = append(errs, fmt.Errorf("max-endpoints-per-slice must not be more than %d, but got %d", maxMaxEndpointsPerSlice, o.MaxEndpointsPerSlice)) + } + + return errs +} diff --git a/cmd/kube-controller-manager/app/options/options.go b/cmd/kube-controller-manager/app/options/options.go index c2ef328d6ec..a21a0fe6fc1 100644 --- a/cmd/kube-controller-manager/app/options/options.go +++ b/cmd/kube-controller-manager/app/options/options.go @@ -66,6 +66,7 @@ type KubeControllerManagerOptions struct { StatefulSetController *StatefulSetControllerOptions DeprecatedFlags *DeprecatedControllerOptions EndpointController *EndpointControllerOptions + EndpointSliceController *EndpointSliceControllerOptions GarbageCollectorController *GarbageCollectorControllerOptions HPAController *HPAControllerOptions JobController *JobControllerOptions @@ -124,6 +125,9 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) { EndpointController: &EndpointControllerOptions{ &componentConfig.EndpointController, }, + EndpointSliceController: &EndpointSliceControllerOptions{ + &componentConfig.EndpointSliceController, + }, GarbageCollectorController: &GarbageCollectorControllerOptions{ &componentConfig.GarbageCollectorController, }, @@ -226,6 +230,7 @@ func (s *KubeControllerManagerOptions) Flags(allControllers 
[]string, disabledBy s.DaemonSetController.AddFlags(fss.FlagSet("daemonset controller")) s.DeprecatedFlags.AddFlags(fss.FlagSet("deprecated")) s.EndpointController.AddFlags(fss.FlagSet("endpoint controller")) + s.EndpointSliceController.AddFlags(fss.FlagSet("endpointslice controller")) s.GarbageCollectorController.AddFlags(fss.FlagSet("garbagecollector controller")) s.HPAController.AddFlags(fss.FlagSet("horizontalpodautoscaling controller")) s.JobController.AddFlags(fss.FlagSet("job controller")) @@ -277,6 +282,9 @@ func (s *KubeControllerManagerOptions) ApplyTo(c *kubecontrollerconfig.Config) e if err := s.EndpointController.ApplyTo(&c.ComponentConfig.EndpointController); err != nil { return err } + if err := s.EndpointSliceController.ApplyTo(&c.ComponentConfig.EndpointSliceController); err != nil { + return err + } if err := s.GarbageCollectorController.ApplyTo(&c.ComponentConfig.GarbageCollectorController); err != nil { return err } @@ -355,6 +363,7 @@ func (s *KubeControllerManagerOptions) Validate(allControllers []string, disable errs = append(errs, s.StatefulSetController.Validate()...) errs = append(errs, s.DeprecatedFlags.Validate()...) errs = append(errs, s.EndpointController.Validate()...) + errs = append(errs, s.EndpointSliceController.Validate()...) errs = append(errs, s.GarbageCollectorController.Validate()...) errs = append(errs, s.HPAController.Validate()...) errs = append(errs, s.JobController.Validate()...) diff --git a/cmd/kube-controller-manager/app/options/options_test.go b/cmd/kube-controller-manager/app/options/options_test.go index 67d9242e3b5..d1ede9e882e 100644 --- a/cmd/kube-controller-manager/app/options/options_test.go +++ b/cmd/kube-controller-manager/app/options/options_test.go @@ -35,6 +35,7 @@ import ( daemonconfig "k8s.io/kubernetes/pkg/controller/daemon/config" deploymentconfig "k8s.io/kubernetes/pkg/controller/deployment/config" endpointconfig "k8s.io/kubernetes/pkg/controller/endpoint/config" + endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config" garbagecollectorconfig "k8s.io/kubernetes/pkg/controller/garbagecollector/config" jobconfig "k8s.io/kubernetes/pkg/controller/job/config" namespaceconfig "k8s.io/kubernetes/pkg/controller/namespace/config" @@ -74,6 +75,7 @@ func TestAddFlags(t *testing.T) { "--concurrent-deployment-syncs=10", "--concurrent-statefulset-syncs=15", "--concurrent-endpoint-syncs=10", + "--concurrent-service-endpoint-syncs=10", "--concurrent-gc-syncs=30", "--concurrent-namespace-syncs=20", "--concurrent-replicaset-syncs=10", @@ -111,6 +113,7 @@ func TestAddFlags(t *testing.T) { "--leader-elect-resource-lock=configmap", "--leader-elect-retry-period=5s", "--master=192.168.4.20", + "--max-endpoints-per-slice=200", "--min-resync-period=8h", "--namespace-sync-period=10m", "--node-cidr-mask-size=48", @@ -236,6 +239,12 @@ func TestAddFlags(t *testing.T) { ConcurrentEndpointSyncs: 10, }, }, + EndpointSliceController: &EndpointSliceControllerOptions{ + &endpointsliceconfig.EndpointSliceControllerConfiguration{ + ConcurrentServiceEndpointSyncs: 10, + MaxEndpointsPerSlice: 200, + }, + }, GarbageCollectorController: &GarbageCollectorControllerOptions{ &garbagecollectorconfig.GarbageCollectorControllerConfiguration{ ConcurrentGCSyncs: 30, diff --git a/hack/.golint_failures b/hack/.golint_failures index 124a8f018c9..9043d859031 100644 --- a/hack/.golint_failures +++ b/hack/.golint_failures @@ -66,6 +66,7 @@ pkg/controller/deployment/config/v1alpha1 pkg/controller/disruption pkg/controller/endpoint 
pkg/controller/endpoint/config/v1alpha1 +pkg/controller/endpointslice/config/v1alpha1 pkg/controller/garbagecollector pkg/controller/garbagecollector/config/v1alpha1 pkg/controller/job diff --git a/pkg/controller/.import-restrictions b/pkg/controller/.import-restrictions index 5e3487e4770..9c1d574230b 100644 --- a/pkg/controller/.import-restrictions +++ b/pkg/controller/.import-restrictions @@ -27,6 +27,7 @@ "k8s.io/api/certificates/v1beta1", "k8s.io/api/core/v1", "k8s.io/api/coordination/v1beta1", + "k8s.io/api/discovery/v1alpha1", "k8s.io/api/extensions/v1beta1", "k8s.io/api/policy/v1beta1", "k8s.io/api/rbac/v1", @@ -146,6 +147,7 @@ "k8s.io/client-go/listers/batch/v1", "k8s.io/client-go/listers/certificates/v1beta1", "k8s.io/client-go/listers/core/v1", + "k8s.io/client-go/listers/discovery/v1alpha1", "k8s.io/client-go/listers/coordination/v1beta1", "k8s.io/client-go/listers/extensions/v1beta1", "k8s.io/client-go/listers/policy/v1beta1", diff --git a/pkg/controller/BUILD b/pkg/controller/BUILD index 4a70c1ff19d..445672cd963 100644 --- a/pkg/controller/BUILD +++ b/pkg/controller/BUILD @@ -119,6 +119,7 @@ filegroup( "//pkg/controller/deployment:all-srcs", "//pkg/controller/disruption:all-srcs", "//pkg/controller/endpoint:all-srcs", + "//pkg/controller/endpointslice:all-srcs", "//pkg/controller/garbagecollector:all-srcs", "//pkg/controller/history:all-srcs", "//pkg/controller/job:all-srcs", @@ -137,6 +138,7 @@ filegroup( "//pkg/controller/testutil:all-srcs", "//pkg/controller/ttl:all-srcs", "//pkg/controller/ttlafterfinished:all-srcs", + "//pkg/controller/util/endpoint:all-srcs", "//pkg/controller/util/node:all-srcs", "//pkg/controller/volume/attachdetach:all-srcs", "//pkg/controller/volume/events:all-srcs", diff --git a/pkg/controller/apis/config/BUILD b/pkg/controller/apis/config/BUILD index 4d2dfa85ace..ae35f0823a6 100644 --- a/pkg/controller/apis/config/BUILD +++ b/pkg/controller/apis/config/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/controller/daemon/config:go_default_library", "//pkg/controller/deployment/config:go_default_library", "//pkg/controller/endpoint/config:go_default_library", + "//pkg/controller/endpointslice/config:go_default_library", "//pkg/controller/garbagecollector/config:go_default_library", "//pkg/controller/job/config:go_default_library", "//pkg/controller/namespace/config:go_default_library", diff --git a/pkg/controller/apis/config/types.go b/pkg/controller/apis/config/types.go index 17671118378..b9f1d0f8fcc 100644 --- a/pkg/controller/apis/config/types.go +++ b/pkg/controller/apis/config/types.go @@ -23,6 +23,7 @@ import ( daemonconfig "k8s.io/kubernetes/pkg/controller/daemon/config" deploymentconfig "k8s.io/kubernetes/pkg/controller/deployment/config" endpointconfig "k8s.io/kubernetes/pkg/controller/endpoint/config" + endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config" garbagecollectorconfig "k8s.io/kubernetes/pkg/controller/garbagecollector/config" jobconfig "k8s.io/kubernetes/pkg/controller/job/config" namespaceconfig "k8s.io/kubernetes/pkg/controller/namespace/config" @@ -74,6 +75,9 @@ type KubeControllerManagerConfiguration struct { // EndpointControllerConfiguration holds configuration for EndpointController // related features. EndpointController endpointconfig.EndpointControllerConfiguration + // EndpointSliceControllerConfiguration holds configuration for + // EndpointSliceController related features. 
+ EndpointSliceController endpointsliceconfig.EndpointSliceControllerConfiguration // GarbageCollectorControllerConfiguration holds configuration for // GarbageCollectorController related features. GarbageCollectorController garbagecollectorconfig.GarbageCollectorControllerConfiguration diff --git a/pkg/controller/apis/config/v1alpha1/BUILD b/pkg/controller/apis/config/v1alpha1/BUILD index dce61f10472..75f0021b17d 100644 --- a/pkg/controller/apis/config/v1alpha1/BUILD +++ b/pkg/controller/apis/config/v1alpha1/BUILD @@ -19,6 +19,7 @@ go_library( "//pkg/controller/daemon/config/v1alpha1:go_default_library", "//pkg/controller/deployment/config/v1alpha1:go_default_library", "//pkg/controller/endpoint/config/v1alpha1:go_default_library", + "//pkg/controller/endpointslice/config/v1alpha1:go_default_library", "//pkg/controller/garbagecollector/config/v1alpha1:go_default_library", "//pkg/controller/job/config/v1alpha1:go_default_library", "//pkg/controller/namespace/config/v1alpha1:go_default_library", diff --git a/pkg/controller/apis/config/v1alpha1/defaults.go b/pkg/controller/apis/config/v1alpha1/defaults.go index 12e47bfbecf..207145a341b 100644 --- a/pkg/controller/apis/config/v1alpha1/defaults.go +++ b/pkg/controller/apis/config/v1alpha1/defaults.go @@ -27,6 +27,7 @@ import ( daemonconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/daemon/config/v1alpha1" deploymentconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/deployment/config/v1alpha1" endpointconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpoint/config/v1alpha1" + endpointsliceconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1" garbagecollectorconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/garbagecollector/config/v1alpha1" jobconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/job/config/v1alpha1" namespaceconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/namespace/config/v1alpha1" @@ -78,6 +79,8 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *kubectrlmgrconfigv1alph statefulsetconfigv1alpha1.RecommendedDefaultStatefulSetControllerConfiguration(&obj.StatefulSetController) // Use the default RecommendedDefaultEndpointControllerConfiguration options endpointconfigv1alpha1.RecommendedDefaultEndpointControllerConfiguration(&obj.EndpointController) + // Use the default RecommendedDefaultEndpointSliceControllerConfiguration options + endpointsliceconfigv1alpha1.RecommendedDefaultEndpointSliceControllerConfiguration(&obj.EndpointSliceController) // Use the default RecommendedDefaultGenericControllerManagerConfiguration options garbagecollectorconfigv1alpha1.RecommendedDefaultGarbageCollectorControllerConfiguration(&obj.GarbageCollectorController) // Use the default RecommendedDefaultJobControllerConfiguration options diff --git a/pkg/controller/apis/config/v1alpha1/doc.go b/pkg/controller/apis/config/v1alpha1/doc.go index 5a1df706512..bacd8cda6c4 100644 --- a/pkg/controller/apis/config/v1alpha1/doc.go +++ b/pkg/controller/apis/config/v1alpha1/doc.go @@ -21,6 +21,7 @@ limitations under the License. 
// +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/daemon/config/v1alpha1 // +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/deployment/config/v1alpha1 // +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/endpoint/config/v1alpha1 +// +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1 // +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/garbagecollector/config/v1alpha1 // +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/job/config/v1alpha1 // +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/namespace/config/v1alpha1 diff --git a/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go b/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go index 287411ce09f..fc05548f67f 100644 --- a/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go +++ b/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go @@ -33,6 +33,7 @@ import ( daemonconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/daemon/config/v1alpha1" deploymentconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/deployment/config/v1alpha1" endpointconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpoint/config/v1alpha1" + endpointsliceconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1" garbagecollectorconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/garbagecollector/config/v1alpha1" jobconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/job/config/v1alpha1" namespaceconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/namespace/config/v1alpha1" @@ -313,6 +314,9 @@ func autoConvert_v1alpha1_KubeControllerManagerConfiguration_To_config_KubeContr if err := endpointconfigv1alpha1.Convert_v1alpha1_EndpointControllerConfiguration_To_config_EndpointControllerConfiguration(&in.EndpointController, &out.EndpointController, s); err != nil { return err } + if err := endpointsliceconfigv1alpha1.Convert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(&in.EndpointSliceController, &out.EndpointSliceController, s); err != nil { + return err + } if err := garbagecollectorconfigv1alpha1.Convert_v1alpha1_GarbageCollectorControllerConfiguration_To_config_GarbageCollectorControllerConfiguration(&in.GarbageCollectorController, &out.GarbageCollectorController, s); err != nil { return err } @@ -391,6 +395,9 @@ func autoConvert_config_KubeControllerManagerConfiguration_To_v1alpha1_KubeContr if err := endpointconfigv1alpha1.Convert_config_EndpointControllerConfiguration_To_v1alpha1_EndpointControllerConfiguration(&in.EndpointController, &out.EndpointController, s); err != nil { return err } + if err := endpointsliceconfigv1alpha1.Convert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration(&in.EndpointSliceController, &out.EndpointSliceController, s); err != nil { + return err + } if err := garbagecollectorconfigv1alpha1.Convert_config_GarbageCollectorControllerConfiguration_To_v1alpha1_GarbageCollectorControllerConfiguration(&in.GarbageCollectorController, &out.GarbageCollectorController, s); err != nil { return err } diff --git a/pkg/controller/apis/config/zz_generated.deepcopy.go b/pkg/controller/apis/config/zz_generated.deepcopy.go index 0be838b746f..6d4565fc976 100644 --- a/pkg/controller/apis/config/zz_generated.deepcopy.go +++ b/pkg/controller/apis/config/zz_generated.deepcopy.go @@ -115,6 +115,7 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa out.StatefulSetController = in.StatefulSetController out.DeprecatedController = 
in.DeprecatedController out.EndpointController = in.EndpointController + out.EndpointSliceController = in.EndpointSliceController in.GarbageCollectorController.DeepCopyInto(&out.GarbageCollectorController) out.HPAController = in.HPAController out.JobController = in.JobController diff --git a/pkg/controller/endpoint/BUILD b/pkg/controller/endpoint/BUILD index 907fcecc517..96355148f78 100644 --- a/pkg/controller/endpoint/BUILD +++ b/pkg/controller/endpoint/BUILD @@ -1,25 +1,20 @@ -package(default_visibility = ["//visibility:public"]) - -load( - "@io_bazel_rules_go//go:def.bzl", - "go_library", - "go_test", -) +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "go_default_library", srcs = [ "doc.go", "endpoints_controller.go", - "trigger_time_tracker.go", ], importpath = "k8s.io/kubernetes/pkg/controller/endpoint", + visibility = ["//visibility:public"], deps = [ "//pkg/api/v1/endpoints:go_default_library", "//pkg/api/v1/pod:go_default_library", "//pkg/apis/core:go_default_library", "//pkg/apis/core/v1/helper:go_default_library", "//pkg/controller:go_default_library", + "//pkg/controller/util/endpoint:go_default_library", "//pkg/features:go_default_library", "//pkg/util/metrics:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", @@ -47,23 +42,20 @@ go_library( go_test( name = "go_default_test", - srcs = [ - "endpoints_controller_test.go", - "trigger_time_tracker_test.go", - ], + srcs = ["endpoints_controller_test.go"], embed = [":go_default_library"], deps = [ "//pkg/api/testapi:go_default_library", "//pkg/api/v1/endpoints:go_default_library", "//pkg/apis/core:go_default_library", "//pkg/controller:go_default_library", + "//pkg/controller/util/endpoint:go_default_library", "//pkg/features:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library", - "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", "//staging/src/k8s.io/client-go/informers:go_default_library", @@ -89,4 +81,5 @@ filegroup( "//pkg/controller/endpoint/config:all-srcs", ], tags = ["automanaged"], + visibility = ["//visibility:public"], ) diff --git a/pkg/controller/endpoint/endpoints_controller.go b/pkg/controller/endpoint/endpoints_controller.go index 05d7a973a8a..165585754be 100644 --- a/pkg/controller/endpoint/endpoints_controller.go +++ b/pkg/controller/endpoint/endpoints_controller.go @@ -45,6 +45,7 @@ import ( api "k8s.io/kubernetes/pkg/apis/core" helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/controller" + endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint" "k8s.io/kubernetes/pkg/util/metrics" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -113,7 +114,7 @@ func NewEndpointController(podInformer coreinformers.PodInformer, serviceInforme e.endpointsLister = endpointsInformer.Lister() e.endpointsSynced = endpointsInformer.Informer().HasSynced - e.triggerTimeTracker = NewTriggerTimeTracker() + e.triggerTimeTracker = endpointutil.NewTriggerTimeTracker() e.eventBroadcaster = broadcaster e.eventRecorder = recorder @@ -161,7 +162,7 @@ type EndpointController struct { // 
triggerTimeTracker is an util used to compute and export the EndpointsLastChangeTriggerTime // annotation. - triggerTimeTracker *TriggerTimeTracker + triggerTimeTracker *endpointutil.TriggerTimeTracker endpointUpdatesBatchPeriod time.Duration } @@ -267,124 +268,33 @@ func podToEndpointAddress(pod *v1.Pod) *v1.EndpointAddress { }} } -func podChanged(oldPod, newPod *v1.Pod) bool { - // If the pod's deletion timestamp is set, remove endpoint from ready address. - if newPod.DeletionTimestamp != oldPod.DeletionTimestamp { - return true - } - // If the pod's readiness has changed, the associated endpoint address - // will move from the unready endpoints set to the ready endpoints. - // So for the purposes of an endpoint, a readiness change on a pod - // means we have a changed pod. - if podutil.IsPodReady(oldPod) != podutil.IsPodReady(newPod) { - return true - } - // Convert the pod to an EndpointAddress, clear inert fields, - // and see if they are the same. Even in a dual stack (multi pod IP) a pod - // will never change just one of its IPs, it will always change all. the below - // comparison to check if a pod has changed will still work - newEndpointAddress := podToEndpointAddress(newPod) - oldEndpointAddress := podToEndpointAddress(oldPod) - // Ignore the ResourceVersion because it changes - // with every pod update. This allows the comparison to - // show equality if all other relevant fields match. - newEndpointAddress.TargetRef.ResourceVersion = "" - oldEndpointAddress.TargetRef.ResourceVersion = "" - if reflect.DeepEqual(newEndpointAddress, oldEndpointAddress) { - // The pod has not changed in any way that impacts the endpoints - return false - } - return true -} +func endpointChanged(pod1, pod2 *v1.Pod) bool { + endpointAddress1 := podToEndpointAddress(pod1) + endpointAddress2 := podToEndpointAddress(pod2) -func determineNeededServiceUpdates(oldServices, services sets.String, podChanged bool) sets.String { - if podChanged { - // if the labels and pod changed, all services need to be updated - services = services.Union(oldServices) - } else { - // if only the labels changed, services not common to - // both the new and old service set (i.e the disjunctive union) - // need to be updated - services = services.Difference(oldServices).Union(oldServices.Difference(services)) - } - return services + endpointAddress1.TargetRef.ResourceVersion = "" + endpointAddress2.TargetRef.ResourceVersion = "" + + return !reflect.DeepEqual(endpointAddress1, endpointAddress2) } // When a pod is updated, figure out what services it used to be a member of // and what services it will be a member of, and enqueue the union of these. // old and cur must be *v1.Pod types. func (e *EndpointController) updatePod(old, cur interface{}) { - newPod := cur.(*v1.Pod) - oldPod := old.(*v1.Pod) - if newPod.ResourceVersion == oldPod.ResourceVersion { - // Periodic resync will send update events for all known pods. - // Two different versions of the same pod will always have different RVs. 
- return - } - - podChangedFlag := podChanged(oldPod, newPod) - - // Check if the pod labels have changed, indicating a possible - // change in the service membership - labelsChanged := false - if !reflect.DeepEqual(newPod.Labels, oldPod.Labels) || - !hostNameAndDomainAreEqual(newPod, oldPod) { - labelsChanged = true - } - - // If both the pod and labels are unchanged, no update is needed - if !podChangedFlag && !labelsChanged { - return - } - - services, err := e.getPodServiceMemberships(newPod) - if err != nil { - utilruntime.HandleError(fmt.Errorf("Unable to get pod %v/%v's service memberships: %v", newPod.Namespace, newPod.Name, err)) - return - } - - if labelsChanged { - oldServices, err := e.getPodServiceMemberships(oldPod) - if err != nil { - utilruntime.HandleError(fmt.Errorf("Unable to get pod %v/%v's service memberships: %v", oldPod.Namespace, oldPod.Name, err)) - return - } - services = determineNeededServiceUpdates(oldServices, services, podChangedFlag) - } - + services := endpointutil.GetServicesToUpdateOnPodChange(e.serviceLister, old, cur, endpointChanged) for key := range services { e.queue.AddAfter(key, e.endpointUpdatesBatchPeriod) } } -func hostNameAndDomainAreEqual(pod1, pod2 *v1.Pod) bool { - return pod1.Spec.Hostname == pod2.Spec.Hostname && - pod1.Spec.Subdomain == pod2.Spec.Subdomain -} - // When a pod is deleted, enqueue the services the pod used to be a member of. // obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item. func (e *EndpointController) deletePod(obj interface{}) { - if _, ok := obj.(*v1.Pod); ok { - // Enqueue all the services that the pod used to be a member - // of. This happens to be exactly the same thing we do when a - // pod is added. - e.addPod(obj) - return + pod := endpointutil.GetPodFromDeleteAction(obj) + if pod != nil { + e.addPod(pod) } - // If we reached here it means the pod was deleted but its final state is unrecorded. - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - utilruntime.HandleError(fmt.Errorf("Couldn't get object from tombstone %#v", obj)) - return - } - pod, ok := tombstone.Obj.(*v1.Pod) - if !ok { - utilruntime.HandleError(fmt.Errorf("Tombstone contained object that is not a Pod: %#v", obj)) - return - } - klog.V(4).Infof("Enqueuing services of deleted pod %s/%s having final state unrecorded", pod.Namespace, pod.Name) - e.addPod(pod) } // obj could be an *v1.Service, or a DeletionFinalStateUnknown marker item. @@ -462,7 +372,7 @@ func (e *EndpointController) syncService(key string) error { if err != nil && !errors.IsNotFound(err) { return err } - e.triggerTimeTracker.DeleteEndpoints(namespace, name) + e.triggerTimeTracker.DeleteService(namespace, name) return nil } @@ -491,11 +401,11 @@ func (e *EndpointController) syncService(key string) error { } } - // We call ComputeEndpointsLastChangeTriggerTime here to make sure that the state of the trigger - // time tracker gets updated even if the sync turns out to be no-op and we don't update the - // endpoints object. + // We call ComputeEndpointLastChangeTriggerTime here to make sure that the + // state of the trigger time tracker gets updated even if the sync turns out + // to be no-op and we don't update the endpoints object. endpointsLastChangeTriggerTime := e.triggerTimeTracker. 
- ComputeEndpointsLastChangeTriggerTime(namespace, name, service, pods) + ComputeEndpointLastChangeTriggerTime(namespace, service, pods) subsets := []v1.EndpointSubset{} var totalReadyEps int diff --git a/pkg/controller/endpoint/endpoints_controller_test.go b/pkg/controller/endpoint/endpoints_controller_test.go index 565e40a2ba8..96ea347a5fa 100644 --- a/pkg/controller/endpoint/endpoints_controller_test.go +++ b/pkg/controller/endpoint/endpoints_controller_test.go @@ -29,7 +29,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/informers" @@ -42,6 +41,7 @@ import ( endptspkg "k8s.io/kubernetes/pkg/api/v1/endpoints" api "k8s.io/kubernetes/pkg/apis/core" "k8s.io/kubernetes/pkg/controller" + endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint" "k8s.io/kubernetes/pkg/features" ) @@ -1272,24 +1272,24 @@ func TestPodChanged(t *testing.T) { oldPod := pods[0].(*v1.Pod) newPod := oldPod.DeepCopy() - if podChanged(oldPod, newPod) { + if podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be unchanged for copied pod") } newPod.Spec.NodeName = "changed" - if !podChanged(oldPod, newPod) { + if !podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be changed for pod with NodeName changed") } newPod.Spec.NodeName = oldPod.Spec.NodeName newPod.ObjectMeta.ResourceVersion = "changed" - if podChanged(oldPod, newPod) { + if podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be unchanged for pod with only ResourceVersion changed") } newPod.ObjectMeta.ResourceVersion = oldPod.ObjectMeta.ResourceVersion newPod.Status.PodIP = "1.2.3.1" - if !podChanged(oldPod, newPod) { + if !podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be changed with pod IP address change") } newPod.Status.PodIP = oldPod.Status.PodIP @@ -1306,7 +1306,7 @@ func TestPodChanged(t *testing.T) { IP: "2000::1", }, } - if !podChanged(oldPod, newPod) { + if !podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be changed with adding secondary IP") } // reset @@ -1369,90 +1369,26 @@ func TestPodChanged(t *testing.T) { /* end dual stack testing */ newPod.ObjectMeta.Name = "wrong-name" - if !podChanged(oldPod, newPod) { + if !podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be changed with pod name change") } newPod.ObjectMeta.Name = oldPod.ObjectMeta.Name saveConditions := oldPod.Status.Conditions oldPod.Status.Conditions = nil - if !podChanged(oldPod, newPod) { + if !podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be changed with pod readiness change") } oldPod.Status.Conditions = saveConditions now := metav1.NewTime(time.Now().UTC()) newPod.ObjectMeta.DeletionTimestamp = &now - if !podChanged(oldPod, newPod) { + if !podChangedHelper(oldPod, newPod, endpointChanged) { t.Errorf("Expected pod to be changed with DeletionTimestamp change") } newPod.ObjectMeta.DeletionTimestamp = oldPod.ObjectMeta.DeletionTimestamp.DeepCopy() } -func TestDetermineNeededServiceUpdates(t *testing.T) { - testCases := []struct { - name string - a sets.String - b sets.String - union sets.String - xor sets.String - }{ - { - name: "no services changed", - a: sets.NewString("a", "b", "c"), - b: sets.NewString("a", "b", "c"), - xor: sets.NewString(), - union: 
sets.NewString("a", "b", "c"), - }, - { - name: "all old services removed, new services added", - a: sets.NewString("a", "b", "c"), - b: sets.NewString("d", "e", "f"), - xor: sets.NewString("a", "b", "c", "d", "e", "f"), - union: sets.NewString("a", "b", "c", "d", "e", "f"), - }, - { - name: "all old services removed, no new services added", - a: sets.NewString("a", "b", "c"), - b: sets.NewString(), - xor: sets.NewString("a", "b", "c"), - union: sets.NewString("a", "b", "c"), - }, - { - name: "no old services, but new services added", - a: sets.NewString(), - b: sets.NewString("a", "b", "c"), - xor: sets.NewString("a", "b", "c"), - union: sets.NewString("a", "b", "c"), - }, - { - name: "one service removed, one service added, two unchanged", - a: sets.NewString("a", "b", "c"), - b: sets.NewString("b", "c", "d"), - xor: sets.NewString("a", "d"), - union: sets.NewString("a", "b", "c", "d"), - }, - { - name: "no services", - a: sets.NewString(), - b: sets.NewString(), - xor: sets.NewString(), - union: sets.NewString(), - }, - } - for _, testCase := range testCases { - retval := determineNeededServiceUpdates(testCase.a, testCase.b, false) - if !retval.Equal(testCase.xor) { - t.Errorf("%s (with podChanged=false): expected: %v got: %v", testCase.name, testCase.xor.List(), retval.List()) - } - - retval = determineNeededServiceUpdates(testCase.a, testCase.b, true) - if !retval.Equal(testCase.union) { - t.Errorf("%s (with podChanged=true): expected: %v got: %v", testCase.name, testCase.union.List(), retval.List()) - } - } -} - func TestLastTriggerChangeTimeAnnotation(t *testing.T) { ns := "other" testServer, endpointsHandler := makeTestServer(t, ns) @@ -1999,3 +1935,8 @@ func TestSyncEndpointsServiceNotFound(t *testing.T) { endpointsHandler.ValidateRequestCount(t, 1) endpointsHandler.ValidateRequest(t, testapi.Default.ResourcePath("endpoints", ns, "foo"), "DELETE", nil) } + +func podChangedHelper(oldPod, newPod *v1.Pod, endpointChanged endpointutil.EndpointsMatch) bool { + podChanged, _ := endpointutil.PodChanged(oldPod, newPod, endpointChanged) + return podChanged +} diff --git a/pkg/controller/endpoint/trigger_time_tracker.go b/pkg/controller/endpoint/trigger_time_tracker.go deleted file mode 100644 index 248df21e2cb..00000000000 --- a/pkg/controller/endpoint/trigger_time_tracker.go +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright 2019 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package endpoint - -import ( - "sync" - "time" - - "k8s.io/api/core/v1" - podutil "k8s.io/kubernetes/pkg/api/v1/pod" -) - -// TriggerTimeTracker is a util used to compute the EndpointsLastChangeTriggerTime annotation which -// is exported in the endpoints controller's sync function. -// See the documentation of the EndpointsLastChangeTriggerTime annotation for more details. -// -// Please note that this util may compute a wrong EndpointsLastChangeTriggerTime if a same object -// changes multiple times between two consecutive syncs. 
We're aware of this limitation but we -// decided to accept it, as fixing it would require a major rewrite of the endpoints controller and -// Informer framework. Such situations, i.e. frequent updates of the same object in a single sync -// period, should be relatively rare and therefore this util should provide a good approximation of -// the EndpointsLastChangeTriggerTime. -// TODO(mm4tt): Implement a more robust mechanism that is not subject to the above limitations. -type TriggerTimeTracker struct { - // endpointsStates is a map, indexed by Endpoints object key, storing the last known Endpoints - // object state observed during the most recent call of the ComputeEndpointsLastChangeTriggerTime - // function. - endpointsStates map[endpointsKey]endpointsState - - // mutex guarding the endpointsStates map. - mutex sync.Mutex -} - -// NewTriggerTimeTracker creates a new instance of the TriggerTimeTracker. -func NewTriggerTimeTracker() *TriggerTimeTracker { - return &TriggerTimeTracker{ - endpointsStates: make(map[endpointsKey]endpointsState), - } -} - -// endpointsKey is a key uniquely identifying an Endpoints object. -type endpointsKey struct { - // namespace, name composing a namespaced name - an unique identifier of every Endpoints object. - namespace, name string -} - -// endpointsState represents a state of an Endpoints object that is known to this util. -type endpointsState struct { - // lastServiceTriggerTime is a service trigger time observed most recently. - lastServiceTriggerTime time.Time - // lastPodTriggerTimes is a map (Pod name -> time) storing the pod trigger times that were - // observed during the most recent call of the ComputeEndpointsLastChangeTriggerTime function. - lastPodTriggerTimes map[string]time.Time -} - -// ComputeEndpointsLastChangeTriggerTime updates the state of the Endpoints object being synced -// and returns the time that should be exported as the EndpointsLastChangeTriggerTime annotation. -// -// If the method returns a 'zero' time the EndpointsLastChangeTriggerTime annotation shouldn't be -// exported. -// -// Please note that this function may compute a wrong EndpointsLastChangeTriggerTime value if the -// same object (pod/service) changes multiple times between two consecutive syncs. -// -// Important: This method is go-routing safe but only when called for different keys. The method -// shouldn't be called concurrently for the same key! This contract is fulfilled in the current -// implementation of the endpoints controller. -func (t *TriggerTimeTracker) ComputeEndpointsLastChangeTriggerTime( - namespace, name string, service *v1.Service, pods []*v1.Pod) time.Time { - - key := endpointsKey{namespace: namespace, name: name} - // As there won't be any concurrent calls for the same key, we need to guard access only to the - // endpointsStates map. - t.mutex.Lock() - state, wasKnown := t.endpointsStates[key] - t.mutex.Unlock() - - // Update the state before returning. - defer func() { - t.mutex.Lock() - t.endpointsStates[key] = state - t.mutex.Unlock() - }() - - // minChangedTriggerTime is the min trigger time of all trigger times that have changed since the - // last sync. - var minChangedTriggerTime time.Time - // TODO(mm4tt): If memory allocation / GC performance impact of recreating map in every call - // turns out to be too expensive, we should consider rewriting this to reuse the existing map. 
- podTriggerTimes := make(map[string]time.Time) - for _, pod := range pods { - if podTriggerTime := getPodTriggerTime(pod); !podTriggerTime.IsZero() { - podTriggerTimes[pod.Name] = podTriggerTime - if podTriggerTime.After(state.lastPodTriggerTimes[pod.Name]) { - // Pod trigger time has changed since the last sync, update minChangedTriggerTime. - minChangedTriggerTime = min(minChangedTriggerTime, podTriggerTime) - } - } - } - serviceTriggerTime := getServiceTriggerTime(service) - if serviceTriggerTime.After(state.lastServiceTriggerTime) { - // Service trigger time has changed since the last sync, update minChangedTriggerTime. - minChangedTriggerTime = min(minChangedTriggerTime, serviceTriggerTime) - } - - state.lastPodTriggerTimes = podTriggerTimes - state.lastServiceTriggerTime = serviceTriggerTime - - if !wasKnown { - // New Endpoints object / new Service, use Service creationTimestamp. - return service.CreationTimestamp.Time - } else { - // Regular update of the Endpoints object, return min of changed trigger times. - return minChangedTriggerTime - } -} - -// DeleteEndpoints deletes endpoints state stored in this util. -func (t *TriggerTimeTracker) DeleteEndpoints(namespace, name string) { - key := endpointsKey{namespace: namespace, name: name} - t.mutex.Lock() - defer t.mutex.Unlock() - delete(t.endpointsStates, key) -} - -// getPodTriggerTime returns the time of the pod change (trigger) that resulted or will result in -// the endpoints object change. -func getPodTriggerTime(pod *v1.Pod) (triggerTime time.Time) { - if readyCondition := podutil.GetPodReadyCondition(pod.Status); readyCondition != nil { - triggerTime = readyCondition.LastTransitionTime.Time - } - // TODO(#81360): Implement missing cases: deletionTime set, pod label change - return triggerTime -} - -// getServiceTriggerTime returns the time of the service change (trigger) that resulted or will -// result in the endpoints object change. -func getServiceTriggerTime(service *v1.Service) (triggerTime time.Time) { - // TODO(mm4tt): Ideally we should look at service.LastUpdateTime, but such thing doesn't exist. - return service.CreationTimestamp.Time -} - -// min returns minimum of the currentMin and newValue or newValue if the currentMin is not set. -func min(currentMin, newValue time.Time) time.Time { - if currentMin.IsZero() || newValue.Before(currentMin) { - return newValue - } - return currentMin -} diff --git a/pkg/controller/endpoint/trigger_time_tracker_test.go b/pkg/controller/endpoint/trigger_time_tracker_test.go deleted file mode 100644 index b8ebd68f0a5..00000000000 --- a/pkg/controller/endpoint/trigger_time_tracker_test.go +++ /dev/null @@ -1,204 +0,0 @@ -/* -Copyright 2019 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package endpoint - -import ( - "runtime" - "testing" - "time" - - "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" -) - -var ( - t0 = time.Date(2019, 01, 01, 0, 0, 0, 0, time.UTC) - t1 = t0.Add(time.Second) - t2 = t1.Add(time.Second) - t3 = t2.Add(time.Second) - t4 = t3.Add(time.Second) - t5 = t4.Add(time.Second) - - ns = "ns1" - name = "my-service" -) - -func TestNewService_NoPods(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t2) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service).expect(t2) -} - -func TestNewService_ExistingPods(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t3) - pod1 := createPod(ns, "pod1", t0) - pod2 := createPod(ns, "pod2", t1) - pod3 := createPod(ns, "pod3", t5) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2, pod3). - // Pods were created before service, but trigger time is the time when service was created. - expect(t3) -} - -func TestPodsAdded(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t0) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service).expect(t0) - - pod1 := createPod(ns, "pod1", t2) - pod2 := createPod(ns, "pod2", t1) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expect(t1) -} - -func TestPodsUpdated(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t0) - pod1 := createPod(ns, "pod1", t1) - pod2 := createPod(ns, "pod2", t2) - pod3 := createPod(ns, "pod3", t3) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2, pod3).expect(t0) - - pod1 = createPod(ns, "pod1", t5) - pod2 = createPod(ns, "pod2", t4) - // pod3 doesn't change. - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2, pod3).expect(t4) -} - -func TestPodsUpdated_NoOp(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t0) - pod1 := createPod(ns, "pod1", t1) - pod2 := createPod(ns, "pod2", t2) - pod3 := createPod(ns, "pod3", t3) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2, pod3).expect(t0) - - // Nothing has changed. 
- tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2, pod3).expectNil() -} - -func TestPodDeletedThenAdded(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t0) - pod1 := createPod(ns, "pod1", t1) - pod2 := createPod(ns, "pod2", t2) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expect(t0) - - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1).expectNil() - - pod2 = createPod(ns, "pod2", t4) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expect(t4) -} - -func TestServiceDeletedThenAdded(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t0) - pod1 := createPod(ns, "pod1", t1) - pod2 := createPod(ns, "pod2", t2) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expect(t0) - - tester.DeleteEndpoints(ns, name) - - service = createService(ns, name, t3) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expect(t3) -} - -func TestServiceUpdated_NoPodChange(t *testing.T) { - tester := newTester(t) - - service := createService(ns, name, t0) - pod1 := createPod(ns, "pod1", t1) - pod2 := createPod(ns, "pod2", t2) - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expect(t0) - - // service's ports have changed. - service.Spec = v1.ServiceSpec{ - Selector: map[string]string{}, - Ports: []v1.ServicePort{{Port: 80, TargetPort: intstr.FromInt(8080), Protocol: "TCP"}}, - } - - // Currently we're not able to calculate trigger time for service updates, hence the returned - // value is a nil time. - tester.whenComputeEndpointsLastChangeTriggerTime(ns, name, service, pod1, pod2).expectNil() -} - -// ------- Test Utils ------- - -type tester struct { - *TriggerTimeTracker - t *testing.T -} - -func newTester(t *testing.T) *tester { - return &tester{NewTriggerTimeTracker(), t} -} - -func (t *tester) whenComputeEndpointsLastChangeTriggerTime( - namespace, name string, service *v1.Service, pods ...*v1.Pod) subject { - return subject{t.ComputeEndpointsLastChangeTriggerTime(namespace, name, service, pods), t.t} -} - -type subject struct { - got time.Time - t *testing.T -} - -func (s subject) expect(expected time.Time) { - s.doExpect(expected) -} - -func (s subject) expectNil() { - s.doExpect(time.Time{}) -} - -func (s subject) doExpect(expected time.Time) { - if s.got != expected { - _, fn, line, _ := runtime.Caller(2) - s.t.Errorf("Wrong trigger time in %s:%d expected %s, got %s", fn, line, expected, s.got) - } -} - -func createPod(namespace, name string, readyTime time.Time) *v1.Pod { - return &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name}, - Status: v1.PodStatus{Conditions: []v1.PodCondition{ - { - Type: v1.PodReady, - Status: v1.ConditionTrue, - LastTransitionTime: metav1.NewTime(readyTime), - }, - }, - }, - } -} - -func createService(namespace, name string, creationTime time.Time) *v1.Service { - return &v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: name, - CreationTimestamp: metav1.NewTime(creationTime), - }, - } -} diff --git a/pkg/controller/endpointslice/BUILD b/pkg/controller/endpointslice/BUILD new file mode 100644 index 00000000000..5d10cc8ab19 --- /dev/null +++ b/pkg/controller/endpointslice/BUILD @@ -0,0 +1,89 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = [ + "endpointset.go", + 
"endpointslice_controller.go", + "reconciler.go", + "utils.go", + ], + importpath = "k8s.io/kubernetes/pkg/controller/endpointslice", + visibility = ["//visibility:public"], + deps = [ + "//pkg/api/v1/pod:go_default_library", + "//pkg/apis/core:go_default_library", + "//pkg/controller:go_default_library", + "//pkg/controller/util/endpoint:go_default_library", + "//pkg/util/hash:go_default_library", + "//pkg/util/metrics:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/api/discovery/v1alpha1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", + "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library", + "//staging/src/k8s.io/client-go/informers/discovery/v1alpha1:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library", + "//staging/src/k8s.io/client-go/listers/core/v1:go_default_library", + "//staging/src/k8s.io/client-go/listers/discovery/v1alpha1:go_default_library", + "//staging/src/k8s.io/client-go/tools/cache:go_default_library", + "//staging/src/k8s.io/client-go/tools/record:go_default_library", + "//staging/src/k8s.io/client-go/util/workqueue:go_default_library", + "//vendor/k8s.io/klog:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = [ + "endpointslice_controller_test.go", + "reconciler_test.go", + "utils_test.go", + ], + embed = [":go_default_library"], + deps = [ + "//pkg/controller:go_default_library", + "//pkg/controller/util/endpoint:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/api/discovery/v1alpha1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library", + "//staging/src/k8s.io/client-go/informers:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library", + "//staging/src/k8s.io/client-go/listers/core/v1:go_default_library", + "//staging/src/k8s.io/client-go/testing:go_default_library", + "//staging/src/k8s.io/client-go/tools/cache:go_default_library", + "//vendor/github.com/stretchr/testify/assert:go_default_library", + "//vendor/k8s.io/utils/pointer:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [ + ":package-srcs", + 
"//pkg/controller/endpointslice/config:all-srcs", + ], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/controller/endpointslice/config/BUILD b/pkg/controller/endpointslice/config/BUILD new file mode 100644 index 00000000000..3c6e1b384d0 --- /dev/null +++ b/pkg/controller/endpointslice/config/BUILD @@ -0,0 +1,29 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = [ + "doc.go", + "types.go", + "zz_generated.deepcopy.go", + ], + importpath = "k8s.io/kubernetes/pkg/controller/endpointslice/config", + visibility = ["//visibility:public"], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [ + ":package-srcs", + "//pkg/controller/endpointslice/config/v1alpha1:all-srcs", + ], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/controller/endpointslice/config/doc.go b/pkg/controller/endpointslice/config/doc.go new file mode 100644 index 00000000000..fa47f0db09a --- /dev/null +++ b/pkg/controller/endpointslice/config/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package + +package config // import "k8s.io/kubernetes/pkg/controller/endpointslice/config" diff --git a/pkg/controller/endpointslice/config/types.go b/pkg/controller/endpointslice/config/types.go new file mode 100644 index 00000000000..1dd1296f29b --- /dev/null +++ b/pkg/controller/endpointslice/config/types.go @@ -0,0 +1,31 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +// EndpointSliceControllerConfiguration contains elements describing +// EndpointSliceController. +type EndpointSliceControllerConfiguration struct { + // concurrentServiceEndpointSyncs is the number of service endpoint syncing + // operations that will be done concurrently. Larger number = faster + // endpoint slice updating, but more CPU (and network) load. + ConcurrentServiceEndpointSyncs int32 + + // maxEndpointsPerSlice is the maximum number of endpoints that will be + // added to an EndpointSlice. More endpoints per slice will result in fewer + // and larger endpoint slices, but larger resources. 
+ MaxEndpointsPerSlice int32 +} diff --git a/pkg/controller/endpointslice/config/v1alpha1/BUILD b/pkg/controller/endpointslice/config/v1alpha1/BUILD new file mode 100644 index 00000000000..ab99053fbc0 --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/BUILD @@ -0,0 +1,36 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = [ + "conversion.go", + "defaults.go", + "doc.go", + "register.go", + "zz_generated.conversion.go", + "zz_generated.deepcopy.go", + ], + importpath = "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1", + visibility = ["//visibility:public"], + deps = [ + "//pkg/controller/endpointslice/config:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/conversion:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//staging/src/k8s.io/kube-controller-manager/config/v1alpha1:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/controller/endpointslice/config/v1alpha1/conversion.go b/pkg/controller/endpointslice/config/v1alpha1/conversion.go new file mode 100644 index 00000000000..637f36640d5 --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/conversion.go @@ -0,0 +1,40 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/conversion" + "k8s.io/kube-controller-manager/config/v1alpha1" + endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config" +) + +// Important! The public back-and-forth conversion functions for the types in +// this package with EndpointControllerConfiguration types need to be manually +// exposed like this in order for other packages that reference this package to +// be able to call these conversion functions in an autogenerated manner. +// TODO: Fix the bug in conversion-gen so it automatically discovers these +// Convert_* functions in autogenerated code as well. + +// Convert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration is an autogenerated conversion function. +func Convert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(in *v1alpha1.EndpointSliceControllerConfiguration, out *endpointsliceconfig.EndpointSliceControllerConfiguration, s conversion.Scope) error { + return autoConvert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(in, out, s) +} + +// Convert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration is an autogenerated conversion function. 
+func Convert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration(in *endpointsliceconfig.EndpointSliceControllerConfiguration, out *v1alpha1.EndpointSliceControllerConfiguration, s conversion.Scope) error { + return autoConvert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration(in, out, s) +} diff --git a/pkg/controller/endpointslice/config/v1alpha1/defaults.go b/pkg/controller/endpointslice/config/v1alpha1/defaults.go new file mode 100644 index 00000000000..fa6340656c9 --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/defaults.go @@ -0,0 +1,41 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + kubectrlmgrconfigv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1" +) + +// RecommendedDefaultEndpointSliceControllerConfiguration defaults a pointer to +// a EndpointSliceControllerConfiguration struct. This will set the recommended +// default values, but they may be subject to change between API versions. This +// function is intentionally not registered in the scheme as a "normal" +// `SetDefaults_Foo` function to allow consumers of this type to set whatever +// defaults for their embedded configs. Forcing consumers to use these defaults +// would be problematic as defaulting in the scheme is done as part of the +// conversion, and there would be no easy way to opt-out. Instead, if you want +// to use this defaulting method run it in your wrapper struct of this type in +// its `SetDefaults_` method. +func RecommendedDefaultEndpointSliceControllerConfiguration(obj *kubectrlmgrconfigv1alpha1.EndpointSliceControllerConfiguration) { + if obj.ConcurrentServiceEndpointSyncs == 0 { + obj.ConcurrentServiceEndpointSyncs = 5 + } + + if obj.MaxEndpointsPerSlice == 0 { + obj.MaxEndpointsPerSlice = 100 + } +} diff --git a/pkg/controller/endpointslice/config/v1alpha1/doc.go b/pkg/controller/endpointslice/config/v1alpha1/doc.go new file mode 100644 index 00000000000..2ef2a1fee4b --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/doc.go @@ -0,0 +1,21 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// +k8s:deepcopy-gen=package +// +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/endpointslice/config +// +k8s:conversion-gen-external-types=k8s.io/kube-controller-manager/config/v1alpha1 + +package v1alpha1 // import "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1" diff --git a/pkg/controller/endpointslice/config/v1alpha1/register.go b/pkg/controller/endpointslice/config/v1alpha1/register.go new file mode 100644 index 00000000000..360b9d41a85 --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/register.go @@ -0,0 +1,34 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime" +) + +var ( + // SchemeBuilder is the scheme builder with scheme init functions to run for + // this API package + SchemeBuilder runtime.SchemeBuilder + // localSchemeBuilder extends the SchemeBuilder instance with the external + // types. In this package, defaulting and conversion init funcs are + // registered as well. + localSchemeBuilder = &SchemeBuilder + // AddToScheme is a global function that registers this API group & version + // to a scheme + AddToScheme = localSchemeBuilder.AddToScheme +) diff --git a/pkg/controller/endpointslice/config/v1alpha1/zz_generated.conversion.go b/pkg/controller/endpointslice/config/v1alpha1/zz_generated.conversion.go new file mode 100644 index 00000000000..f31f67faa90 --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/zz_generated.conversion.go @@ -0,0 +1,103 @@ +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by conversion-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + conversion "k8s.io/apimachinery/pkg/conversion" + runtime "k8s.io/apimachinery/pkg/runtime" + v1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1" + config "k8s.io/kubernetes/pkg/controller/endpointslice/config" +) + +func init() { + localSchemeBuilder.Register(RegisterConversions) +} + +// RegisterConversions adds conversion functions to the given scheme. +// Public to allow building arbitrary schemes. 
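The exported Convert_* wrappers defined in conversion.go only copy the two scalar fields and never touch the conversion scope, so they can be exercised directly in isolation. A rough sketch (not part of the patch), assuming both config packages added by this change are importable; passing a nil scope is safe here precisely because the generated bodies ignore it:

package main

import (
	"fmt"

	"k8s.io/kube-controller-manager/config/v1alpha1"
	endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config"
	endpointslicev1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1"
)

func main() {
	external := v1alpha1.EndpointSliceControllerConfiguration{
		ConcurrentServiceEndpointSyncs: 5,
		MaxEndpointsPerSlice:           100,
	}

	// Convert the versioned (external) type to the internal config type.
	// The generated body only copies the two int32 fields, so a nil scope suffices.
	var internal endpointsliceconfig.EndpointSliceControllerConfiguration
	if err := endpointslicev1alpha1.Convert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(&external, &internal, nil); err != nil {
		panic(err)
	}

	fmt.Printf("internal: %+v\n", internal)
}

RegisterConversions, below, is what registers these same functions with a runtime.Scheme so that scheme-based conversion can find them automatically.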
+func RegisterConversions(s *runtime.Scheme) error { + if err := s.AddGeneratedConversionFunc((*v1alpha1.EndpointSliceControllerConfiguration)(nil), (*config.EndpointSliceControllerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(a.(*v1alpha1.EndpointSliceControllerConfiguration), b.(*config.EndpointSliceControllerConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*config.EndpointSliceControllerConfiguration)(nil), (*v1alpha1.EndpointSliceControllerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration(a.(*config.EndpointSliceControllerConfiguration), b.(*v1alpha1.EndpointSliceControllerConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1alpha1.GroupResource)(nil), (*v1.GroupResource)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_GroupResource_To_v1_GroupResource(a.(*v1alpha1.GroupResource), b.(*v1.GroupResource), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1.GroupResource)(nil), (*v1alpha1.GroupResource)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1_GroupResource_To_v1alpha1_GroupResource(a.(*v1.GroupResource), b.(*v1alpha1.GroupResource), scope) + }); err != nil { + return err + } + if err := s.AddConversionFunc((*config.EndpointSliceControllerConfiguration)(nil), (*v1alpha1.EndpointSliceControllerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration(a.(*config.EndpointSliceControllerConfiguration), b.(*v1alpha1.EndpointSliceControllerConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddConversionFunc((*v1alpha1.EndpointSliceControllerConfiguration)(nil), (*config.EndpointSliceControllerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(a.(*v1alpha1.EndpointSliceControllerConfiguration), b.(*config.EndpointSliceControllerConfiguration), scope) + }); err != nil { + return err + } + return nil +} + +func autoConvert_v1alpha1_EndpointSliceControllerConfiguration_To_config_EndpointSliceControllerConfiguration(in *v1alpha1.EndpointSliceControllerConfiguration, out *config.EndpointSliceControllerConfiguration, s conversion.Scope) error { + out.ConcurrentServiceEndpointSyncs = in.ConcurrentServiceEndpointSyncs + out.MaxEndpointsPerSlice = in.MaxEndpointsPerSlice + return nil +} + +func autoConvert_config_EndpointSliceControllerConfiguration_To_v1alpha1_EndpointSliceControllerConfiguration(in *config.EndpointSliceControllerConfiguration, out *v1alpha1.EndpointSliceControllerConfiguration, s conversion.Scope) error { + out.ConcurrentServiceEndpointSyncs = in.ConcurrentServiceEndpointSyncs + out.MaxEndpointsPerSlice = in.MaxEndpointsPerSlice + return nil +} + +func autoConvert_v1alpha1_GroupResource_To_v1_GroupResource(in *v1alpha1.GroupResource, out *v1.GroupResource, s conversion.Scope) error { + out.Group = in.Group + out.Resource = in.Resource + return nil +} + +// Convert_v1alpha1_GroupResource_To_v1_GroupResource is an 
autogenerated conversion function. +func Convert_v1alpha1_GroupResource_To_v1_GroupResource(in *v1alpha1.GroupResource, out *v1.GroupResource, s conversion.Scope) error { + return autoConvert_v1alpha1_GroupResource_To_v1_GroupResource(in, out, s) +} + +func autoConvert_v1_GroupResource_To_v1alpha1_GroupResource(in *v1.GroupResource, out *v1alpha1.GroupResource, s conversion.Scope) error { + out.Group = in.Group + out.Resource = in.Resource + return nil +} + +// Convert_v1_GroupResource_To_v1alpha1_GroupResource is an autogenerated conversion function. +func Convert_v1_GroupResource_To_v1alpha1_GroupResource(in *v1.GroupResource, out *v1alpha1.GroupResource, s conversion.Scope) error { + return autoConvert_v1_GroupResource_To_v1alpha1_GroupResource(in, out, s) +} diff --git a/pkg/controller/endpointslice/config/v1alpha1/zz_generated.deepcopy.go b/pkg/controller/endpointslice/config/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000000..0ec19467c40 --- /dev/null +++ b/pkg/controller/endpointslice/config/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,21 @@ +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1alpha1 diff --git a/pkg/controller/endpointslice/config/zz_generated.deepcopy.go b/pkg/controller/endpointslice/config/zz_generated.deepcopy.go new file mode 100644 index 00000000000..4b3289de43b --- /dev/null +++ b/pkg/controller/endpointslice/config/zz_generated.deepcopy.go @@ -0,0 +1,37 @@ +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package config + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EndpointSliceControllerConfiguration) DeepCopyInto(out *EndpointSliceControllerConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointSliceControllerConfiguration. 
+func (in *EndpointSliceControllerConfiguration) DeepCopy() *EndpointSliceControllerConfiguration { + if in == nil { + return nil + } + out := new(EndpointSliceControllerConfiguration) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/controller/endpointslice/endpointset.go b/pkg/controller/endpointslice/endpointset.go new file mode 100644 index 00000000000..604f38ec82c --- /dev/null +++ b/pkg/controller/endpointslice/endpointset.go @@ -0,0 +1,96 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "sort" + + discovery "k8s.io/api/discovery/v1alpha1" +) + +// endpointHash is used to uniquely identify endpoints. Only including addresses +// and hostnames as unique identifiers allows us to do more in place updates +// should attributes such as topology, conditions, or targetRef change. +type endpointHash string +type endpointHashObj struct { + Addresses []string + Hostname string +} + +func hashEndpoint(endpoint *discovery.Endpoint) endpointHash { + sort.Strings(endpoint.Addresses) + hashObj := endpointHashObj{Addresses: endpoint.Addresses} + if endpoint.Hostname != nil { + hashObj.Hostname = *endpoint.Hostname + } + + return endpointHash(deepHashObjectToString(hashObj)) +} + +// endpointSet provides simple methods for comparing sets of Endpoints. +type endpointSet map[endpointHash]*discovery.Endpoint + +// Insert adds items to the set. +func (s endpointSet) Insert(items ...*discovery.Endpoint) endpointSet { + for _, item := range items { + s[hashEndpoint(item)] = item + } + return s +} + +// Delete removes all items from the set. +func (s endpointSet) Delete(items ...*discovery.Endpoint) endpointSet { + for _, item := range items { + delete(s, hashEndpoint(item)) + } + return s +} + +// Has returns true if and only if item is contained in the set. +func (s endpointSet) Has(item *discovery.Endpoint) bool { + _, contained := s[hashEndpoint(item)] + return contained +} + +// Returns an endpoint matching the hash if contained in the set. +func (s endpointSet) Get(item *discovery.Endpoint) *discovery.Endpoint { + got, _ := s[hashEndpoint(item)] + return got +} + +// UnsortedList returns the slice with contents in random order. +func (s endpointSet) UnsortedList() []*discovery.Endpoint { + endpoints := make([]*discovery.Endpoint, 0, len(s)) + for _, endpoint := range s { + endpoints = append(endpoints, endpoint) + } + return endpoints +} + +// Returns a single element from the set. +func (s endpointSet) PopAny() (*discovery.Endpoint, bool) { + for _, endpoint := range s { + s.Delete(endpoint) + return endpoint, true + } + return nil, false +} + +// Len returns the size of the set. 
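Note that hashEndpoint above deliberately keys only on addresses and hostname: an endpoint whose topology, conditions, or targetRef changed still maps to the same entry, so the reconciler can update it in place instead of treating it as an add plus a delete. A self-contained sketch of that identity rule with stand-in types (not the actual hashing, which goes through deepHashObjectToString):

package main

import (
	"fmt"
	"sort"
	"strings"
)

// endpoint is a simplified stand-in for discovery.Endpoint.
type endpoint struct {
	Addresses []string
	Hostname  string
	Topology  map[string]string
}

// key identifies an endpoint by addresses and hostname only, mirroring the
// idea behind hashEndpoint: attributes like topology do not affect identity.
func key(e endpoint) string {
	addrs := append([]string(nil), e.Addresses...)
	sort.Strings(addrs)
	return strings.Join(addrs, ",") + "/" + e.Hostname
}

func main() {
	set := map[string]endpoint{}

	old := endpoint{Addresses: []string{"10.0.0.1"}, Hostname: "pod-a"}
	set[key(old)] = old

	// Same addresses and hostname, different topology: same key, so this
	// overwrites the stale entry instead of adding a second endpoint.
	updated := endpoint{
		Addresses: []string{"10.0.0.1"},
		Hostname:  "pod-a",
		Topology:  map[string]string{"kubernetes.io/hostname": "node-2"},
	}
	set[key(updated)] = updated

	fmt.Println("entries:", len(set)) // entries: 1
}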
+func (s endpointSet) Len() int { + return len(s) +} diff --git a/pkg/controller/endpointslice/endpointslice_controller.go b/pkg/controller/endpointslice/endpointslice_controller.go new file mode 100644 index 00000000000..d5fef2e1154 --- /dev/null +++ b/pkg/controller/endpointslice/endpointslice_controller.go @@ -0,0 +1,343 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "fmt" + "time" + + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + discoveryinformers "k8s.io/client-go/informers/discovery/v1alpha1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" + discoverylisters "k8s.io/client-go/listers/discovery/v1alpha1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog" + "k8s.io/kubernetes/pkg/controller" + endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint" + "k8s.io/kubernetes/pkg/util/metrics" +) + +const ( + // serviceNameLabel is used to indicate the name of a Kubernetes service + // associated with an EndpointSlice. + serviceNameLabel = "kubernetes.io/service-name" + + // maxRetries is the number of times a service will be retried before it is + // dropped out of the queue. Any sync error, such as a failure to create or + // update an EndpointSlice could trigger a retry. With the current + // rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers + // represent the sequence of delays between successive queuings of a + // service. 
+ // + // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, + // 10.2s, 20.4s, 41s, 82s + maxRetries = 15 +) + +// NewController creates and initializes a new Controller +func NewController(podInformer coreinformers.PodInformer, + serviceInformer coreinformers.ServiceInformer, + nodeInformer coreinformers.NodeInformer, + esInformer discoveryinformers.EndpointSliceInformer, + maxEndpointsPerSlice int32, + client clientset.Interface, +) *Controller { + broadcaster := record.NewBroadcaster() + broadcaster.StartLogging(klog.Infof) + broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.CoreV1().Events("")}) + recorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "endpoint-slice-controller"}) + + if client != nil && client.CoreV1().RESTClient().GetRateLimiter() != nil { + metrics.RegisterMetricAndTrackRateLimiterUsage("endpoint_slice_controller", client.DiscoveryV1alpha1().RESTClient().GetRateLimiter()) + } + + c := &Controller{ + client: client, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "endpoint_slice"), + workerLoopPeriod: time.Second, + } + + serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: c.enqueueService, + UpdateFunc: func(old, cur interface{}) { + c.enqueueService(cur) + }, + DeleteFunc: c.enqueueService, + }) + c.serviceLister = serviceInformer.Lister() + c.servicesSynced = serviceInformer.Informer().HasSynced + + podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: c.addPod, + UpdateFunc: c.updatePod, + DeleteFunc: c.deletePod, + }) + c.podLister = podInformer.Lister() + c.podsSynced = podInformer.Informer().HasSynced + + c.nodeLister = nodeInformer.Lister() + c.nodesSynced = nodeInformer.Informer().HasSynced + + c.endpointSliceLister = esInformer.Lister() + c.endpointSlicesSynced = esInformer.Informer().HasSynced + + c.maxEndpointsPerSlice = maxEndpointsPerSlice + + c.reconciler = &reconciler{ + client: c.client, + nodeLister: c.nodeLister, + maxEndpointsPerSlice: c.maxEndpointsPerSlice, + } + c.triggerTimeTracker = endpointutil.NewTriggerTimeTracker() + + c.eventBroadcaster = broadcaster + c.eventRecorder = recorder + + return c +} + +// Controller manages selector-based service endpoint slices +type Controller struct { + client clientset.Interface + eventBroadcaster record.EventBroadcaster + eventRecorder record.EventRecorder + + // serviceLister is able to list/get services and is populated by the + // shared informer passed to NewController + serviceLister corelisters.ServiceLister + // servicesSynced returns true if the service shared informer has been synced at least once. + // Added as a member to the struct to allow injection for testing. + servicesSynced cache.InformerSynced + + // podLister is able to list/get pods and is populated by the + // shared informer passed to NewController + podLister corelisters.PodLister + // podsSynced returns true if the pod shared informer has been synced at least once. + // Added as a member to the struct to allow injection for testing. + podsSynced cache.InformerSynced + + // endpointSliceLister is able to list/get pods and is populated by the + // shared informer passed to NewController + endpointSliceLister discoverylisters.EndpointSliceLister + // endpointSlicesSynced returns true if the endpoint slice shared informer has been synced at least once. + // Added as a member to the struct to allow injection for testing. 
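The delay sequence listed in the maxRetries comment above comes from the per-item exponential limiter inside workqueue.DefaultControllerRateLimiter(), which starts at 5ms and doubles on every requeue. A small sketch that reproduces those numbers (the real queue additionally layers an overall token-bucket limiter on top):

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// 5ms base delay doubling per retry, capped at 1000s, matching the
	// per-item limiter used by DefaultControllerRateLimiter.
	rl := workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 1000*time.Second)

	const maxRetries = 15
	for i := 0; i < maxRetries; i++ {
		// When returns the delay before this item may be requeued again:
		// 5ms, 10ms, 20ms, ... up to roughly 82s on the 15th retry.
		fmt.Printf("retry %2d: %v\n", i+1, rl.When("ns/my-service"))
	}
}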
+ endpointSlicesSynced cache.InformerSynced + + // nodeLister is able to list/get pods and is populated by the + // shared informer passed to NewController + nodeLister corelisters.NodeLister + // nodesSynced returns true if the node shared informer has been synced at least once. + // Added as a member to the struct to allow injection for testing. + nodesSynced cache.InformerSynced + + // reconciler is an util used to reconcile EndpointSlice changes. + reconciler *reconciler + + // triggerTimeTracker is an util used to compute and export the + // EndpointsLastChangeTriggerTime annotation. + triggerTimeTracker *endpointutil.TriggerTimeTracker + + // Services that need to be updated. A channel is inappropriate here, + // because it allowes services with lots of pods to be serviced much + // more often than services with few pods; it also would cause a + // service that's inserted multiple times to be processed more than + // necessary. + queue workqueue.RateLimitingInterface + + // maxEndpointsPerSlice references the maximum number of endpoints that + // should be added to an EndpointSlice + maxEndpointsPerSlice int32 + + // workerLoopPeriod is the time between worker runs. The workers + // process the queue of service and pod changes + workerLoopPeriod time.Duration +} + +// Run will not return until stopCh is closed. +func (c *Controller) Run(workers int, stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + defer c.queue.ShutDown() + + klog.Infof("Starting endpoint controller") + defer klog.Infof("Shutting down endpoint controller") + + if !cache.WaitForNamedCacheSync("endpoint_slice", stopCh, c.podsSynced, c.servicesSynced) { + return + } + + for i := 0; i < workers; i++ { + go wait.Until(c.worker, c.workerLoopPeriod, stopCh) + } + + go func() { + defer utilruntime.HandleCrash() + }() + + <-stopCh +} + +// worker runs a worker thread that just dequeues items, processes them, and +// marks them done. You may run as many of these in parallel as you wish; the +// workqueue guarantees that they will not end up processing the same service +// at the same time +func (c *Controller) worker() { + for c.processNextWorkItem() { + } +} + +func (c *Controller) processNextWorkItem() bool { + cKey, quit := c.queue.Get() + if quit { + return false + } + defer c.queue.Done(cKey) + + err := c.syncService(cKey.(string)) + c.handleErr(err, cKey) + + return true +} + +func (c *Controller) handleErr(err error, key interface{}) { + if err == nil { + c.queue.Forget(key) + return + } + + if c.queue.NumRequeues(key) < maxRetries { + klog.Warningf("Error syncing endpoint slices for service %q, retrying. Error: %v", key, err) + c.queue.AddRateLimited(key) + return + } + + klog.Warningf("Retry budget exceeded, dropping service %q out of the queue: %v", key, err) + c.queue.Forget(key) + utilruntime.HandleError(err) +} + +func (c *Controller) syncService(key string) error { + startTime := time.Now() + defer func() { + klog.V(4).Infof("Finished syncing service %q endpoint slices. 
(%v)", key, time.Since(startTime)) + }() + + namespace, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + + service, err := c.serviceLister.Services(namespace).Get(name) + if err != nil { + if apierrors.IsNotFound(err) { + c.triggerTimeTracker.DeleteService(namespace, name) + } + return err + } + + if service.Spec.Selector == nil { + // services without a selector receive no endpoint slices from this controller; + // these services will receive endpoint slices that are created out-of-band via the REST API. + return nil + } + + klog.V(5).Infof("About to update endpoint slices for service %q", key) + + podLabelSelector := labels.Set(service.Spec.Selector).AsSelectorPreValidated() + pods, err := c.podLister.Pods(service.Namespace).List(podLabelSelector) + if err != nil { + // Since we're getting stuff from a local cache, it is basically + // impossible to get this error. + c.eventRecorder.Eventf(service, v1.EventTypeWarning, "FailedToListPods", + "Error listing Pods for Service %s/%s: %v", service.Namespace, service.Name, err) + return err + } + + esLabelSelector := labels.Set(map[string]string{serviceNameLabel: service.Name}).AsSelectorPreValidated() + endpointSlices, err := c.endpointSliceLister.EndpointSlices(service.Namespace).List(esLabelSelector) + + if err != nil { + // Since we're getting stuff from a local cache, it is basically + // impossible to get this error. + c.eventRecorder.Eventf(service, v1.EventTypeWarning, "FailedToListEndpointSlices", + "Error listing Endpoint Slices for Service %s/%s: %v", service.Namespace, service.Name, err) + return err + } + + // We call ComputeEndpointLastChangeTriggerTime here to make sure that the + // state of the trigger time tracker gets updated even if the sync turns out + // to be no-op and we don't update the EndpointSlice objects. + lastChangeTriggerTime := c.triggerTimeTracker. + ComputeEndpointLastChangeTriggerTime(namespace, service, pods) + + err = c.reconciler.reconcile(service, pods, endpointSlices, lastChangeTriggerTime) + if err != nil { + c.eventRecorder.Eventf(service, v1.EventTypeWarning, "FailedToUpdateEndpointSlices", + "Error updating Endpoint Slices for Service %s/%s: %v", service.Namespace, service.Name, err) + return err + } + + return nil +} + +// obj could be a *v1.Service or a DeletionalFinalStateUnknown marker item +func (c *Controller) enqueueService(obj interface{}) { + key, err := controller.KeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Couldn't get key for object")) + return + } + + c.queue.Add(key) +} + +func (c *Controller) addPod(obj interface{}) { + pod := obj.(*v1.Pod) + services, err := endpointutil.GetPodServiceMemberships(c.serviceLister, pod) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", pod.Namespace, pod.Name, err)) + return + } + for key := range services { + c.queue.Add(key) + } +} + +func (c *Controller) updatePod(old, cur interface{}) { + services := endpointutil.GetServicesToUpdateOnPodChange(c.serviceLister, old, cur, podEndpointChanged) + for key := range services { + c.queue.Add(key) + } +} + +// When a pod is deleted, enqueue the services the pod used to be a member of +// obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item. 
+func (c *Controller) deletePod(obj interface{}) { + pod := endpointutil.GetPodFromDeleteAction(obj) + if pod != nil { + c.addPod(pod) + } +} diff --git a/pkg/controller/endpointslice/endpointslice_controller_test.go b/pkg/controller/endpointslice/endpointslice_controller_test.go new file mode 100644 index 00000000000..15524eea21f --- /dev/null +++ b/pkg/controller/endpointslice/endpointslice_controller_test.go @@ -0,0 +1,326 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/cache" + "k8s.io/kubernetes/pkg/controller" + endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint" + utilpointer "k8s.io/utils/pointer" +) + +// Most of the tests related to EndpointSlice allocation can be found in reconciler_test.go +// Tests here primarily focus on unique controller functionality before the reconciler begins + +var alwaysReady = func() bool { return true } + +type endpointSliceController struct { + *Controller + endpointSliceStore cache.Store + nodeStore cache.Store + podStore cache.Store + serviceStore cache.Store +} + +func newController(nodeNames []string) (*fake.Clientset, *endpointSliceController) { + client := newClientset() + informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc()) + nodeInformer := informerFactory.Core().V1().Nodes() + indexer := nodeInformer.Informer().GetIndexer() + for _, nodeName := range nodeNames { + indexer.Add(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}) + } + + esController := NewController( + informerFactory.Core().V1().Pods(), + informerFactory.Core().V1().Services(), + nodeInformer, + informerFactory.Discovery().V1alpha1().EndpointSlices(), + int32(100), + client) + + esController.nodesSynced = alwaysReady + esController.podsSynced = alwaysReady + esController.servicesSynced = alwaysReady + esController.endpointSlicesSynced = alwaysReady + + return client, &endpointSliceController{ + esController, + informerFactory.Discovery().V1alpha1().EndpointSlices().Informer().GetStore(), + informerFactory.Core().V1().Nodes().Informer().GetStore(), + informerFactory.Core().V1().Pods().Informer().GetStore(), + informerFactory.Core().V1().Services().Informer().GetStore(), + } +} + +// Ensure SyncService for service with no selector results in no action +func TestSyncServiceNoSelector(t *testing.T) { + ns := metav1.NamespaceDefault + serviceName := "testing-1" + client, esController := newController([]string{"node-1"}) + esController.serviceStore.Add(&v1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: serviceName, Namespace: ns}, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{{TargetPort: intstr.FromInt(80)}}, + }, + }) + + err := 
esController.syncService(fmt.Sprintf("%s/%s", ns, serviceName)) + assert.Nil(t, err) + assert.Len(t, client.Actions(), 0) +} + +// Ensure SyncService for service with selector but no pods results in placeholder EndpointSlice +func TestSyncServiceWithSelector(t *testing.T) { + ns := metav1.NamespaceDefault + serviceName := "testing-1" + client, esController := newController([]string{"node-1"}) + standardSyncService(t, esController, ns, serviceName) + expectActions(t, client.Actions(), 1, "create", "endpointslices") + + sliceList, err := client.DiscoveryV1alpha1().EndpointSlices(ns).List(metav1.ListOptions{}) + assert.Nil(t, err, "Expected no error fetching endpoint slices") + assert.Len(t, sliceList.Items, 1, "Expected 1 endpoint slices") + slice := sliceList.Items[0] + assert.Regexp(t, "^"+serviceName, slice.Name) + assert.Equal(t, serviceName, slice.Labels[serviceNameLabel]) + assert.EqualValues(t, []discovery.EndpointPort{}, slice.Ports) + assert.EqualValues(t, []discovery.Endpoint{}, slice.Endpoints) + assert.NotEmpty(t, slice.Annotations["endpoints.kubernetes.io/last-change-trigger-time"]) +} + +// Ensure SyncService gracefully handles a missing service. This test also +// populates another existing service to ensure a clean up process doesn't +// remove too much. +func TestSyncServiceMissing(t *testing.T) { + namespace := metav1.NamespaceDefault + client, esController := newController([]string{"node-1"}) + + // Build up existing service + existingServiceName := "stillthere" + existingServiceKey := endpointutil.ServiceKey{Name: existingServiceName, Namespace: namespace} + esController.triggerTimeTracker.ServiceStates[existingServiceKey] = endpointutil.ServiceState{} + esController.serviceStore.Add(&v1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: existingServiceName, Namespace: namespace}, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{{TargetPort: intstr.FromInt(80)}}, + Selector: map[string]string{"foo": "bar"}, + }, + }) + + // Add missing service to triggerTimeTracker to ensure the reference is cleaned up + missingServiceName := "notthere" + missingServiceKey := endpointutil.ServiceKey{Name: missingServiceName, Namespace: namespace} + esController.triggerTimeTracker.ServiceStates[missingServiceKey] = endpointutil.ServiceState{} + + err := esController.syncService(fmt.Sprintf("%s/%s", namespace, missingServiceName)) + + // Since the service doesn't exist, we should get a not found error + assert.NotNil(t, err, "Expected no error syncing service") + assert.Equal(t, err.Error(), "service \"notthere\" not found") + + // That should mean no client actions were performed + assert.Len(t, client.Actions(), 0) + + // TriggerTimeTracker should have removed the reference to the missing service + assert.NotContains(t, esController.triggerTimeTracker.ServiceStates, missingServiceKey) + + // TriggerTimeTracker should have left the reference to the missing service + assert.Contains(t, esController.triggerTimeTracker.ServiceStates, existingServiceKey) +} + +// Ensure SyncService correctly selects Pods. 
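Pod selection in syncService is plain label matching: the service's spec.selector map becomes a selector via AsSelectorPreValidated and is listed against the pod informer cache, which is what the next test verifies. A tiny sketch of that matching step with hypothetical labels:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// Selector built from a Service's spec.selector map, as in syncService.
	selector := labels.Set(map[string]string{"foo": "bar"}).AsSelectorPreValidated()

	matching := labels.Set{"foo": "bar", "extra": "ok"}
	other := labels.Set{"foo": "boo"}

	fmt.Println(selector.Matches(matching)) // true
	fmt.Println(selector.Matches(other))    // false
}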
+func TestSyncServicePodSelection(t *testing.T) { + client, esController := newController([]string{"node-1"}) + ns := metav1.NamespaceDefault + + pod1 := newPod(1, ns, true, 0) + esController.podStore.Add(pod1) + + // ensure this pod will not match the selector + pod2 := newPod(2, ns, true, 0) + pod2.Labels["foo"] = "boo" + esController.podStore.Add(pod2) + + standardSyncService(t, esController, ns, "testing-1") + expectActions(t, client.Actions(), 1, "create", "endpointslices") + + // an endpoint slice should be created, it should only reference pod1 (not pod2) + slices, err := client.DiscoveryV1alpha1().EndpointSlices(ns).List(metav1.ListOptions{}) + assert.Nil(t, err, "Expected no error fetching endpoint slices") + assert.Len(t, slices.Items, 1, "Expected 1 endpoint slices") + slice := slices.Items[0] + assert.Len(t, slice.Endpoints, 1, "Expected 1 endpoint in first slice") + assert.NotEmpty(t, slice.Annotations["endpoints.kubernetes.io/last-change-trigger-time"]) + endpoint := slice.Endpoints[0] + assert.EqualValues(t, endpoint.TargetRef, &v1.ObjectReference{Kind: "Pod", Namespace: ns, Name: pod1.Name}) +} + +// Ensure SyncService correctly selects EndpointSlices. +func TestSyncServiceEndpointSliceSelection(t *testing.T) { + client, esController := newController([]string{"node-1"}) + ns := metav1.NamespaceDefault + serviceName := "testing-1" + + // 3 slices, 2 with matching labels for our service + endpointSlices := []*discovery.EndpointSlice{{ + ObjectMeta: metav1.ObjectMeta{Name: "matching-1", Namespace: ns, Labels: map[string]string{serviceNameLabel: serviceName}}, + }, { + ObjectMeta: metav1.ObjectMeta{Name: "matching-2", Namespace: ns, Labels: map[string]string{serviceNameLabel: serviceName}}, + }, { + ObjectMeta: metav1.ObjectMeta{Name: "not-matching-1", Namespace: ns, Labels: map[string]string{serviceNameLabel: "something-else"}}, + }} + + // need to add them to both store and fake clientset + for _, endpointSlice := range endpointSlices { + addErr := esController.endpointSliceStore.Add(endpointSlice) + assert.Nil(t, addErr, "Expected no error adding EndpointSlice") + _, err := client.DiscoveryV1alpha1().EndpointSlices(ns).Create(endpointSlice) + assert.Nil(t, err, "Expected no error creating EndpointSlice") + } + + numActionsBefore := len(client.Actions()) + standardSyncService(t, esController, ns, serviceName) + + // should only have 2 additional actions + assert.Len(t, client.Actions(), numActionsBefore+2) + + // only 2 slices should match, 1 of those should be deleted, 1 should be updated as a placeholder + assert.Equal(t, "update", client.Actions()[numActionsBefore].GetVerb()) + assert.Equal(t, client.Actions()[numActionsBefore].GetResource().Resource, "endpointslices") + assert.Equal(t, "delete", client.Actions()[numActionsBefore+1].GetVerb()) + assert.Equal(t, client.Actions()[numActionsBefore+1].GetResource().Resource, "endpointslices") +} + +// Ensure SyncService handles a variety of protocols and IPs appropriately. 
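The slice-selection test above also highlights that EndpointSlices are tied to their Service purely through the kubernetes.io/service-name label; syncService builds its slice listing selector from exactly that label. A small sketch of the ownership filter, reusing the label values from the test:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

const serviceNameLabel = "kubernetes.io/service-name"

func main() {
	// Selector the controller uses to list slices owned by "testing-1".
	owned := labels.Set(map[string]string{serviceNameLabel: "testing-1"}).AsSelectorPreValidated()

	sliceLabels := []labels.Set{
		{serviceNameLabel: "testing-1"},      // matching-1
		{serviceNameLabel: "testing-1"},      // matching-2
		{serviceNameLabel: "something-else"}, // not-matching-1
	}

	matches := 0
	for _, ls := range sliceLabels {
		if owned.Matches(ls) {
			matches++
		}
	}
	fmt.Println(matches, "slices owned by the service") // 2
}

TestSyncServiceFull, below, then exercises the complete path across TCP, UDP, and SCTP ports and both the PodIP and PodIPs status fields.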
+func TestSyncServiceFull(t *testing.T) { + client, esController := newController([]string{"node-1"}) + namespace := metav1.NamespaceDefault + serviceName := "all-the-protocols" + + // pod 1 only uses PodIP status attr + pod1 := newPod(1, namespace, true, 0) + pod1.Status.PodIP = "1.2.3.4" + pod1.Status.PodIPs = []v1.PodIP{} + esController.podStore.Add(pod1) + + // pod 2 only uses PodIPs status attr + pod2 := newPod(2, namespace, true, 0) + pod2.Status.PodIP = "" + pod2.Status.PodIPs = []v1.PodIP{{IP: "1.2.3.5"}, {IP: "1234::5678:0000:0000:9abc:def0"}} + esController.podStore.Add(pod2) + + // create service with all protocols and multiple ports + serviceCreateTime := time.Now() + esController.serviceStore.Add(&v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + CreationTimestamp: metav1.NewTime(serviceCreateTime), + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + {Name: "tcp-example", TargetPort: intstr.FromInt(80), Protocol: v1.ProtocolTCP}, + {Name: "udp-example", TargetPort: intstr.FromInt(161), Protocol: v1.ProtocolUDP}, + {Name: "sctp-example", TargetPort: intstr.FromInt(3456), Protocol: v1.ProtocolSCTP}, + }, + Selector: map[string]string{"foo": "bar"}, + }, + }) + + // run through full sync service loop + err := esController.syncService(fmt.Sprintf("%s/%s", namespace, serviceName)) + assert.Nil(t, err) + + // should only have 1 action - to create endpoint slice + assert.Len(t, client.Actions(), 1) + expectActions(t, client.Actions(), 1, "create", "endpointslices") + sliceList, err := client.DiscoveryV1alpha1().EndpointSlices(namespace).List(metav1.ListOptions{}) + assert.Nil(t, err, "Expected no error fetching endpoint slices") + assert.Len(t, sliceList.Items, 1, "Expected 1 endpoint slices") + + // ensure all attributes of endpoint slice match expected state + slice := sliceList.Items[0] + assert.Len(t, slice.Endpoints, 2, "Expected 2 endpoints in first slice") + assert.Equal(t, slice.Annotations["endpoints.kubernetes.io/last-change-trigger-time"], serviceCreateTime.Format(time.RFC3339Nano)) + assert.EqualValues(t, []discovery.EndpointPort{{ + Name: strPtr("tcp-example"), + Protocol: protoPtr(v1.ProtocolTCP), + Port: int32Ptr(int32(80)), + }, { + Name: strPtr("udp-example"), + Protocol: protoPtr(v1.ProtocolUDP), + Port: int32Ptr(int32(161)), + }, { + Name: strPtr("sctp-example"), + Protocol: protoPtr(v1.ProtocolSCTP), + Port: int32Ptr(int32(3456)), + }}, slice.Ports) + assert.ElementsMatch(t, []discovery.Endpoint{{ + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(true)}, + Addresses: []string{"1.2.3.4"}, + TargetRef: &v1.ObjectReference{Kind: "Pod", Namespace: namespace, Name: pod1.Name}, + Topology: map[string]string{"kubernetes.io/hostname": "node-1"}, + }, { + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(true)}, + Addresses: []string{"1.2.3.5", "1234::5678:0000:0000:9abc:def0"}, + TargetRef: &v1.ObjectReference{Kind: "Pod", Namespace: namespace, Name: pod2.Name}, + Topology: map[string]string{"kubernetes.io/hostname": "node-1"}, + }}, slice.Endpoints) +} + +// Test helpers + +func standardSyncService(t *testing.T, esController *endpointSliceController, namespace, serviceName string) { + esController.serviceStore.Add(&v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + CreationTimestamp: metav1.NewTime(time.Now()), + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{{TargetPort: intstr.FromInt(80)}}, + Selector: map[string]string{"foo": "bar"}, + 
}, + }) + + err := esController.syncService(fmt.Sprintf("%s/%s", namespace, serviceName)) + assert.Nil(t, err, "Expected no error syncing service") +} + +func strPtr(str string) *string { + return &str +} + +func protoPtr(proto v1.Protocol) *v1.Protocol { + return &proto +} + +func int32Ptr(num int32) *int32 { + return &num +} diff --git a/pkg/controller/endpointslice/reconciler.go b/pkg/controller/endpointslice/reconciler.go new file mode 100644 index 00000000000..9abb1c9e7eb --- /dev/null +++ b/pkg/controller/endpointslice/reconciler.go @@ -0,0 +1,301 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "fmt" + "sort" + "time" + + corev1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + clientset "k8s.io/client-go/kubernetes" + corelisters "k8s.io/client-go/listers/core/v1" + endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint" +) + +// reconciler is responsible for transforming current EndpointSlice state into +// desired state +type reconciler struct { + client clientset.Interface + nodeLister corelisters.NodeLister + maxEndpointsPerSlice int32 +} + +// endpointMeta includes the attributes we group slices on, this type helps with +// that logic in reconciler +type endpointMeta struct { + Ports []discovery.EndpointPort `json:"ports" protobuf:"bytes,2,rep,name=ports"` + AddressType *discovery.AddressType `json:"addressType" protobuf:"bytes,3,rep,name=addressType"` +} + +// reconcile takes a set of pods currently matching a service selector and +// compares them with the endpoints already present in any existing endpoint +// slices for the given service. It creates, updates, or deletes endpoint slices +// to ensure the desired set of pods are represented by endpoint slices. +func (r *reconciler) reconcile(service *corev1.Service, pods []*corev1.Pod, existingSlices []*discovery.EndpointSlice, triggerTime time.Time) error { + // Build data structures for existing state. + existingSlicesByPortMap := map[portMapKey][]*discovery.EndpointSlice{} + for _, existingSlice := range existingSlices { + epHash := newPortMapKey(existingSlice.Ports) + existingSlicesByPortMap[epHash] = append(existingSlicesByPortMap[epHash], existingSlice) + } + + // Build data structures for desired state. 
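Existing slices have just been bucketed by a key derived from their full port set, and the desired state is grouped the same way next, so each unique combination of ports is reconciled independently. A rough stand-in for that keying (the real newPortMapKey in utils.go hashes the ports; the slice names here are made up):

package main

import (
	"fmt"
	"sort"
	"strings"
)

// port is a simplified stand-in for discovery.EndpointPort.
type port struct {
	Name     string
	Protocol string
	Port     int32
}

// portMapKey mirrors the idea behind newPortMapKey: a stable key derived from
// the whole set of ports, independent of their order.
func portMapKey(ports []port) string {
	parts := make([]string, 0, len(ports))
	for _, p := range ports {
		parts = append(parts, fmt.Sprintf("%s/%s/%d", p.Name, p.Protocol, p.Port))
	}
	sort.Strings(parts)
	return strings.Join(parts, ",")
}

func main() {
	http := []port{{Name: "http", Protocol: "TCP", Port: 8080}}
	dns := []port{{Name: "dns", Protocol: "UDP", Port: 53}, {Name: "dns-tcp", Protocol: "TCP", Port: 53}}

	existing := map[string][]string{} // key -> slice names
	existing[portMapKey(http)] = append(existing[portMapKey(http)], "my-service-http-1")
	existing[portMapKey(dns)] = append(existing[portMapKey(dns)], "my-service-dns-1")

	fmt.Println(len(existing), "port groupings") // 2 port groupings
}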
+ desiredMetaByPortMap := map[portMapKey]*endpointMeta{} + desiredEndpointsByPortMap := map[portMapKey]endpointSet{} + + for _, pod := range pods { + if endpointutil.ShouldPodBeInEndpoints(pod) { + endpointPorts := getEndpointPorts(service, pod) + epHash := newPortMapKey(endpointPorts) + if _, ok := desiredEndpointsByPortMap[epHash]; !ok { + desiredEndpointsByPortMap[epHash] = endpointSet{} + } + + if _, ok := desiredMetaByPortMap[epHash]; !ok { + // TODO: Support multiple backend types + ipAddressType := discovery.AddressTypeIP + desiredMetaByPortMap[epHash] = &endpointMeta{ + AddressType: &ipAddressType, + Ports: endpointPorts, + } + } + + node, err := r.nodeLister.Get(pod.Spec.NodeName) + if err != nil { + return err + } + endpoint := podToEndpoint(pod, node) + desiredEndpointsByPortMap[epHash].Insert(&endpoint) + } + } + + slicesToCreate := []*discovery.EndpointSlice{} + slicesToUpdate := []*discovery.EndpointSlice{} + sliceNamesToDelete := sets.String{} + + // Determine changes necessary for each group of slices by port map. + for portMap, desiredEndpoints := range desiredEndpointsByPortMap { + pmSlicesToCreate, pmSlicesToUpdate, pmSliceNamesToDelete := r.reconcileByPortMapping( + service, existingSlicesByPortMap[portMap], desiredEndpoints, desiredMetaByPortMap[portMap]) + if len(pmSlicesToCreate) > 0 { + slicesToCreate = append(slicesToCreate, pmSlicesToCreate...) + } + if len(pmSlicesToUpdate) > 0 { + slicesToUpdate = append(slicesToUpdate, pmSlicesToUpdate...) + } + if pmSliceNamesToDelete.Len() > 0 { + sliceNamesToDelete = sliceNamesToDelete.Union(pmSliceNamesToDelete) + } + } + + // If there are unique sets of ports that are no longer desired, mark + // the corresponding endpoint slices for deletion. + for portMap, existingSlices := range existingSlicesByPortMap { + if _, ok := desiredEndpointsByPortMap[portMap]; !ok { + for _, existingSlice := range existingSlices { + sliceNamesToDelete.Insert(existingSlice.Name) + } + } + } + + // When no endpoint slices would usually exist, we need to add a placeholder. + if len(existingSlices) == sliceNamesToDelete.Len() && len(slicesToCreate) < 1 { + placeholderSlice := newEndpointSlice(service, &endpointMeta{Ports: []discovery.EndpointPort{}}) + slicesToCreate = append(slicesToCreate, placeholderSlice) + } + + return r.finalize(service, slicesToCreate, slicesToUpdate, sliceNamesToDelete, triggerTime) +} + +// finalize creates, updates, and deletes slices as specified +func (r *reconciler) finalize( + service *corev1.Service, + slicesToCreate, + slicesToUpdate []*discovery.EndpointSlice, + sliceNamesToDelete sets.String, + triggerTime time.Time, +) error { + errs := []error{} + + // If there are slices to create and delete, change the creates to updates + // of the slices that would otherwise be deleted. 
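The loop that follows implements the comment above: pending creates take over the names of slices that would otherwise be deleted, so a delete plus a create collapses into a single update. The remaining slices are then created, updated, and deleted through the discovery/v1alpha1 client; a toy sketch of those calls against the same fake clientset the tests use (object names are made up):

package main

import (
	"fmt"

	discovery "k8s.io/api/discovery/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
)

func main() {
	client := fake.NewSimpleClientset()

	// A placeholder-style slice, labeled with the owning service's name.
	slice := &discovery.EndpointSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-service-abc12",
			Namespace: "ns1",
			Labels:    map[string]string{"kubernetes.io/service-name": "my-service"},
		},
		Ports:     []discovery.EndpointPort{},
		Endpoints: []discovery.Endpoint{},
	}

	if _, err := client.DiscoveryV1alpha1().EndpointSlices("ns1").Create(slice); err != nil {
		panic(err)
	}

	list, _ := client.DiscoveryV1alpha1().EndpointSlices("ns1").List(metav1.ListOptions{})
	fmt.Println(len(list.Items), "slice(s)") // 1 slice(s)
}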
+ for len(slicesToCreate) > 0 && sliceNamesToDelete.Len() > 0 { + sliceName, _ := sliceNamesToDelete.PopAny() + slice := slicesToCreate[len(slicesToCreate)-1] + slicesToCreate = slicesToCreate[:len(slicesToCreate)-1] + slice.Name = sliceName + slicesToUpdate = append(slicesToUpdate, slice) + } + + for _, endpointSlice := range slicesToCreate { + addTriggerTimeAnnotation(endpointSlice, triggerTime) + _, err := r.client.DiscoveryV1alpha1().EndpointSlices(service.Namespace).Create(endpointSlice) + if err != nil { + errs = append(errs, fmt.Errorf("Error creating EndpointSlice for Service %s/%s: %v", service.Namespace, service.Name, err)) + } + } + + for _, endpointSlice := range slicesToUpdate { + addTriggerTimeAnnotation(endpointSlice, triggerTime) + _, err := r.client.DiscoveryV1alpha1().EndpointSlices(service.Namespace).Update(endpointSlice) + if err != nil { + errs = append(errs, fmt.Errorf("Error updating %s EndpointSlice for Service %s/%s: %v", endpointSlice.Name, service.Namespace, service.Name, err)) + } + } + + for sliceNamesToDelete.Len() > 0 { + sliceName, _ := sliceNamesToDelete.PopAny() + err := r.client.DiscoveryV1alpha1().EndpointSlices(service.Namespace).Delete(sliceName, &metav1.DeleteOptions{}) + if err != nil { + errs = append(errs, fmt.Errorf("Error deleting %s EndpointSlice for Service %s/%s: %v", sliceName, service.Namespace, service.Name, err)) + } + } + + return utilerrors.NewAggregate(errs) +} + +// reconcileByPortMapping compares the endpoints found in existing slices with +// the list of desired endpoints and returns lists of slices to create, update, +// and delete. The logic is split up into several main steps: +// 1. Iterate through existing slices, delete endpoints that are no longer +// desired and update matching endpoints that have changed. +// 2. Iterate through slices that have been modified in 1 and fill them up with +// any remaining desired endpoints. +// 3. If there are still desired endpoints left, try to fit them into a previously +// unchanged slice and/or create new ones. +func (r *reconciler) reconcileByPortMapping( + service *corev1.Service, + existingSlices []*discovery.EndpointSlice, + desiredSet endpointSet, + endpointMeta *endpointMeta, +) ([]*discovery.EndpointSlice, []*discovery.EndpointSlice, sets.String) { + slicesByName := map[string]*discovery.EndpointSlice{} + sliceNamesUnchanged := sets.String{} + sliceNamesToUpdate := sets.String{} + sliceNamesToDelete := sets.String{} + + // 1. Iterate through existing slices to delete endpoints no longer desired + // and update endpoints that have changed + for _, existingSlice := range existingSlices { + slicesByName[existingSlice.Name] = existingSlice + newEndpoints := []discovery.Endpoint{} + endpointUpdated := false + for _, endpoint := range existingSlice.Endpoints { + got := desiredSet.Get(&endpoint) + // If endpoint is desired, add it to list of endpoints to keep. + if got != nil { + newEndpoints = append(newEndpoints, *got) + // If existing version of endpoint doesn't match desired version + // set endpointUpdated to ensure endpoint changes are persisted.
+ if !endpointsEqualBeyondHash(got, &endpoint) { + endpointUpdated = true + } + // once an endpoint has been placed/found in a slice, it no + // longer needs to be handled + desiredSet.Delete(&endpoint) + } + } + + // If an endpoint was updated or removed, mark for update or delete + if endpointUpdated || len(existingSlice.Endpoints) != len(newEndpoints) { + if len(newEndpoints) == 0 { + // if no endpoints desired in this slice, mark for deletion + sliceNamesToDelete.Insert(existingSlice.Name) + } else { + // otherwise, mark for update + existingSlice.Endpoints = newEndpoints + sliceNamesToUpdate.Insert(existingSlice.Name) + } + } else { + // slices with no changes will be useful if there are leftover endpoints + sliceNamesUnchanged.Insert(existingSlice.Name) + } + } + + // 2. If we still have desired endpoints to add and slices marked for update, + // iterate through the slices and fill them up with the desired endpoints. + if desiredSet.Len() > 0 && sliceNamesToUpdate.Len() > 0 { + slices := []*discovery.EndpointSlice{} + for _, sliceName := range sliceNamesToUpdate.UnsortedList() { + slices = append(slices, slicesByName[sliceName]) + } + // Sort endpoint slices by length so we're filling up the fullest ones + // first. + sort.Sort(endpointSliceEndpointLen(slices)) + + // Iterate through slices and fill them up with desired endpoints. + for _, slice := range slices { + for desiredSet.Len() > 0 && len(slice.Endpoints) < int(r.maxEndpointsPerSlice) { + endpoint, _ := desiredSet.PopAny() + slice.Endpoints = append(slice.Endpoints, *endpoint) + } + } + } + + // 3. If there are still desired endpoints left at this point, we try to fit + // the endpoints in a single existing slice. If there are no slices with + // that capacity, we create new slices for the endpoints. + slicesToCreate := []*discovery.EndpointSlice{} + + for desiredSet.Len() > 0 { + var sliceToFill *discovery.EndpointSlice + + // If the remaining number of endpoints is smaller than the max + // endpoints per slice and we have slices that haven't already been + // filled, try to fit them in one. + if desiredSet.Len() < int(r.maxEndpointsPerSlice) && sliceNamesUnchanged.Len() > 0 { + unchangedSlices := []*discovery.EndpointSlice{} + for _, sliceName := range sliceNamesUnchanged.UnsortedList() { + unchangedSlices = append(unchangedSlices, slicesByName[sliceName]) + } + sliceToFill = getSliceToFill(unchangedSlices, desiredSet.Len(), int(r.maxEndpointsPerSlice)) + } + + // If we didn't find a sliceToFill, generate a new empty one. + if sliceToFill == nil { + sliceToFill = newEndpointSlice(service, endpointMeta) + } + + // Fill the slice up with remaining endpoints. + for desiredSet.Len() > 0 && len(sliceToFill.Endpoints) < int(r.maxEndpointsPerSlice) { + endpoint, _ := desiredSet.PopAny() + sliceToFill.Endpoints = append(sliceToFill.Endpoints, *endpoint) + } + + // New slices will not have a Name set; use this to determine whether + // this should be an update or create. + if sliceToFill.Name != "" { + sliceNamesToUpdate.Insert(sliceToFill.Name) + sliceNamesUnchanged.Delete(sliceToFill.Name) + } else { + slicesToCreate = append(slicesToCreate, sliceToFill) + } + } + + // Build slicesToUpdate from slice names.
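To put numbers on the packing strategy above (an illustrative helper, not the controller's code): with maxEndpointsPerSlice of 100 and 127 endpoints still to place, the reconciler creates one full slice of 100 and then tries to tuck the remaining 27 into an existing slice with room, which is the shape the "some preexisting" test later in this patch asserts (an existing slice grows from 62 to 89, plus one new slice of 100). planAdditions and its inputs are hypothetical.

package main

import "fmt"

// planAdditions splits leftover endpoints into full new slices plus a
// remainder that is either tucked into an existing slice with spare room or
// placed in one more new slice.
func planAdditions(leftover, maxPerSlice, spareRoomInExisting int) (newSlices, toExisting, toExtraSlice int) {
	newSlices = leftover / maxPerSlice
	remainder := leftover % maxPerSlice
	if remainder > 0 && remainder <= spareRoomInExisting {
		toExisting = remainder
	} else if remainder > 0 {
		newSlices++
		toExtraSlice = remainder
	}
	return
}

func main() {
	// 127 endpoints left, limit of 100 per slice, and the fuller existing
	// slice (62 endpoints) has room for 38 more.
	fmt.Println(planAdditions(127, 100, 38)) // 1 new full slice, 27 to the existing slice, 0 extra
}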
+ slicesToUpdate := []*discovery.EndpointSlice{} + for _, sliceName := range sliceNamesToUpdate.UnsortedList() { + slicesToUpdate = append(slicesToUpdate, slicesByName[sliceName]) + } + + return slicesToCreate, slicesToUpdate, sliceNamesToDelete +} diff --git a/pkg/controller/endpointslice/reconciler_test.go b/pkg/controller/endpointslice/reconciler_test.go new file mode 100644 index 00000000000..dea28337a3b --- /dev/null +++ b/pkg/controller/endpointslice/reconciler_test.go @@ -0,0 +1,607 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1alpha1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes/fake" + corelisters "k8s.io/client-go/listers/core/v1" + k8stesting "k8s.io/client-go/testing" + "k8s.io/kubernetes/pkg/controller" + utilpointer "k8s.io/utils/pointer" +) + +var defaultMaxEndpointsPerSlice = int32(100) + +// Even when there are no pods, we want to have a placeholder slice for each service +func TestReconcileEmpty(t *testing.T) { + client := newClientset() + namespace := "test" + svc, _ := newServiceAndendpointMeta("foo", namespace) + + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, []*corev1.Pod{}, []*discovery.EndpointSlice{}, time.Now()) + expectActions(t, client.Actions(), 1, "create", "endpointslices") + + slices := fetchEndpointSlices(t, client, namespace) + assert.Len(t, slices, 1, "Expected 1 endpoint slices") + + assert.Regexp(t, "^"+svc.Name, slices[0].Name) + assert.Equal(t, svc.Name, slices[0].Labels[serviceNameLabel]) + assert.EqualValues(t, []discovery.EndpointPort{}, slices[0].Ports) + assert.EqualValues(t, []discovery.Endpoint{}, slices[0].Endpoints) +} + +// Given a single pod matching a service selector and no existing endpoint slices, +// a slice should be created +func TestReconcile1Pod(t *testing.T) { + client := newClientset() + namespace := "test" + svc, _ := newServiceAndendpointMeta("foo", namespace) + pod1 := newPod(1, namespace, true, 1) + pod1.Spec.Hostname = "example-hostname" + node1 := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: pod1.Spec.NodeName, + Labels: map[string]string{ + "topology.kubernetes.io/zone": "us-central1-a", + "topology.kubernetes.io/region": "us-central1", + }, + }, + } + + triggerTime := time.Now() + r := newReconciler(client, []*corev1.Node{node1}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, []*corev1.Pod{pod1}, []*discovery.EndpointSlice{}, triggerTime) + assert.Len(t, client.Actions(), 1, "Expected 1 additional clientset action") + + slices := fetchEndpointSlices(t, client, namespace) + assert.Len(t, slices, 1, "Expected 1 endpoint 
slices") + assert.Regexp(t, "^"+svc.Name, slices[0].Name) + assert.Equal(t, svc.Name, slices[0].Labels[serviceNameLabel]) + assert.Equal(t, slices[0].Annotations, map[string]string{ + "endpoints.kubernetes.io/last-change-trigger-time": triggerTime.Format(time.RFC3339Nano), + }) + assert.EqualValues(t, []discovery.Endpoint{{ + Addresses: []string{"1.2.3.5"}, + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(true)}, + Topology: map[string]string{ + "kubernetes.io/hostname": "node-1", + "topology.kubernetes.io/zone": "us-central1-a", + "topology.kubernetes.io/region": "us-central1", + }, + TargetRef: &corev1.ObjectReference{ + Kind: "Pod", + Namespace: namespace, + Name: "pod1", + }, + }}, slices[0].Endpoints) +} + +// given an existing endpoint slice and no pods matching the service, the existing +// slice should be updated to a placeholder (not deleted) +func TestReconcile1EndpointSlice(t *testing.T) { + client := newClientset() + namespace := "test" + svc, endpointMeta := newServiceAndendpointMeta("foo", namespace) + endpointSlice1 := newEmptyEndpointSlice(1, namespace, endpointMeta, svc) + + _, createErr := client.DiscoveryV1alpha1().EndpointSlices(namespace).Create(endpointSlice1) + assert.Nil(t, createErr, "Expected no error creating endpoint slice") + + numActionsBefore := len(client.Actions()) + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, []*corev1.Pod{}, []*discovery.EndpointSlice{endpointSlice1}, time.Now()) + assert.Len(t, client.Actions(), numActionsBefore+1, "Expected 1 additional clientset action") + actions := client.Actions() + assert.True(t, actions[numActionsBefore].Matches("update", "endpointslices"), "Action should be update endpoint slice") + + slices := fetchEndpointSlices(t, client, namespace) + assert.Len(t, slices, 1, "Expected 1 endpoint slices") + + assert.Regexp(t, "^"+svc.Name, slices[0].Name) + assert.Equal(t, svc.Name, slices[0].Labels[serviceNameLabel]) + assert.EqualValues(t, []discovery.EndpointPort{}, slices[0].Ports) + assert.EqualValues(t, []discovery.Endpoint{}, slices[0].Endpoints) +} + +// a simple use case with 250 pods matching a service and no existing slices +// reconcile should create 3 slices, completely filling 2 of them +func TestReconcileManyPods(t *testing.T) { + client := newClientset() + namespace := "test" + svc, _ := newServiceAndendpointMeta("foo", namespace) + + // start with 250 pods + pods := []*corev1.Pod{} + for i := 0; i < 250; i++ { + ready := !(i%3 == 0) + pods = append(pods, newPod(i, namespace, ready, 1)) + } + + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, []*discovery.EndpointSlice{}, time.Now()) + + // This is an ideal scenario where only 3 actions are required, and they're all creates + assert.Len(t, client.Actions(), 3, "Expected 3 additional clientset actions") + expectActions(t, client.Actions(), 3, "create", "endpointslices") + + // Two endpoint slices should be completely full, the remainder should be in another one + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), []int{100, 100, 50}) +} + +// now with preexisting slices, we have 250 pods matching a service +// the first endpoint slice contains 62 endpoints, all desired +// the second endpoint slice contains 61 endpoints, all desired +// that leaves 127 to add +// to minimize writes, our strategy is to 
create new slices for multiples of 100 +// that leaves 27 to drop in an existing slice +// dropping them in the first slice will result in the slice being closest to full +// this approach requires 1 update + 1 create instead of 2 updates + 1 create +func TestReconcileEndpointSlicesSomePreexisting(t *testing.T) { + client := newClientset() + namespace := "test" + svc, endpointMeta := newServiceAndendpointMeta("foo", namespace) + + // start with 250 pods + pods := []*corev1.Pod{} + for i := 0; i < 250; i++ { + ready := !(i%3 == 0) + pods = append(pods, newPod(i, namespace, ready, 1)) + } + + // have approximately 1/4 in first slice + endpointSlice1 := newEmptyEndpointSlice(1, namespace, endpointMeta, svc) + for i := 1; i < len(pods)-4; i += 4 { + endpointSlice1.Endpoints = append(endpointSlice1.Endpoints, podToEndpoint(pods[i], &corev1.Node{})) + } + + // have approximately 1/4 in second slice + endpointSlice2 := newEmptyEndpointSlice(2, namespace, endpointMeta, svc) + for i := 3; i < len(pods)-4; i += 4 { + endpointSlice2.Endpoints = append(endpointSlice2.Endpoints, podToEndpoint(pods[i], &corev1.Node{})) + } + + existingSlices := []*discovery.EndpointSlice{endpointSlice1, endpointSlice2} + createEndpointSlices(t, client, namespace, existingSlices) + + numActionsBefore := len(client.Actions()) + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, existingSlices, time.Now()) + + actions := client.Actions() + assert.Equal(t, numActionsBefore+2, len(actions), "Expected 2 additional client actions as part of reconcile") + assert.True(t, actions[numActionsBefore].Matches("create", "endpointslices"), "First action should be create endpoint slice") + assert.True(t, actions[numActionsBefore+1].Matches("update", "endpointslices"), "Second action should be update endpoint slice") + + // 1 new slice (0->100) + 1 updated slice (62->89) + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), []int{89, 61, 100}) +} + +// now with preexisting slices, we have 300 pods matching a service +// this scenario will show some less ideal allocation +// the first endpoint slice contains 74 endpoints, all desired +// the second endpoint slice contains 74 endpoints, all desired +// that leaves 152 to add +// to minimize writes, our strategy is to create new slices for multiples of 100 +// that leaves 52 to drop in an existing slice +// that capacity could fit if split in the 2 existing slices +// to minimize writes though, reconcile create a new slice with those 52 endpoints +// this approach requires 2 creates instead of 2 updates + 1 create +func TestReconcileEndpointSlicesSomePreexistingWorseAllocation(t *testing.T) { + client := newClientset() + namespace := "test" + svc, endpointMeta := newServiceAndendpointMeta("foo", namespace) + + // start with 300 pods + pods := []*corev1.Pod{} + for i := 0; i < 300; i++ { + ready := !(i%3 == 0) + pods = append(pods, newPod(i, namespace, ready, 1)) + } + + // have approximately 1/4 in first slice + endpointSlice1 := newEmptyEndpointSlice(1, namespace, endpointMeta, svc) + for i := 1; i < len(pods)-4; i += 4 { + endpointSlice1.Endpoints = append(endpointSlice1.Endpoints, podToEndpoint(pods[i], &corev1.Node{})) + } + + // have approximately 1/4 in second slice + endpointSlice2 := newEmptyEndpointSlice(2, namespace, endpointMeta, svc) + for i := 3; i < len(pods)-4; i += 4 { + endpointSlice2.Endpoints = append(endpointSlice2.Endpoints, 
podToEndpoint(pods[i], &corev1.Node{})) + } + + existingSlices := []*discovery.EndpointSlice{endpointSlice1, endpointSlice2} + createEndpointSlices(t, client, namespace, existingSlices) + + numActionsBefore := len(client.Actions()) + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, existingSlices, time.Now()) + + actions := client.Actions() + assert.Equal(t, numActionsBefore+2, len(actions), "Expected 2 additional client actions as part of reconcile") + expectActions(t, client.Actions(), 2, "create", "endpointslices") + + // 2 new slices (100, 52) in addition to existing slices (74, 74) + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), []int{74, 74, 100, 52}) +} + +// In some cases, such as a service port change, all slices for that service will require a change +// This test ensures that we are updating those slices and not calling create + delete for each +func TestReconcileEndpointSlicesUpdating(t *testing.T) { + client := newClientset() + namespace := "test" + svc, _ := newServiceAndendpointMeta("foo", namespace) + + // start with 250 pods + pods := []*corev1.Pod{} + for i := 0; i < 250; i++ { + ready := !(i%3 == 0) + pods = append(pods, newPod(i, namespace, ready, 1)) + } + + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, []*discovery.EndpointSlice{}, time.Now()) + numActionsExpected := 3 + assert.Len(t, client.Actions(), numActionsExpected, "Expected 3 additional clientset actions") + + slices := fetchEndpointSlices(t, client, namespace) + numActionsExpected++ + expectUnorderedSlicesWithLengths(t, slices, []int{100, 100, 50}) + + svc.Spec.Ports[0].TargetPort.IntVal = 81 + reconcileHelper(t, r, &svc, pods, []*discovery.EndpointSlice{&slices[0], &slices[1], &slices[2]}, time.Now()) + + numActionsExpected += 3 + assert.Len(t, client.Actions(), numActionsExpected, "Expected 3 additional clientset actions") + expectActions(t, client.Actions(), 3, "update", "endpointslices") + + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), []int{100, 100, 50}) +} + +// In this test, we start with 10 slices that only have 30 endpoints each +// An initial reconcile makes no changes (as desired to limit writes) +// When we change a service port, all slices will need to be updated in some way +// reconcile repacks the endpoints into 3 slices, and deletes the extras +func TestReconcileEndpointSlicesRecycling(t *testing.T) { + client := newClientset() + namespace := "test" + svc, endpointMeta := newServiceAndendpointMeta("foo", namespace) + + // start with 300 pods + pods := []*corev1.Pod{} + for i := 0; i < 300; i++ { + ready := !(i%3 == 0) + pods = append(pods, newPod(i, namespace, ready, 1)) + } + + // generate 10 existing slices with 30 pods/endpoints each + existingSlices := []*discovery.EndpointSlice{} + for i, pod := range pods { + sliceNum := i / 30 + if i%30 == 0 { + existingSlices = append(existingSlices, newEmptyEndpointSlice(sliceNum, namespace, endpointMeta, svc)) + } + existingSlices[sliceNum].Endpoints = append(existingSlices[sliceNum].Endpoints, podToEndpoint(pod, &corev1.Node{})) + } + + createEndpointSlices(t, client, namespace, existingSlices) + + numActionsBefore := len(client.Actions()) + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + 
reconcileHelper(t, r, &svc, pods, existingSlices, time.Now()) + // initial reconcile should be a no op, all pods are accounted for in slices, no repacking should be done + assert.Equal(t, numActionsBefore+0, len(client.Actions()), "Expected 0 additional client actions as part of reconcile") + + // changing a service port should require all slices to be updated, time for a repack + svc.Spec.Ports[0].TargetPort.IntVal = 81 + reconcileHelper(t, r, &svc, pods, existingSlices, time.Now()) + + // this should reflect 3 updates + 7 deletes + assert.Equal(t, numActionsBefore+10, len(client.Actions()), "Expected 10 additional client actions as part of reconcile") + + // thanks to recycling, we get a free repack of endpoints, resulting in 3 full slices instead of 10 mostly empty slices + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), []int{100, 100, 100}) +} + +// In this test, we want to verify that endpoints are added to a slice that will +// be closest to full after the operation, even when slices are already marked +// for update. +func TestReconcileEndpointSlicesUpdatePacking(t *testing.T) { + client := newClientset() + namespace := "test" + svc, endpointMeta := newServiceAndendpointMeta("foo", namespace) + + existingSlices := []*discovery.EndpointSlice{} + pods := []*corev1.Pod{} + + slice1 := newEmptyEndpointSlice(1, namespace, endpointMeta, svc) + for i := 0; i < 80; i++ { + pod := newPod(i, namespace, true, 1) + slice1.Endpoints = append(slice1.Endpoints, podToEndpoint(pod, &corev1.Node{})) + pods = append(pods, pod) + } + existingSlices = append(existingSlices, slice1) + + slice2 := newEmptyEndpointSlice(2, namespace, endpointMeta, svc) + for i := 100; i < 120; i++ { + pod := newPod(i, namespace, true, 1) + slice2.Endpoints = append(slice2.Endpoints, podToEndpoint(pod, &corev1.Node{})) + pods = append(pods, pod) + } + existingSlices = append(existingSlices, slice2) + + createEndpointSlices(t, client, namespace, existingSlices) + + // ensure that endpoints in each slice will be marked for update. + for i, pod := range pods { + if i%10 == 0 { + pod.Status.Conditions = []v1.PodCondition{{ + Type: v1.PodReady, + Status: v1.ConditionFalse, + }} + } + } + + // add a few additional endpoints - no more than could fit in either slice. + for i := 200; i < 215; i++ { + pods = append(pods, newPod(i, namespace, true, 1)) + } + + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, existingSlices, time.Now()) + + // ensure that both endpoint slices have been updated + expectActions(t, client.Actions(), 2, "update", "endpointslices") + + // additional pods should get added to fuller slice + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), []int{95, 20}) +} + +// Named ports can map to different port numbers on different pods. +// This test ensures that EndpointSlices are grouped correctly in that case. 
+func TestReconcileEndpointSlicesNamedPorts(t *testing.T) { + client := newClientset() + namespace := "test" + + portNameIntStr := intstr.IntOrString{ + Type: intstr.String, + StrVal: "http", + } + + svc := corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: "named-port-example", Namespace: namespace}, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{{ + TargetPort: portNameIntStr, + Protocol: v1.ProtocolTCP, + }}, + Selector: map[string]string{"foo": "bar"}, + }, + } + + // start with 300 pods + pods := []*corev1.Pod{} + for i := 0; i < 300; i++ { + ready := !(i%3 == 0) + portOffset := i % 5 + pod := newPod(i, namespace, ready, 1) + pod.Spec.Containers[0].Ports = []v1.ContainerPort{{ + Name: portNameIntStr.StrVal, + ContainerPort: int32(8080 + portOffset), + Protocol: v1.ProtocolTCP, + }} + pods = append(pods, pod) + } + + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, defaultMaxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, []*discovery.EndpointSlice{}, time.Now()) + + // reconcile should create 5 endpoint slices + assert.Equal(t, 5, len(client.Actions()), "Expected 5 client actions as part of reconcile") + expectActions(t, client.Actions(), 5, "create", "endpointslices") + + fetchedSlices := fetchEndpointSlices(t, client, namespace) + + // each slice should have 60 endpoints to match 5 unique variations of named port mapping + expectUnorderedSlicesWithLengths(t, fetchedSlices, []int{60, 60, 60, 60, 60}) + + // generate data structures for expected slice ports and address types + protoTCP := v1.ProtocolTCP + ipAddressType := discovery.AddressTypeIP + expectedSlices := []discovery.EndpointSlice{} + for i := range fetchedSlices { + expectedSlices = append(expectedSlices, discovery.EndpointSlice{ + Ports: []discovery.EndpointPort{{ + Name: utilpointer.StringPtr(""), + Protocol: &protoTCP, + Port: utilpointer.Int32Ptr(int32(8080 + i)), + }}, + AddressType: &ipAddressType, + }) + } + + // slices fetched should match expected address type and ports + expectUnorderedSlicesWithTopLevelAttrs(t, fetchedSlices, expectedSlices) +} + +// This test ensures that maxEndpointsPerSlice configuration results in +// appropriate endpoints distribution among slices +func TestReconcileMaxEndpointsPerSlice(t *testing.T) { + namespace := "test" + svc, _ := newServiceAndendpointMeta("foo", namespace) + + // start with 250 pods + pods := []*corev1.Pod{} + for i := 0; i < 250; i++ { + ready := !(i%3 == 0) + pods = append(pods, newPod(i, namespace, ready, 1)) + } + + testCases := []struct { + maxEndpointsPerSlice int32 + expectedSliceLengths []int + }{ + { + maxEndpointsPerSlice: int32(50), + expectedSliceLengths: []int{50, 50, 50, 50, 50}, + }, { + maxEndpointsPerSlice: int32(80), + expectedSliceLengths: []int{80, 80, 80, 10}, + }, { + maxEndpointsPerSlice: int32(150), + expectedSliceLengths: []int{150, 100}, + }, { + maxEndpointsPerSlice: int32(250), + expectedSliceLengths: []int{250}, + }, { + maxEndpointsPerSlice: int32(500), + expectedSliceLengths: []int{250}, + }, + } + + for _, testCase := range testCases { + client := newClientset() + r := newReconciler(client, []*corev1.Node{{ObjectMeta: metav1.ObjectMeta{Name: "node-1"}}}, testCase.maxEndpointsPerSlice) + reconcileHelper(t, r, &svc, pods, []*discovery.EndpointSlice{}, time.Now()) + expectUnorderedSlicesWithLengths(t, fetchEndpointSlices(t, client, namespace), testCase.expectedSliceLengths) + } +} + +// Test Helpers + +func newReconciler(client *fake.Clientset, nodes []*corev1.Node, 
maxEndpointsPerSlice int32) *reconciler { + informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc()) + nodeInformer := informerFactory.Core().V1().Nodes() + indexer := nodeInformer.Informer().GetIndexer() + for _, node := range nodes { + indexer.Add(node) + } + + return &reconciler{ + client: client, + nodeLister: corelisters.NewNodeLister(indexer), + maxEndpointsPerSlice: maxEndpointsPerSlice, + } +} + +// ensures endpoint slices exist with the desired set of lengths +func expectUnorderedSlicesWithLengths(t *testing.T, endpointSlices []discovery.EndpointSlice, expectedLengths []int) { + assert.Len(t, endpointSlices, len(expectedLengths), "Expected %d endpoint slices", len(expectedLengths)) + + lengthsWithNoMatch := []int{} + desiredLengths := expectedLengths + actualLengths := []int{} + for _, endpointSlice := range endpointSlices { + actualLen := len(endpointSlice.Endpoints) + actualLengths = append(actualLengths, actualLen) + matchFound := false + for i := 0; i < len(desiredLengths); i++ { + if desiredLengths[i] == actualLen { + matchFound = true + desiredLengths = append(desiredLengths[:i], desiredLengths[i+1:]...) + break + } + } + + if !matchFound { + lengthsWithNoMatch = append(lengthsWithNoMatch, actualLen) + } + } + + if len(lengthsWithNoMatch) > 0 || len(desiredLengths) > 0 { + t.Errorf("Actual slice lengths (%v) don't match expected (%v)", actualLengths, expectedLengths) + } +} + +// ensures endpoint slices exist with the desired set of ports and address types +func expectUnorderedSlicesWithTopLevelAttrs(t *testing.T, endpointSlices []discovery.EndpointSlice, expectedSlices []discovery.EndpointSlice) { + t.Helper() + assert.Len(t, endpointSlices, len(expectedSlices), "Expected %d endpoint slices", len(expectedSlices)) + + slicesWithNoMatch := []discovery.EndpointSlice{} + for _, endpointSlice := range endpointSlices { + matchFound := false + for i := 0; i < len(expectedSlices); i++ { + if portsAndAddressTypeEqual(expectedSlices[i], endpointSlice) { + matchFound = true + expectedSlices = append(expectedSlices[:i], expectedSlices[i+1:]...) 
+ break + } + } + + if !matchFound { + slicesWithNoMatch = append(slicesWithNoMatch, endpointSlice) + } + } + + assert.Len(t, slicesWithNoMatch, 0, "EndpointSlice(s) found without matching attributes") + assert.Len(t, expectedSlices, 0, "Expected slices(s) not found in EndpointSlices") +} + +func expectActions(t *testing.T, actions []k8stesting.Action, num int, verb, resource string) { + t.Helper() + for i := 0; i < num; i++ { + relativePos := len(actions) - i - 1 + assert.Equal(t, verb, actions[relativePos].GetVerb(), "Expected action -%d verb to be %s", i, verb) + assert.Equal(t, resource, actions[relativePos].GetResource().Resource, "Expected action -%d resource to be %s", i, resource) + } +} + +func portsAndAddressTypeEqual(slice1, slice2 discovery.EndpointSlice) bool { + return apiequality.Semantic.DeepEqual(slice1.Ports, slice2.Ports) && apiequality.Semantic.DeepEqual(slice1.AddressType, slice2.AddressType) +} + +func createEndpointSlices(t *testing.T, client *fake.Clientset, namespace string, endpointSlices []*discovery.EndpointSlice) { + t.Helper() + for _, endpointSlice := range endpointSlices { + _, err := client.DiscoveryV1alpha1().EndpointSlices(namespace).Create(endpointSlice) + if err != nil { + t.Fatalf("Expected no error creating Endpoint Slice, got: %v", err) + } + } +} + +func fetchEndpointSlices(t *testing.T, client *fake.Clientset, namespace string) []discovery.EndpointSlice { + t.Helper() + fetchedSlices, err := client.DiscoveryV1alpha1().EndpointSlices(namespace).List(metav1.ListOptions{}) + if err != nil { + t.Fatalf("Expected no error fetching Endpoint Slices, got: %v", err) + return []discovery.EndpointSlice{} + } + return fetchedSlices.Items +} + +func reconcileHelper(t *testing.T, r *reconciler, service *corev1.Service, pods []*corev1.Pod, existingSlices []*discovery.EndpointSlice, triggerTime time.Time) { + t.Helper() + err := r.reconcile(service, pods, existingSlices, triggerTime) + if err != nil { + t.Fatalf("Expected no error reconciling Endpoint Slices, got: %v", err) + } +} diff --git a/pkg/controller/endpointslice/utils.go b/pkg/controller/endpointslice/utils.go new file mode 100644 index 00000000000..ff093d78e64 --- /dev/null +++ b/pkg/controller/endpointslice/utils.go @@ -0,0 +1,261 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "crypto/md5" + "encoding/hex" + "fmt" + "reflect" + "sort" + "time" + + corev1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1alpha1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/klog" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" + api "k8s.io/kubernetes/pkg/apis/core" + "k8s.io/kubernetes/pkg/util/hash" +) + +// podEndpointChanged returns true if the results of podToEndpoint are different +// for the pods passed to this function. 
+func podEndpointChanged(pod1, pod2 *corev1.Pod) bool { + endpoint1 := podToEndpoint(pod1, &corev1.Node{}) + endpoint2 := podToEndpoint(pod2, &corev1.Node{}) + + endpoint1.TargetRef.ResourceVersion = "" + endpoint2.TargetRef.ResourceVersion = "" + + return !reflect.DeepEqual(endpoint1, endpoint2) +} + +// podToEndpoint returns an Endpoint object generated from a Pod and Node. +func podToEndpoint(pod *corev1.Pod, node *corev1.Node) discovery.Endpoint { + // Build out topology information. This is currently limited to hostname, + // zone, and region, but this will be expanded in the future. + topology := map[string]string{} + + if pod.Spec.NodeName != "" { + topology["kubernetes.io/hostname"] = pod.Spec.NodeName + } + + if node != nil { + topologyLabels := []string{ + "topology.kubernetes.io/zone", + "topology.kubernetes.io/region", + } + + for _, topologyLabel := range topologyLabels { + if node.Labels[topologyLabel] != "" { + topology[topologyLabel] = node.Labels[topologyLabel] + } + } + } + + ready := podutil.IsPodReady(pod) + return discovery.Endpoint{ + Addresses: getEndpointAddresses(pod.Status), + Conditions: discovery.EndpointConditions{ + Ready: &ready, + }, + Topology: topology, + TargetRef: &corev1.ObjectReference{ + Kind: "Pod", + Namespace: pod.ObjectMeta.Namespace, + Name: pod.ObjectMeta.Name, + UID: pod.ObjectMeta.UID, + ResourceVersion: pod.ObjectMeta.ResourceVersion, + }, + } +} + +// getEndpointPorts returns a list of EndpointPorts generated from a Service +// and Pod. +func getEndpointPorts(service *corev1.Service, pod *corev1.Pod) []discovery.EndpointPort { + endpointPorts := []discovery.EndpointPort{} + + // Allow headless service not to have ports. + if len(service.Spec.Ports) == 0 && service.Spec.ClusterIP == api.ClusterIPNone { + return endpointPorts + } + + for i := range service.Spec.Ports { + servicePort := &service.Spec.Ports[i] + + portName := servicePort.Name + portProto := servicePort.Protocol + portNum, err := podutil.FindPort(pod, servicePort) + if err != nil { + klog.V(4).Infof("Failed to find port for service %s/%s: %v", service.Namespace, service.Name, err) + continue + } + + i32PortNum := int32(portNum) + endpointPorts = append(endpointPorts, discovery.EndpointPort{ + Name: &portName, + Port: &i32PortNum, + Protocol: &portProto, + }) + } + + return endpointPorts +} + +// getEndpointAddresses returns a list of addresses generated from a pod status. +func getEndpointAddresses(podStatus corev1.PodStatus) []string { + if len(podStatus.PodIPs) > 1 { + addresss := []string{} + for _, podIP := range podStatus.PodIPs { + addresss = append(addresss, podIP.IP) + } + return addresss + } + + return []string{podStatus.PodIP} +} + +// endpointsEqualBeyondHash returns true if endpoints have equal attributes +// but excludes equality checks that would have already been covered with +// endpoint hashing (see hashEndpoint func for more info). +func endpointsEqualBeyondHash(ep1, ep2 *discovery.Endpoint) bool { + if !apiequality.Semantic.DeepEqual(ep1.Topology, ep2.Topology) { + return false + } + + if boolPtrChanged(ep1.Conditions.Ready, ep2.Conditions.Ready) { + return false + } + + if objectRefPtrChanged(ep1.TargetRef, ep2.TargetRef) { + return false + } + + return true +} + +// newEndpointSlice returns an EndpointSlice generated from a service and +// endpointMeta. 
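A brief usage sketch (not from this patch) of the named-port resolution that getEndpointPorts relies on: podutil.FindPort resolves a string targetPort against the pod's container ports, so different pods can legitimately yield different port numbers, and therefore different slice groups, for the same Service port. The pod and port values below are made up.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

func main() {
	// This pod maps the named port "http" to 8080; another pod could map the
	// same name to 8081 and would land in a differently keyed slice group.
	pod := &v1.Pod{
		Spec: v1.PodSpec{Containers: []v1.Container{{
			Ports: []v1.ContainerPort{{Name: "http", ContainerPort: 8080, Protocol: v1.ProtocolTCP}},
		}}},
	}
	svcPort := &v1.ServicePort{TargetPort: intstr.FromString("http"), Protocol: v1.ProtocolTCP}

	port, err := podutil.FindPort(pod, svcPort)
	fmt.Println(port, err) // 8080 <nil>
}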
+func newEndpointSlice(service *corev1.Service, endpointMeta *endpointMeta) *discovery.EndpointSlice { + gvk := schema.GroupVersionKind{Version: "v1", Kind: "Service"} + ownerRef := metav1.NewControllerRef(service, gvk) + return &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{serviceNameLabel: service.Name}, + GenerateName: fmt.Sprintf("%s.", service.Name), + OwnerReferences: []metav1.OwnerReference{*ownerRef}, + Namespace: service.Namespace, + }, + Ports: endpointMeta.Ports, + AddressType: endpointMeta.AddressType, + Endpoints: []discovery.Endpoint{}, + } +} + +// boolPtrChanged returns true if a set of bool pointers have different values. +func boolPtrChanged(ptr1, ptr2 *bool) bool { + if (ptr1 == nil) != (ptr2 == nil) { + return true + } + if ptr1 != nil && ptr2 != nil && *ptr1 != *ptr2 { + return true + } + return false +} + +// objectRefPtrChanged returns true if a set of object ref pointers have +// different values. +func objectRefPtrChanged(ref1, ref2 *corev1.ObjectReference) bool { + if (ref1 == nil) != (ref2 == nil) { + return true + } + if ref1 != nil && ref2 != nil && !apiequality.Semantic.DeepEqual(*ref1, *ref2) { + return true + } + return false +} + +// getSliceToFill will return the EndpointSlice that will be closest to full +// when numEndpoints are added. If no EndpointSlice can be found, a nil pointer +// will be returned. +func getSliceToFill(endpointSlices []*discovery.EndpointSlice, numEndpoints, maxEndpoints int) (slice *discovery.EndpointSlice) { + closestDiff := maxEndpoints + var closestSlice *discovery.EndpointSlice + for _, endpointSlice := range endpointSlices { + currentDiff := maxEndpoints - (numEndpoints + len(endpointSlice.Endpoints)) + if currentDiff >= 0 && currentDiff < closestDiff { + closestDiff = currentDiff + closestSlice = endpointSlice + if closestDiff == 0 { + return closestSlice + } + } + } + return closestSlice +} + +// addTriggerTimeAnnotation adds a triggerTime annotation to an EndpointSlice +func addTriggerTimeAnnotation(endpointSlice *discovery.EndpointSlice, triggerTime time.Time) { + if endpointSlice.Annotations == nil { + endpointSlice.Annotations = make(map[string]string) + } + + if !triggerTime.IsZero() { + endpointSlice.Annotations[corev1.EndpointsLastChangeTriggerTime] = triggerTime.Format(time.RFC3339Nano) + } else { // No new trigger time, clear the annotation. + delete(endpointSlice.Annotations, corev1.EndpointsLastChangeTriggerTime) + } +} + +// deepHashObject creates a unique hash string from a go object. +func deepHashObjectToString(objectToWrite interface{}) string { + hasher := md5.New() + hash.DeepHashObject(hasher, objectToWrite) + return hex.EncodeToString(hasher.Sum(nil)[0:]) +} + +// portMapKey is used to uniquely identify groups of endpoint ports. +type portMapKey string + +func newPortMapKey(endpointPorts []discovery.EndpointPort) portMapKey { + sort.Sort(portsInOrder(endpointPorts)) + return portMapKey(deepHashObjectToString(endpointPorts)) +} + +// endpointSliceEndpointLen helps sort endpoint slices by the number of +// endpoints they contain. +type endpointSliceEndpointLen []*discovery.EndpointSlice + +func (sl endpointSliceEndpointLen) Len() int { return len(sl) } +func (sl endpointSliceEndpointLen) Swap(i, j int) { sl[i], sl[j] = sl[j], sl[i] } +func (sl endpointSliceEndpointLen) Less(i, j int) bool { + return len(sl[i].Endpoints) > len(sl[j].Endpoints) +} + +// portsInOrder helps sort endpoint ports in a consistent way for hashing. 
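For readers, a stdlib-only mirror of the selection rule in getSliceToFill above (pickSliceToFill and its numbers are illustrative, not part of the patch): among candidate slices it picks the one that would end up closest to full, skipping any that would overflow.

package main

import "fmt"

// pickSliceToFill returns the index of the slice that would be closest to
// full after adding numEndpoints, or -1 if none can take all of them.
func pickSliceToFill(currentLens []int, numEndpoints, maxEndpoints int) int {
	best, bestDiff := -1, maxEndpoints
	for i, l := range currentLens {
		diff := maxEndpoints - (numEndpoints + l)
		if diff >= 0 && diff < bestDiff {
			best, bestDiff = i, diff
		}
	}
	return best
}

func main() {
	// With 27 endpoints to place and a limit of 100: a slice holding 80 would
	// overflow, and the slice holding 55 (ending at 82) beats the one holding
	// 30 (ending at 57), so index 1 is chosen.
	fmt.Println(pickSliceToFill([]int{30, 55, 80}, 27, 100)) // 1
}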
+type portsInOrder []discovery.EndpointPort + +func (sl portsInOrder) Len() int { return len(sl) } +func (sl portsInOrder) Swap(i, j int) { sl[i], sl[j] = sl[j], sl[i] } +func (sl portsInOrder) Less(i, j int) bool { + h1 := deepHashObjectToString(sl[i]) + h2 := deepHashObjectToString(sl[j]) + return h1 < h2 +} diff --git a/pkg/controller/endpointslice/utils_test.go b/pkg/controller/endpointslice/utils_test.go new file mode 100644 index 00000000000..c14269b833e --- /dev/null +++ b/pkg/controller/endpointslice/utils_test.go @@ -0,0 +1,335 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpointslice + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + discovery "k8s.io/api/discovery/v1alpha1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" + endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint" + utilpointer "k8s.io/utils/pointer" +) + +func TestNewEndpointSlice(t *testing.T) { + ipAddressType := discovery.AddressTypeIP + portName := "foo" + protocol := v1.ProtocolTCP + endpointMeta := endpointMeta{ + Ports: []discovery.EndpointPort{{Name: &portName, Protocol: &protocol}}, + AddressType: &ipAddressType, + } + service := v1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "test"}, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{{Port: 80}}, + Selector: map[string]string{"foo": "bar"}, + }, + } + + gvk := schema.GroupVersionKind{Version: "v1", Kind: "Service"} + ownerRef := metav1.NewControllerRef(&service, gvk) + + expectedSlice := discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{serviceNameLabel: service.Name}, + GenerateName: fmt.Sprintf("%s.", service.Name), + OwnerReferences: []metav1.OwnerReference{*ownerRef}, + Namespace: service.Namespace, + }, + Ports: endpointMeta.Ports, + AddressType: endpointMeta.AddressType, + Endpoints: []discovery.Endpoint{}, + } + generatedSlice := newEndpointSlice(&service, &endpointMeta) + + assert.EqualValues(t, expectedSlice, *generatedSlice) +} + +func TestPodToEndpoint(t *testing.T) { + ns := "test" + + readyPod := newPod(1, ns, true, 1) + unreadyPod := newPod(1, ns, false, 1) + multiIPPod := newPod(1, ns, true, 1) + + multiIPPod.Status.PodIPs = []v1.PodIP{{IP: "1.2.3.4"}, {IP: "1234::5678:0000:0000:9abc:def0"}} + + node1 := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: readyPod.Spec.NodeName, + Labels: map[string]string{ + "topology.kubernetes.io/zone": "us-central1-a", + "topology.kubernetes.io/region": "us-central1", + }, + }, + } + + testCases := []struct { + name string + pod *v1.Pod + node *v1.Node + expectedEndpoint discovery.Endpoint + }{ + { + name: "Ready pod", + pod: readyPod, + 
expectedEndpoint: discovery.Endpoint{ + Addresses: []string{"1.2.3.5"}, + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(true)}, + Topology: map[string]string{"kubernetes.io/hostname": "node-1"}, + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Namespace: ns, + Name: readyPod.Name, + UID: readyPod.UID, + ResourceVersion: readyPod.ResourceVersion, + }, + }, + }, + { + name: "Unready pod", + pod: unreadyPod, + expectedEndpoint: discovery.Endpoint{ + Addresses: []string{"1.2.3.5"}, + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(false)}, + Topology: map[string]string{"kubernetes.io/hostname": "node-1"}, + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Namespace: ns, + Name: readyPod.Name, + UID: readyPod.UID, + ResourceVersion: readyPod.ResourceVersion, + }, + }, + }, + { + name: "Ready pod + node labels", + pod: readyPod, + node: node1, + expectedEndpoint: discovery.Endpoint{ + Addresses: []string{"1.2.3.5"}, + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(true)}, + Topology: map[string]string{ + "kubernetes.io/hostname": "node-1", + "topology.kubernetes.io/zone": "us-central1-a", + "topology.kubernetes.io/region": "us-central1", + }, + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Namespace: ns, + Name: readyPod.Name, + UID: readyPod.UID, + ResourceVersion: readyPod.ResourceVersion, + }, + }, + }, + { + name: "Multi IP Ready pod + node labels", + pod: multiIPPod, + node: node1, + expectedEndpoint: discovery.Endpoint{ + Addresses: []string{"1.2.3.4", "1234::5678:0000:0000:9abc:def0"}, + Conditions: discovery.EndpointConditions{Ready: utilpointer.BoolPtr(true)}, + Topology: map[string]string{ + "kubernetes.io/hostname": "node-1", + "topology.kubernetes.io/zone": "us-central1-a", + "topology.kubernetes.io/region": "us-central1", + }, + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Namespace: ns, + Name: readyPod.Name, + UID: readyPod.UID, + ResourceVersion: readyPod.ResourceVersion, + }, + }, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + endpoint := podToEndpoint(testCase.pod, testCase.node) + assert.EqualValues(t, testCase.expectedEndpoint, endpoint, "Test case failed: %s", testCase.name) + }) + } +} + +func TestPodChangedWithpodEndpointChanged(t *testing.T) { + podStore := cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc) + ns := "test" + podStore.Add(newPod(1, ns, true, 1)) + pods := podStore.List() + if len(pods) != 1 { + t.Errorf("podStore size: expected: %d, got: %d", 1, len(pods)) + return + } + oldPod := pods[0].(*v1.Pod) + newPod := oldPod.DeepCopy() + + if podChangedHelper(oldPod, newPod, podEndpointChanged) { + t.Errorf("Expected pod to be unchanged for copied pod") + } + + newPod.Spec.NodeName = "changed" + if !podChangedHelper(oldPod, newPod, podEndpointChanged) { + t.Errorf("Expected pod to be changed for pod with NodeName changed") + } + newPod.Spec.NodeName = oldPod.Spec.NodeName + + newPod.ObjectMeta.ResourceVersion = "changed" + if podChangedHelper(oldPod, newPod, podEndpointChanged) { + t.Errorf("Expected pod to be unchanged for pod with only ResourceVersion changed") + } + newPod.ObjectMeta.ResourceVersion = oldPod.ObjectMeta.ResourceVersion + + newPod.Status.PodIP = "1.2.3.1" + if !podChangedHelper(oldPod, newPod, podEndpointChanged) { + t.Errorf("Expected pod to be changed with pod IP address change") + } + newPod.Status.PodIP = oldPod.Status.PodIP + + newPod.ObjectMeta.Name = "wrong-name" + if !podChangedHelper(oldPod, newPod, 
podEndpointChanged) { + t.Errorf("Expected pod to be changed with pod name change") + } + newPod.ObjectMeta.Name = oldPod.ObjectMeta.Name + + saveConditions := oldPod.Status.Conditions + oldPod.Status.Conditions = nil + if !podChangedHelper(oldPod, newPod, podEndpointChanged) { + t.Errorf("Expected pod to be changed with pod readiness change") + } + oldPod.Status.Conditions = saveConditions + + now := metav1.NewTime(time.Now().UTC()) + newPod.ObjectMeta.DeletionTimestamp = &now + if !podChangedHelper(oldPod, newPod, podEndpointChanged) { + t.Errorf("Expected pod to be changed with DeletionTimestamp change") + } + newPod.ObjectMeta.DeletionTimestamp = oldPod.ObjectMeta.DeletionTimestamp.DeepCopy() +} + +// Test helpers + +func newPod(n int, namespace string, ready bool, nPorts int) *v1.Pod { + status := v1.ConditionTrue + if !ready { + status = v1.ConditionFalse + } + + p := &v1.Pod{ + TypeMeta: metav1.TypeMeta{APIVersion: "v1"}, + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: fmt.Sprintf("pod%d", n), + Labels: map[string]string{"foo": "bar"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{{ + Name: "container-1", + }}, + NodeName: "node-1", + }, + Status: v1.PodStatus{ + PodIP: fmt.Sprintf("1.2.3.%d", 4+n), + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: status, + }, + }, + }, + } + + return p +} + +func newClientset() *fake.Clientset { + client := fake.NewSimpleClientset() + + client.PrependReactor("create", "endpointslices", k8stesting.ReactionFunc(func(action k8stesting.Action) (bool, runtime.Object, error) { + endpointSlice := action.(k8stesting.CreateAction).GetObject().(*discovery.EndpointSlice) + + if endpointSlice.ObjectMeta.GenerateName != "" { + endpointSlice.ObjectMeta.Name = fmt.Sprintf("%s-%s", endpointSlice.ObjectMeta.GenerateName, rand.String(8)) + endpointSlice.ObjectMeta.GenerateName = "" + } + + return false, endpointSlice, nil + })) + + return client +} + +func newServiceAndendpointMeta(name, namespace string) (corev1.Service, endpointMeta) { + portNum := int32(80) + portNameIntStr := intstr.IntOrString{ + Type: intstr.Int, + IntVal: portNum, + } + + svc := corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{{ + TargetPort: portNameIntStr, + Protocol: v1.ProtocolTCP, + Name: name, + }}, + Selector: map[string]string{"foo": "bar"}, + }, + } + + ipAddressType := discovery.AddressTypeIP + protocol := v1.ProtocolTCP + endpointMeta := endpointMeta{ + AddressType: &ipAddressType, + Ports: []discovery.EndpointPort{{Name: &name, Port: &portNum, Protocol: &protocol}}, + } + + return svc, endpointMeta +} + +func newEmptyEndpointSlice(n int, namespace string, endpointMeta endpointMeta, svc corev1.Service) *discovery.EndpointSlice { + return &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s.%d", svc.Name, n), + Namespace: namespace, + }, + Ports: endpointMeta.Ports, + AddressType: endpointMeta.AddressType, + Endpoints: []discovery.Endpoint{}, + } +} + +func podChangedHelper(oldPod, newPod *v1.Pod, endpointChanged endpointutil.EndpointsMatch) bool { + podChanged, _ := endpointutil.PodChanged(oldPod, newPod, podEndpointChanged) + return podChanged +} diff --git a/pkg/controller/util/endpoint/BUILD b/pkg/controller/util/endpoint/BUILD new file mode 100644 index 00000000000..e400de4e421 --- /dev/null +++ b/pkg/controller/util/endpoint/BUILD @@ -0,0 +1,49 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + 
+go_library( + name = "go_default_library", + srcs = [ + "controller_utils.go", + "trigger_time_tracker.go", + ], + importpath = "k8s.io/kubernetes/pkg/controller/util/endpoint", + visibility = ["//visibility:public"], + deps = [ + "//pkg/api/v1/pod:go_default_library", + "//pkg/controller:go_default_library", + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", + "//staging/src/k8s.io/client-go/listers/core/v1:go_default_library", + "//staging/src/k8s.io/client-go/tools/cache:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = [ + "controller_utils_test.go", + "trigger_time_tracker_test.go", + ], + embed = [":go_default_library"], + deps = [ + "//staging/src/k8s.io/api/core/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/pkg/controller/util/endpoint/controller_utils.go b/pkg/controller/util/endpoint/controller_utils.go new file mode 100644 index 00000000000..38f70a16bb2 --- /dev/null +++ b/pkg/controller/util/endpoint/controller_utils.go @@ -0,0 +1,174 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package endpoint + +import ( + "fmt" + "reflect" + + v1 "k8s.io/api/core/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + v1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" + "k8s.io/kubernetes/pkg/controller" +) + +// EndpointsMatch is a type of function that returns true if pod endpoints match. +type EndpointsMatch func(*v1.Pod, *v1.Pod) bool + +// ShouldPodBeInEndpoints returns true if a specified pod should be in an +// endpoints object. +func ShouldPodBeInEndpoints(pod *v1.Pod) bool { + if len(pod.Status.PodIP) == 0 && len(pod.Status.PodIPs) == 0 { + return false + } + + if pod.Spec.RestartPolicy == v1.RestartPolicyNever { + return pod.Status.Phase != v1.PodFailed && pod.Status.Phase != v1.PodSucceeded + } + + if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure { + return pod.Status.Phase != v1.PodSucceeded + } + + return true +} + +// PodChanged returns two boolean values, the first returns true if the pod. +// has changed, the second value returns true if the pod labels have changed. 
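A usage sketch (not part of the patch) showing how ShouldPodBeInEndpoints treats a few representative pods; only the pod's IP, phase, and restart policy matter to the check, and the fixtures below are invented.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint"
)

func main() {
	// A running pod with an IP is included.
	running := &v1.Pod{
		Spec:   v1.PodSpec{RestartPolicy: v1.RestartPolicyAlways},
		Status: v1.PodStatus{PodIP: "10.0.0.1", Phase: v1.PodRunning},
	}

	// A succeeded pod under RestartPolicy=OnFailure is terminal and excluded,
	// as is any pod that has not been assigned an IP yet.
	succeeded := &v1.Pod{
		Spec:   v1.PodSpec{RestartPolicy: v1.RestartPolicyOnFailure},
		Status: v1.PodStatus{PodIP: "10.0.0.2", Phase: v1.PodSucceeded},
	}
	noIP := &v1.Pod{
		Spec:   v1.PodSpec{RestartPolicy: v1.RestartPolicyAlways},
		Status: v1.PodStatus{Phase: v1.PodPending},
	}

	fmt.Println(endpointutil.ShouldPodBeInEndpoints(running))   // true
	fmt.Println(endpointutil.ShouldPodBeInEndpoints(succeeded)) // false
	fmt.Println(endpointutil.ShouldPodBeInEndpoints(noIP))      // false
}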
+func PodChanged(oldPod, newPod *v1.Pod, endpointChanged EndpointsMatch) (bool, bool) { + // Check if the pod labels have changed, indicating a possible + // change in the service membership + labelsChanged := false + if !reflect.DeepEqual(newPod.Labels, oldPod.Labels) || + !hostNameAndDomainAreEqual(newPod, oldPod) { + labelsChanged = true + } + + // If the pod's deletion timestamp is set, remove endpoint from ready address. + if newPod.DeletionTimestamp != oldPod.DeletionTimestamp { + return true, labelsChanged + } + // If the pod's readiness has changed, the associated endpoint address + // will move from the unready endpoints set to the ready endpoints. + // So for the purposes of an endpoint, a readiness change on a pod + // means we have a changed pod. + if podutil.IsPodReady(oldPod) != podutil.IsPodReady(newPod) { + return true, labelsChanged + } + // Convert the pod to an Endpoint, clear inert fields, + // and see if they are the same. + // TODO: Add a watcher for node changes separate from this + // We don't want to trigger multiple syncs at a pod level when a node changes + return endpointChanged(newPod, oldPod), labelsChanged +} + +// GetPodServiceMemberships returns a set of Service keys for Services that have +// a selector matching the given pod. +func GetPodServiceMemberships(serviceLister v1listers.ServiceLister, pod *v1.Pod) (sets.String, error) { + set := sets.String{} + services, err := serviceLister.GetPodServices(pod) + if err != nil { + // don't log this error because this function makes pointless + // errors when no services match + return set, nil + } + for i := range services { + key, err := controller.KeyFunc(services[i]) + if err != nil { + return nil, err + } + set.Insert(key) + } + return set, nil +} + +// GetServicesToUpdateOnPodChange returns a set of Service keys for Services +// that have potentially been affected by a change to this pod. +func GetServicesToUpdateOnPodChange(serviceLister v1listers.ServiceLister, old, cur interface{}, endpointChanged EndpointsMatch) sets.String { + newPod := cur.(*v1.Pod) + oldPod := old.(*v1.Pod) + if newPod.ResourceVersion == oldPod.ResourceVersion { + // Periodic resync will send update events for all known pods. + // Two different versions of the same pod will always have different RVs + return sets.String{} + } + + podChanged, labelsChanged := PodChanged(oldPod, newPod, endpointChanged) + + // If both the pod and labels are unchanged, no update is needed + if !podChanged && !labelsChanged { + return sets.String{} + } + + services, err := GetPodServiceMemberships(serviceLister, newPod) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", newPod.Namespace, newPod.Name, err)) + return sets.String{} + } + + if labelsChanged { + oldServices, err := GetPodServiceMemberships(serviceLister, oldPod) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Unable to get pod %s/%s's service memberships: %v", newPod.Namespace, newPod.Name, err)) + } + services = determineNeededServiceUpdates(oldServices, services, podChanged) + } + + return services +} + +// GetPodFromDeleteAction returns a pointer to a pod if one can be derived from +// obj (could be a *v1.Pod, or a DeletionFinalStateUnknown marker item). +func GetPodFromDeleteAction(obj interface{}) *v1.Pod { + if pod, ok := obj.(*v1.Pod); ok { + // Enqueue all the services that the pod used to be a member of. + // This is the same thing we do when we add a pod. 
+ return pod + } + // If we reached here it means the pod was deleted but its final state is unrecorded. + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + utilruntime.HandleError(fmt.Errorf("Couldn't get object from tombstone %#v", obj)) + return nil + } + pod, ok := tombstone.Obj.(*v1.Pod) + if !ok { + utilruntime.HandleError(fmt.Errorf("Tombstone contained object that is not a Pod: %#v", obj)) + return nil + } + return pod +} + +func hostNameAndDomainAreEqual(pod1, pod2 *v1.Pod) bool { + return pod1.Spec.Hostname == pod2.Spec.Hostname && + pod1.Spec.Subdomain == pod2.Spec.Subdomain +} + +func determineNeededServiceUpdates(oldServices, services sets.String, podChanged bool) sets.String { + if podChanged { + // if the labels and pod changed, all services need to be updated + services = services.Union(oldServices) + } else { + // if only the labels changed, services not common to both the new + // and old service set (the disjuntive union) need to be updated + services = services.Difference(oldServices).Union(oldServices.Difference(services)) + } + return services +} diff --git a/pkg/controller/util/endpoint/controller_utils_test.go b/pkg/controller/util/endpoint/controller_utils_test.go new file mode 100644 index 00000000000..1f12fbcb83c --- /dev/null +++ b/pkg/controller/util/endpoint/controller_utils_test.go @@ -0,0 +1,226 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
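With the shared helpers above in place, a consuming controller would typically call them from its pod event handlers. The sketch below is illustrative only and not part of this patch: the exampleController type, its workqueue, and the endpointsChanged comparator are hypothetical stand-ins for whatever the Endpoints and EndpointSlice controllers actually wire up.

package example

import (
	v1 "k8s.io/api/core/v1"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	v1listers "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/util/workqueue"

	endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint"
)

type exampleController struct {
	serviceLister v1listers.ServiceLister
	queue         workqueue.RateLimitingInterface
}

// endpointsChanged is a placeholder EndpointsMatch implementation; a real
// controller compares the endpoint representation it derives from each pod.
func endpointsChanged(oldPod, newPod *v1.Pod) bool {
	return oldPod.Status.PodIP != newPod.Status.PodIP
}

// updatePod requeues every Service whose membership may have been affected
// by the difference between the two pod revisions.
func (c *exampleController) updatePod(old, cur interface{}) {
	services := endpointutil.GetServicesToUpdateOnPodChange(c.serviceLister, old, cur, endpointsChanged)
	for key := range services {
		c.queue.Add(key)
	}
}

// deletePod requeues the Services the deleted pod belonged to, tolerating
// DeletedFinalStateUnknown tombstones from the informer cache.
func (c *exampleController) deletePod(obj interface{}) {
	pod := endpointutil.GetPodFromDeleteAction(obj)
	if pod == nil {
		return
	}
	services, err := endpointutil.GetPodServiceMemberships(c.serviceLister, pod)
	if err != nil {
		utilruntime.HandleError(err)
		return
	}
	for key := range services {
		c.queue.Add(key)
	}
}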
+*/ + +package endpoint + +import ( + "testing" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" +) + +func TestDetermineNeededServiceUpdates(t *testing.T) { + testCases := []struct { + name string + a sets.String + b sets.String + union sets.String + xor sets.String + }{ + { + name: "no services changed", + a: sets.NewString("a", "b", "c"), + b: sets.NewString("a", "b", "c"), + xor: sets.NewString(), + union: sets.NewString("a", "b", "c"), + }, + { + name: "all old services removed, new services added", + a: sets.NewString("a", "b", "c"), + b: sets.NewString("d", "e", "f"), + xor: sets.NewString("a", "b", "c", "d", "e", "f"), + union: sets.NewString("a", "b", "c", "d", "e", "f"), + }, + { + name: "all old services removed, no new services added", + a: sets.NewString("a", "b", "c"), + b: sets.NewString(), + xor: sets.NewString("a", "b", "c"), + union: sets.NewString("a", "b", "c"), + }, + { + name: "no old services, but new services added", + a: sets.NewString(), + b: sets.NewString("a", "b", "c"), + xor: sets.NewString("a", "b", "c"), + union: sets.NewString("a", "b", "c"), + }, + { + name: "one service removed, one service added, two unchanged", + a: sets.NewString("a", "b", "c"), + b: sets.NewString("b", "c", "d"), + xor: sets.NewString("a", "d"), + union: sets.NewString("a", "b", "c", "d"), + }, + { + name: "no services", + a: sets.NewString(), + b: sets.NewString(), + xor: sets.NewString(), + union: sets.NewString(), + }, + } + for _, testCase := range testCases { + retval := determineNeededServiceUpdates(testCase.a, testCase.b, false) + if !retval.Equal(testCase.xor) { + t.Errorf("%s (with podChanged=false): expected: %v got: %v", testCase.name, testCase.xor.List(), retval.List()) + } + + retval = determineNeededServiceUpdates(testCase.a, testCase.b, true) + if !retval.Equal(testCase.union) { + t.Errorf("%s (with podChanged=true): expected: %v got: %v", testCase.name, testCase.union.List(), retval.List()) + } + } +} + +// There are 3*5 possibilities(3 types of RestartPolicy by 5 types of PodPhase). +// Not listing them all here. Just listing all of the 3 false cases and 3 of the +// 12 true cases. 
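The table test above exercises determineNeededServiceUpdates; a minimal standalone sketch of the same set arithmetic follows. It is illustrative only, not part of the patch, and the service keys are made-up examples: a labels-only change resyncs the symmetric difference (services the pod entered or left), while a pod change resyncs the union (everything the pod was or is a member of).

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	oldServices := sets.NewString("ns/a", "ns/b", "ns/c")
	newServices := sets.NewString("ns/b", "ns/c", "ns/d")

	// Labels-only change: only services the pod entered or left need a resync.
	labelsOnly := newServices.Difference(oldServices).Union(oldServices.Difference(newServices))
	fmt.Println(labelsOnly.List()) // [ns/a ns/d]

	// Pod changed as well: every service the pod touched needs a resync.
	podChanged := newServices.Union(oldServices)
	fmt.Println(podChanged.List()) // [ns/a ns/b ns/c ns/d]
}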
+func TestShouldPodBeInEndpoints(t *testing.T) { + testCases := []struct { + name string + pod *v1.Pod + expected bool + }{ + // Pod should not be in endpoints: + { + name: "Failed pod with Never RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodFailed, + PodIP: "1.2.3.4", + }, + }, + expected: false, + }, + { + name: "Succeeded pod with Never RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + PodIP: "1.2.3.4", + }, + }, + expected: false, + }, + { + name: "Succeeded pod with OnFailure RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyOnFailure, + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + PodIP: "1.2.3.4", + }, + }, + expected: false, + }, + { + name: "Empty Pod IPs, Running pod with OnFailure RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + PodIP: "", + PodIPs: []v1.PodIP{}, + }, + }, + expected: false, + }, + // Pod should be in endpoints: + { + name: "Failed pod with Always RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyAlways, + }, + Status: v1.PodStatus{ + Phase: v1.PodFailed, + PodIP: "1.2.3.4", + }, + }, + expected: true, + }, + { + name: "Pending pod with Never RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodPending, + PodIP: "1.2.3.4", + }, + }, + expected: true, + }, + { + name: "Unknown pod with OnFailure RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyOnFailure, + }, + Status: v1.PodStatus{ + Phase: v1.PodUnknown, + PodIP: "1.2.3.4", + }, + }, + expected: true, + }, + { + name: "Running pod with Never RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + PodIP: "1.2.3.4", + }, + }, + expected: true, + }, + { + name: "Multiple Pod IPs, Running pod with OnFailure RestartPolicy", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + }, + Status: v1.PodStatus{ + Phase: v1.PodRunning, + PodIPs: []v1.PodIP{{IP: "1.2.3.4"}, {IP: "1234::5678:0000:0000:9abc:def0"}}, + }, + }, + expected: true, + }, + } + for _, test := range testCases { + result := ShouldPodBeInEndpoints(test.pod) + if result != test.expected { + t.Errorf("%s: expected : %t, got: %t", test.name, test.expected, result) + } + } +} diff --git a/pkg/controller/util/endpoint/trigger_time_tracker.go b/pkg/controller/util/endpoint/trigger_time_tracker.go new file mode 100644 index 00000000000..fb75a4fa935 --- /dev/null +++ b/pkg/controller/util/endpoint/trigger_time_tracker.go @@ -0,0 +1,161 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
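The table test above walks the RestartPolicy and PodPhase combinations; as a compact usage sketch of the same rule (illustrative only, not part of the patch, with made-up pod IPs), a Succeeded pod that will never restart is excluded while a Running pod with an IP is included.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"

	endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint"
)

func main() {
	succeeded := &v1.Pod{
		Spec:   v1.PodSpec{RestartPolicy: v1.RestartPolicyOnFailure},
		Status: v1.PodStatus{Phase: v1.PodSucceeded, PodIP: "10.0.0.1"},
	}
	running := &v1.Pod{
		Spec:   v1.PodSpec{RestartPolicy: v1.RestartPolicyOnFailure},
		Status: v1.PodStatus{Phase: v1.PodRunning, PodIP: "10.0.0.2"},
	}

	// A terminal pod under OnFailure restart policy is never an endpoint;
	// a running pod with an assigned IP is.
	fmt.Println(endpointutil.ShouldPodBeInEndpoints(succeeded)) // false
	fmt.Println(endpointutil.ShouldPodBeInEndpoints(running))   // true
}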
+*/ + +package endpoint + +import ( + "sync" + "time" + + v1 "k8s.io/api/core/v1" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" +) + +// TriggerTimeTracker is used to compute an EndpointsLastChangeTriggerTime +// annotation. See the documentation for that annotation for more details. +// +// Please note that this util may compute a wrong EndpointsLastChangeTriggerTime +// if the same object changes multiple times between two consecutive syncs. +// We're aware of this limitation but we decided to accept it, as fixing it +// would require a major rewrite of the endpoint(Slice) controller and +// Informer framework. Such situations, i.e. frequent updates of the same object +// in a single sync period, should be relatively rare and therefore this util +// should provide a good approximation of the EndpointsLastChangeTriggerTime. +type TriggerTimeTracker struct { + // ServiceStates is a map, indexed by Service object key, storing the last + // known Service object state observed during the most recent call of the + // ComputeEndpointLastChangeTriggerTime function. + ServiceStates map[ServiceKey]ServiceState + + // mutex guarding the serviceStates map. + mutex sync.Mutex +} + +// NewTriggerTimeTracker creates a new instance of the TriggerTimeTracker. +func NewTriggerTimeTracker() *TriggerTimeTracker { + return &TriggerTimeTracker{ + ServiceStates: make(map[ServiceKey]ServiceState), + } +} + +// ServiceKey is a key uniquely identifying a Service. +type ServiceKey struct { + // namespace, name composing a namespaced name - an unique identifier of every Service. + Namespace, Name string +} + +// ServiceState represents a state of an Service object that is known to this util. +type ServiceState struct { + // lastServiceTriggerTime is a service trigger time observed most recently. + lastServiceTriggerTime time.Time + // lastPodTriggerTimes is a map (Pod name -> time) storing the pod trigger + // times that were observed during the most recent call of the + // ComputeEndpointLastChangeTriggerTime function. + lastPodTriggerTimes map[string]time.Time +} + +// ComputeEndpointLastChangeTriggerTime updates the state of the Service/Endpoint +// object being synced and returns the time that should be exported as the +// EndpointsLastChangeTriggerTime annotation. +// +// If the method returns a 'zero' time the EndpointsLastChangeTriggerTime +// annotation shouldn't be exported. +// +// Please note that this function may compute a wrong value if the same object +// (pod/service) changes multiple times between two consecutive syncs. +// +// Important: This method is go-routing safe but only when called for different +// keys. The method shouldn't be called concurrently for the same key! This +// contract is fulfilled in the current implementation of the endpoint(slice) +// controller. +func (t *TriggerTimeTracker) ComputeEndpointLastChangeTriggerTime( + namespace string, service *v1.Service, pods []*v1.Pod) time.Time { + + key := ServiceKey{Namespace: namespace, Name: service.Name} + // As there won't be any concurrent calls for the same key, we need to guard + // access only to the serviceStates map. + t.mutex.Lock() + state, wasKnown := t.ServiceStates[key] + t.mutex.Unlock() + + // Update the state before returning. + defer func() { + t.mutex.Lock() + t.ServiceStates[key] = state + t.mutex.Unlock() + }() + + // minChangedTriggerTime is the min trigger time of all trigger times that + // have changed since the last sync. 
+ var minChangedTriggerTime time.Time + podTriggerTimes := make(map[string]time.Time) + for _, pod := range pods { + if podTriggerTime := getPodTriggerTime(pod); !podTriggerTime.IsZero() { + podTriggerTimes[pod.Name] = podTriggerTime + if podTriggerTime.After(state.lastPodTriggerTimes[pod.Name]) { + // Pod trigger time has changed since the last sync, update minChangedTriggerTime. + minChangedTriggerTime = min(minChangedTriggerTime, podTriggerTime) + } + } + } + serviceTriggerTime := getServiceTriggerTime(service) + if serviceTriggerTime.After(state.lastServiceTriggerTime) { + // Service trigger time has changed since the last sync, update minChangedTriggerTime. + minChangedTriggerTime = min(minChangedTriggerTime, serviceTriggerTime) + } + + state.lastPodTriggerTimes = podTriggerTimes + state.lastServiceTriggerTime = serviceTriggerTime + + if !wasKnown { + // New Service, use Service creationTimestamp. + return service.CreationTimestamp.Time + } + + // Regular update of endpoint objects, return min of changed trigger times. + return minChangedTriggerTime +} + +// DeleteService deletes service state stored in this util. +func (t *TriggerTimeTracker) DeleteService(namespace, name string) { + key := ServiceKey{Namespace: namespace, Name: name} + t.mutex.Lock() + defer t.mutex.Unlock() + delete(t.ServiceStates, key) +} + +// getPodTriggerTime returns the time of the pod change (trigger) that resulted +// or will result in the endpoint object change. +func getPodTriggerTime(pod *v1.Pod) (triggerTime time.Time) { + if readyCondition := podutil.GetPodReadyCondition(pod.Status); readyCondition != nil { + triggerTime = readyCondition.LastTransitionTime.Time + } + return triggerTime +} + +// getServiceTriggerTime returns the time of the service change (trigger) that +// resulted or will result in the endpoint change. +func getServiceTriggerTime(service *v1.Service) (triggerTime time.Time) { + return service.CreationTimestamp.Time +} + +// min returns minimum of the currentMin and newValue or newValue if the currentMin is not set. +func min(currentMin, newValue time.Time) time.Time { + if currentMin.IsZero() || newValue.Before(currentMin) { + return newValue + } + return currentMin +} diff --git a/pkg/controller/util/endpoint/trigger_time_tracker_test.go b/pkg/controller/util/endpoint/trigger_time_tracker_test.go new file mode 100644 index 00000000000..043664e2c05 --- /dev/null +++ b/pkg/controller/util/endpoint/trigger_time_tracker_test.go @@ -0,0 +1,204 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
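Before the tests, a brief sketch of how a sync path might consume TriggerTimeTracker. This is not taken from the patch: the annotateTriggerTime helper and its Endpoints-based caller are hypothetical, and it assumes the EndpointsLastChangeTriggerTime annotation key exported by k8s.io/api/core/v1 together with the RFC3339Nano formatting the existing endpoints controller uses.

package example

import (
	"time"

	v1 "k8s.io/api/core/v1"

	endpointutil "k8s.io/kubernetes/pkg/controller/util/endpoint"
)

var tracker = endpointutil.NewTriggerTimeTracker()

// annotateTriggerTime stamps the computed trigger time on the Endpoints
// object, or removes the annotation when no trigger time can be derived.
func annotateTriggerTime(endpoints *v1.Endpoints, service *v1.Service, pods []*v1.Pod) {
	triggerTime := tracker.ComputeEndpointLastChangeTriggerTime(service.Namespace, service, pods)
	if endpoints.Annotations == nil {
		endpoints.Annotations = map[string]string{}
	}
	if triggerTime.IsZero() {
		// A zero time means the annotation should not be exported.
		delete(endpoints.Annotations, v1.EndpointsLastChangeTriggerTime)
		return
	}
	endpoints.Annotations[v1.EndpointsLastChangeTriggerTime] = triggerTime.Format(time.RFC3339Nano)
}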
+*/ + +package endpoint + +import ( + "runtime" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +var ( + t0 = time.Date(2019, 01, 01, 0, 0, 0, 0, time.UTC) + t1 = t0.Add(time.Second) + t2 = t1.Add(time.Second) + t3 = t2.Add(time.Second) + t4 = t3.Add(time.Second) + t5 = t4.Add(time.Second) + + ttNamespace = "ttNamespace1" + ttServiceName = "my-service" +) + +func TestNewServiceNoPods(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t2) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service).expect(t2) +} + +func TestNewServiceExistingPods(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t3) + pod1 := createPod(ttNamespace, "pod1", t0) + pod2 := createPod(ttNamespace, "pod2", t1) + pod3 := createPod(ttNamespace, "pod3", t5) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2, pod3). + // Pods were created before service, but trigger time is the time when service was created. + expect(t3) +} + +func TestPodsAdded(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t0) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service).expect(t0) + + pod1 := createPod(ttNamespace, "pod1", t2) + pod2 := createPod(ttNamespace, "pod2", t1) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expect(t1) +} + +func TestPodsUpdated(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t0) + pod1 := createPod(ttNamespace, "pod1", t1) + pod2 := createPod(ttNamespace, "pod2", t2) + pod3 := createPod(ttNamespace, "pod3", t3) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2, pod3).expect(t0) + + pod1 = createPod(ttNamespace, "pod1", t5) + pod2 = createPod(ttNamespace, "pod2", t4) + // pod3 doesn't change. + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2, pod3).expect(t4) +} + +func TestPodsUpdatedNoOp(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t0) + pod1 := createPod(ttNamespace, "pod1", t1) + pod2 := createPod(ttNamespace, "pod2", t2) + pod3 := createPod(ttNamespace, "pod3", t3) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2, pod3).expect(t0) + + // Nothing has changed. 
+ tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2, pod3).expectNil() +} + +func TestPodDeletedThenAdded(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t0) + pod1 := createPod(ttNamespace, "pod1", t1) + pod2 := createPod(ttNamespace, "pod2", t2) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expect(t0) + + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1).expectNil() + + pod2 = createPod(ttNamespace, "pod2", t4) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expect(t4) +} + +func TestServiceDeletedThenAdded(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t0) + pod1 := createPod(ttNamespace, "pod1", t1) + pod2 := createPod(ttNamespace, "pod2", t2) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expect(t0) + + tester.DeleteService(ttNamespace, ttServiceName) + + service = createService(ttNamespace, ttServiceName, t3) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expect(t3) +} + +func TestServiceUpdatedNoPodChange(t *testing.T) { + tester := newTester(t) + + service := createService(ttNamespace, ttServiceName, t0) + pod1 := createPod(ttNamespace, "pod1", t1) + pod2 := createPod(ttNamespace, "pod2", t2) + tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expect(t0) + + // service's ports have changed. + service.Spec = v1.ServiceSpec{ + Selector: map[string]string{}, + Ports: []v1.ServicePort{{Port: 80, TargetPort: intstr.FromInt(8080), Protocol: "TCP"}}, + } + + // Currently we're not able to calculate trigger time for service updates, hence the returned + // value is a nil time. 
+ tester.whenComputeEndpointLastChangeTriggerTime(ttNamespace, service, pod1, pod2).expectNil() +} + +// ------- Test Utils ------- + +type tester struct { + *TriggerTimeTracker + t *testing.T +} + +func newTester(t *testing.T) *tester { + return &tester{NewTriggerTimeTracker(), t} +} + +func (t *tester) whenComputeEndpointLastChangeTriggerTime( + namespace string, service *v1.Service, pods ...*v1.Pod) subject { + return subject{t.ComputeEndpointLastChangeTriggerTime(namespace, service, pods), t.t} +} + +type subject struct { + got time.Time + t *testing.T +} + +func (s subject) expect(expected time.Time) { + s.doExpect(expected) +} + +func (s subject) expectNil() { + s.doExpect(time.Time{}) +} + +func (s subject) doExpect(expected time.Time) { + if s.got != expected { + _, fn, line, _ := runtime.Caller(2) + s.t.Errorf("Wrong trigger time in %s:%d expected %s, got %s", fn, line, expected, s.got) + } +} + +func createPod(namespace, ttServiceName string, readyTime time.Time) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: ttServiceName}, + Status: v1.PodStatus{Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + LastTransitionTime: metav1.NewTime(readyTime), + }, + }, + }, + } +} + +func createService(namespace, ttServiceName string, creationTime time.Time) *v1.Service { + return &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: ttServiceName, + CreationTimestamp: metav1.NewTime(creationTime), + }, + } +} diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 3746033a562..9a4402d945e 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -476,6 +476,12 @@ const ( // Enables ipv6 dual stack IPv6DualStack featuregate.Feature = "IPv6DualStack" + // owner: @robscott @freehan + // alpha: v1.16 + // + // Enable Endpoint Slices for more scalable Service endpoints. 
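The gate declared just below is consumed like any other alpha feature gate: operators opt in with --feature-gates=EndpointSlice=true on the relevant components, and code paths check it at runtime. A minimal sketch, mirroring the check this patch adds to the bootstrap RBAC policy:

package main

import (
	"fmt"

	utilfeature "k8s.io/apiserver/pkg/util/feature"

	"k8s.io/kubernetes/pkg/features"
)

func main() {
	// Gate new behavior on the alpha EndpointSlice feature.
	if utilfeature.DefaultFeatureGate.Enabled(features.EndpointSlice) {
		fmt.Println("EndpointSlice enabled: start the endpointslice controller")
		return
	}
	fmt.Println("EndpointSlice disabled: fall back to Endpoints only")
}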
+ EndpointSlice featuregate.Feature = "EndpointSlice" + // owner: @Huang-Wei // alpha: v1.16 // @@ -559,6 +565,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS VolumePVCDataSource: {Default: false, PreRelease: featuregate.Alpha}, PodOverhead: {Default: false, PreRelease: featuregate.Alpha}, IPv6DualStack: {Default: false, PreRelease: featuregate.Alpha}, + EndpointSlice: {Default: false, PreRelease: featuregate.Alpha}, EvenPodsSpread: {Default: false, PreRelease: featuregate.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go index fef50d68b6d..08d57be51a1 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go @@ -149,6 +149,17 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding) }, }) + if utilfeature.DefaultFeatureGate.Enabled(features.EndpointSlice) { + addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "endpointslice-controller"}, + Rules: []rbacv1.PolicyRule{ + rbacv1helpers.NewRule("get", "list", "watch").Groups(legacyGroup).Resources("services", "pods", "nodes").RuleOrDie(), + rbacv1helpers.NewRule("get", "list", "create", "update", "delete").Groups(discoveryGroup).Resources("endpointslices").RuleOrDie(), + eventsRule(), + }, + }) + } + if utilfeature.DefaultFeatureGate.Enabled(features.ExpandPersistentVolumes) { addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{ ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "expand-controller"}, diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go index 2f9bcb8ad75..4e9433a69b9 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/policy.go @@ -45,6 +45,7 @@ const ( autoscalingGroup = "autoscaling" batchGroup = "batch" certificatesGroup = "certificates.k8s.io" + discoveryGroup = "discovery.k8s.io" extensionsGroup = "extensions" policyGroup = "policy" rbacGroup = "rbac.authorization.k8s.io" diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go index dee7bb15693..3d040abdcbe 100644 --- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go +++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go @@ -111,6 +111,9 @@ type KubeControllerManagerConfiguration struct { // EndpointControllerConfiguration holds configuration for EndpointController // related features. EndpointController EndpointControllerConfiguration + // EndpointSliceControllerConfiguration holds configuration for + // EndpointSliceController related features. + EndpointSliceController EndpointSliceControllerConfiguration // GarbageCollectorControllerConfiguration holds configuration for // GarbageCollectorController related features. GarbageCollectorController GarbageCollectorControllerConfiguration @@ -297,6 +300,20 @@ type EndpointControllerConfiguration struct { EndpointUpdatesBatchPeriod metav1.Duration } +// EndpointSliceControllerConfiguration contains elements describing +// EndpointSliceController. 
+type EndpointSliceControllerConfiguration struct { + // concurrentServiceEndpointSyncs is the number of service endpoint syncing + // operations that will be done concurrently. Larger number = faster + // endpoint slice updating, but more CPU (and network) load. + ConcurrentServiceEndpointSyncs int32 + + // maxEndpointsPerSlice is the maximum number of endpoints that will be + // added to an EndpointSlice. More endpoints per slice will result in fewer + // and larger endpoint slices, but larger resources. + MaxEndpointsPerSlice int32 +} + // GarbageCollectorControllerConfiguration contains elements describing GarbageCollectorController. type GarbageCollectorControllerConfiguration struct { // enables the generic garbage collector. MUST be synced with the diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go index 6c9475879ed..e5ed6f2d7de 100644 --- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go @@ -140,6 +140,22 @@ func (in *EndpointControllerConfiguration) DeepCopy() *EndpointControllerConfigu return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EndpointSliceControllerConfiguration) DeepCopyInto(out *EndpointSliceControllerConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointSliceControllerConfiguration. +func (in *EndpointSliceControllerConfiguration) DeepCopy() *EndpointSliceControllerConfiguration { + if in == nil { + return nil + } + out := new(EndpointSliceControllerConfiguration) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GarbageCollectorControllerConfiguration) DeepCopyInto(out *GarbageCollectorControllerConfiguration) { *out = *in @@ -289,6 +305,7 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa out.StatefulSetController = in.StatefulSetController out.DeprecatedController = in.DeprecatedController out.EndpointController = in.EndpointController + out.EndpointSliceController = in.EndpointSliceController in.GarbageCollectorController.DeepCopyInto(&out.GarbageCollectorController) in.HPAController.DeepCopyInto(&out.HPAController) out.JobController = in.JobController
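To close, a small sketch of how the new EndpointSliceController knobs defined above might be populated on a KubeControllerManagerConfiguration. It is illustrative only; the numeric values are example settings, not the defaults applied by the controller's options code elsewhere in this patch.

package main

import (
	"fmt"

	kubectrlmgrconfigv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1"
)

func main() {
	cfg := kubectrlmgrconfigv1alpha1.KubeControllerManagerConfiguration{}
	cfg.EndpointSliceController = kubectrlmgrconfigv1alpha1.EndpointSliceControllerConfiguration{
		ConcurrentServiceEndpointSyncs: 5,   // example value: more workers, faster syncs, more load
		MaxEndpointsPerSlice:           100, // example value: cap before another slice is created
	}
	fmt.Printf("%+v\n", cfg.EndpointSliceController)
}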