Merge pull request #115204 from alexanderConstantinescu/kccm-del-taint-pred

[KCCM - service controller]: KEP-3458 implementation
This commit is contained in:
Kubernetes Prow Robot 2023-03-10 04:35:06 -08:00 committed by GitHub
commit 4b7bd457c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 818 additions and 444 deletions

View File

@ -739,6 +739,14 @@ const (
// Enables kubelet support to size memory backed volumes
SizeMemoryBackedVolumes featuregate.Feature = "SizeMemoryBackedVolumes"
// owner: @alexanderConstantinescu
// kep: http://kep.k8s.io/3458
// beta: v1.27
//
// Enables fewer load balancer re-configurations by the service controller
// (KCCM) as an effect of changing node state.
StableLoadBalancerNodeSet featuregate.Feature = "StableLoadBalancerNodeSet"
// owner: @mattcary
// alpha: v1.22
//
@ -1057,6 +1065,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
SizeMemoryBackedVolumes: {Default: true, PreRelease: featuregate.Beta},
StableLoadBalancerNodeSet: {Default: true, PreRelease: featuregate.Beta},
StatefulSetAutoDeletePVC: {Default: false, PreRelease: featuregate.Alpha},
StatefulSetStartOrdinal: {Default: true, PreRelease: featuregate.Beta},

View File

@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
@ -41,6 +42,7 @@ import (
servicehelper "k8s.io/cloud-provider/service/helpers"
"k8s.io/component-base/featuregate"
controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers"
"k8s.io/controller-manager/pkg/features"
"k8s.io/klog/v2"
)
@ -658,6 +660,12 @@ func nodeNames(nodes []*v1.Node) sets.String {
}
func shouldSyncUpdatedNode(oldNode, newNode *v1.Node) bool {
if utilfeature.DefaultFeatureGate.Enabled(features.StableLoadBalancerNodeSet) {
// Only Nodes with changes to the label
// "node.kubernetes.io/exclude-from-external-load-balancers" will
// trigger a load balancer re-sync.
return respectsPredicates(oldNode, nodeIncludedPredicate) != respectsPredicates(newNode, nodeIncludedPredicate)
}
// Evaluate the individual node exclusion predicate before evaluating the
// compounded result of all predicates. We don't sync ETP=local services
// for changes on the readiness condition, hence if a node remains NotReady
@ -712,6 +720,7 @@ func (c *Controller) nodeSyncService(svc *v1.Service, oldNodes, newNodes []*v1.N
klog.V(4).Infof("nodeSyncService started for service %s/%s", svc.Namespace, svc.Name)
if err := c.lockedUpdateLoadBalancerHosts(svc, newNodes); err != nil {
runtime.HandleError(fmt.Errorf("failed to update load balancer hosts for service %s/%s: %v", svc.Namespace, svc.Name, err))
nodeSyncErrorCount.Inc()
return retNeedRetry
}
klog.V(4).Infof("nodeSyncService finished successfully for service %s/%s", svc.Namespace, svc.Name)
@ -755,6 +764,7 @@ func (c *Controller) updateLoadBalancerHosts(ctx context.Context, services []*v1
// associated with the service.
func (c *Controller) lockedUpdateLoadBalancerHosts(service *v1.Service, hosts []*v1.Node) error {
startTime := time.Now()
loadBalancerSyncCount.Inc()
defer func() {
latency := time.Since(startTime).Seconds()
klog.V(4).Infof("It took %v seconds to update load balancer hosts for service %s/%s", latency, service.Namespace, service.Name)
@ -932,9 +942,21 @@ var (
nodeIncludedPredicate,
nodeUnTaintedPredicate,
}
stableNodeSetPredicates []NodeConditionPredicate = []NodeConditionPredicate{
nodeIncludedPredicate,
// This is not perfect, but probably good enough. We won't update the
// LBs just because the taint was added (see shouldSyncUpdatedNode), but
// if any other situation causes an LB sync, tainted nodes will be
// excluded at that time, and connections on those nodes will not be
// connection-drained.
nodeUnTaintedPredicate,
}
)
func getNodePredicatesForService(service *v1.Service) []NodeConditionPredicate {
if utilfeature.DefaultFeatureGate.Enabled(features.StableLoadBalancerNodeSet) {
return stableNodeSetPredicates
}
if service.Spec.ExternalTrafficPolicy == v1.ServiceExternalTrafficPolicyLocal {
return etpLocalNodePredicates
}

View File

@ -17,9 +17,10 @@ limitations under the License.
package service
import (
"sync"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sync"
)
const (
@ -32,12 +33,26 @@ var register sync.Once
// registerMetrics registers service-controller metrics.
func registerMetrics() {
register.Do(func() {
legacyregistry.MustRegister(loadBalancerSyncCount)
legacyregistry.MustRegister(nodeSyncLatency)
legacyregistry.MustRegister(nodeSyncErrorCount)
legacyregistry.MustRegister(updateLoadBalancerHostLatency)
})
}
var (
loadBalancerSyncCount = metrics.NewCounter(&metrics.CounterOpts{
Name: "loadbalancer_sync_total",
Subsystem: subSystemName,
Help: "A metric counting the amount of times any load balancer has been configured, as an effect of service/node changes on the cluster",
StabilityLevel: metrics.ALPHA,
})
nodeSyncErrorCount = metrics.NewCounter(&metrics.CounterOpts{
Name: "nodesync_error_total",
Subsystem: subSystemName,
Help: "A metric counting the amount of times any load balancer has been configured and errored, as an effect of node changes on the cluster",
StabilityLevel: metrics.ALPHA,
})
nodeSyncLatency = metrics.NewHistogram(&metrics.HistogramOpts{
Name: "nodesync_latency_seconds",
Subsystem: subSystemName,
@ -46,7 +61,6 @@ var (
Buckets: metrics.ExponentialBuckets(1, 2, 15),
StabilityLevel: metrics.ALPHA,
})
updateLoadBalancerHostLatency = metrics.NewHistogram(&metrics.HistogramOpts{
Name: "update_loadbalancer_host_latency_seconds",
Subsystem: subSystemName,

View File

@ -21,16 +21,24 @@ import (
)
const (
// Every feature gate should add method here following this template:
//
// // owner: @username
// // alpha: v1.4
// MyFeature featuregate.Feature = "MyFeature"
//
// Feature gates should be listed in alphabetical, case-sensitive
// (upper before any lower case character) order. This reduces the risk
// of code conflicts because changes are more likely to be scattered
// across the file.
// Every feature gate should add method here following this template:
//
// // owner: @username
// // alpha: v1.4
// MyFeature featuregate.Feature = "MyFeature"
//
// Feature gates should be listed in alphabetical, case-sensitive
// (upper before any lower case character) order. This reduces the risk
// of code conflicts because changes are more likely to be scattered
// across the file.
// owner: @alexanderConstantinescu
// kep: http://kep.k8s.io/3458
// beta: v1.27
//
// Enables fewer load balancer re-configurations by the service controller
// (KCCM) as an effect of changing node state.
StableLoadBalancerNodeSet featuregate.Feature = "StableLoadBalancerNodeSet"
)
func SetupCurrentKubernetesSpecificFeatureGates(featuregates featuregate.MutableFeatureGate) error {
@ -39,4 +47,6 @@ func SetupCurrentKubernetesSpecificFeatureGates(featuregates featuregate.Mutable
// cloudPublicFeatureGates consists of cloud-specific feature keys.
// To add a new feature, define a key for it at k8s.io/api/pkg/features and add it here.
var cloudPublicFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{}
var cloudPublicFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
StableLoadBalancerNodeSet: {Default: true, PreRelease: featuregate.Beta},
}