Merge pull request #126329 from serathius/concurrent-transformation-chan-of-chan

[chan of chan] Make object transformation concurrent to remove watch cache scalability issue for conversion webhook
2025-09-27 13:15:36 +00:00 · 2024-07-31 10:41:42 -07:00
parent eb729d1db7 bb686f2033
commit c19d9edfde
4 changed files with 119 additions and 8 deletions
--- a/pkg/features/kube_features.go
+++ b/pkg/features/kube_features.go
@@ -1264,6 +1264,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

 	genericfeatures.AuthorizeWithSelectors: {Default: false, PreRelease: featuregate.Alpha},

+	genericfeatures.ConcurrentWatchObjectDecode: {Default: false, PreRelease: featuregate.Beta},
+
 	genericfeatures.ConsistentListFromCache: {Default: true, PreRelease: featuregate.Beta},

 	genericfeatures.CoordinatedLeaderElection: {Default: false, PreRelease: featuregate.Alpha},
--- a/staging/src/k8s.io/apiserver/pkg/features/kube_features.go
+++ b/staging/src/k8s.io/apiserver/pkg/features/kube_features.go
@@ -101,6 +101,11 @@ const (
 	// Allows authorization to use field and label selectors.
 	AuthorizeWithSelectors featuregate.Feature = "AuthorizeWithSelectors"

+	// owner: @serathius
+	// beta: v1.31
+	// Enables concurrent decoding of watch objects to avoid starving the watch cache when a conversion webhook is installed.
+	ConcurrentWatchObjectDecode featuregate.Feature = "ConcurrentWatchObjectDecode"
+
 	// owner: @cici37 @jpbetz
 	// kep: http://kep.k8s.io/3488
 	// alpha: v1.26
@@ -365,6 +370,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

 	AuthorizeWithSelectors: {Default: false, PreRelease: featuregate.Alpha},

+	ConcurrentWatchObjectDecode: {Default: false, PreRelease: featuregate.Beta},
+
 	ValidatingAdmissionPolicy: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.32

 	CoordinatedLeaderElection: {Default: false, PreRelease: featuregate.Alpha},
--- a/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go
+++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go
@@ -46,8 +46,9 @@ import (

 const (
 	// We have set a buffer in order to reduce times of context switches.
-	incomingBufSize = 100
-	outgoingBufSize = 100
+	incomingBufSize         = 100
+	outgoingBufSize         = 100
+	processEventConcurrency = 10 // bounds how many event transformations the concurrent pipeline keeps in flight (its queue capacity is this value minus one)
 )

 // defaultWatcherMaxLimit is used to facilitate construction tests
@@ -230,8 +231,7 @@ func (wc *watchChan) run(initialEventsEndBookmarkRequired, forceInitialEvents bo
 	go wc.startWatching(watchClosedCh, initialEventsEndBookmarkRequired, forceInitialEvents)

 	var resultChanWG sync.WaitGroup
-	resultChanWG.Add(1)
-	go wc.processEvent(&resultChanWG)
+	wc.processEvents(&resultChanWG)

 	select {
 	case err := <-wc.errChan:
@@ -424,10 +424,17 @@ func (wc *watchChan) startWatching(watchClosedCh chan struct{}, initialEventsEnd
 	close(watchClosedCh)
 }

-// processEvent processes events from etcd watcher and sends results to resultChan.
-func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
+// processEvents starts the goroutines that turn events from the etcd watcher into results on resultChan and returns without waiting for them; every goroutine it starts is registered on wg.
+func (wc *watchChan) processEvents(wg *sync.WaitGroup) {
+	if utilfeature.DefaultFeatureGate.Enabled(features.ConcurrentWatchObjectDecode) { // gate on: transform events concurrently
+		wc.concurrentProcessEvents(wg)
+	} else { // gate off: a single goroutine transforms events one at a time
+		wg.Add(1)
+		go wc.serialProcessEvents(wg)
+	}
+}
+func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
 	defer wg.Done()
-
 	for {
 		select {
 		case e := <-wc.incomingEventChan:
@@ -435,7 +442,7 @@ func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
 			if res == nil {
 				continue
 			}
-			if len(wc.resultChan) == outgoingBufSize {
+			if len(wc.resultChan) == cap(wc.resultChan) {
 				klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", wc.watcher.objectType, "groupResource", wc.watcher.groupResource)
 			}
 			// If user couldn't receive results fast enough, we also block incoming events from watcher.
@@ -452,6 +459,95 @@ func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
 	}
 }

+func (wc *watchChan) concurrentProcessEvents(wg *sync.WaitGroup) { // Wires incomingEventChan -> transform -> resultChan through a concurrent pipeline and returns immediately; both goroutines are registered on wg.
+	p := concurrentOrderedEventProcessing{
+		input:           wc.incomingEventChan,
+		processFunc:     wc.transform,
+		output:          wc.resultChan,
+		processingQueue: make(chan chan *watch.Event, processEventConcurrency-1), // the queued entries plus the one the collector is waiting on add up to processEventConcurrency
+
+		objectType:    wc.watcher.objectType,
+		groupResource: wc.watcher.groupResource,
+	}
+	wg.Add(1)
+	go func() { // scheduler: reads input and starts one worker goroutine per event
+		defer wg.Done()
+		p.scheduleEventProcessing(wc.ctx, wg)
+	}()
+	wg.Add(1)
+	go func() { // collector: forwards worker results to output in scheduling order
+		defer wg.Done()
+		p.collectEventProcessing(wc.ctx)
+	}()
+}
+
+type concurrentOrderedEventProcessing struct { // State shared by the scheduler and collector goroutines of the concurrent event pipeline.
+	input       chan *event               // raw events, read by scheduleEventProcessing
+	processFunc func(*event) *watch.Event // run once per event in its own goroutine; a nil result is skipped by the collector
+	output      chan watch.Event          // non-nil results, written by collectEventProcessing
+
+	processingQueue chan chan *watch.Event // one response channel per scheduled event, enqueued in input order
+	// Metadata for logging
+	objectType    string
+	groupResource schema.GroupResource
+}
+
+func (p *concurrentOrderedEventProcessing) scheduleEventProcessing(ctx context.Context, wg *sync.WaitGroup) { // Reads events from p.input and starts one worker goroutine per event to run p.processFunc; returns once ctx is done. Workers are registered on wg.
+	var e *event
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case e = <-p.input:
+		}
+		processingResponse := make(chan *watch.Event, 1) // capacity 1 and a single sender, so the worker's send never waits for the collector
+		select {
+		case <-ctx.Done():
+			return
+		case p.processingQueue <- processingResponse: // enqueued before the worker starts, so queue order equals input order; blocks while the queue is full
+		}
+		wg.Add(1) // register the worker before it is started
+		go func(e *event, response chan<- *watch.Event) {
+			defer wg.Done()
+			select {
+			case <-ctx.Done():
+			case response <- p.processFunc(e): // the send operand is evaluated on entering the select, so processFunc runs even if ctx is already done
+			}
+		}(e, processingResponse)
+	}
+}
+
+func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Context) { // Takes response channels from p.processingQueue one at a time, waits for each result, and forwards non-nil results to p.output; returns once ctx is done.
+	var processingResponse chan *watch.Event
+	var e *watch.Event
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case processingResponse = <-p.processingQueue: // oldest scheduled event first
+		}
+		select {
+		case <-ctx.Done():
+			return
+		case e = <-processingResponse: // waits for this event's result even if later events have already finished
+		}
+		if e == nil { // processFunc returned nil for this event, so there is nothing to forward
+			continue
+		}
+		if len(p.output) == cap(p.output) { // output buffer is currently full, so the send below is likely to block
+			klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.objectType, "groupResource", p.groupResource) // NOTE(review): logs the outgoingBufSize constant rather than cap(p.output) — confirm the two always match
+		}
+		// If the consumer cannot receive results fast enough, block here, which in turn blocks incoming events from the watcher,
+		// because buffering events locally would use more memory.
+		// The worst case would be closing the fast watcher.
+		select {
+		case <-ctx.Done():
+			return
+		case p.output <- *e:
+		}
+	}
+}
+
 func (wc *watchChan) filter(obj runtime.Object) bool {
 	if wc.internalPred.Empty() {
 		return true
--- a/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher_test.go
@@ -133,6 +133,12 @@ func TestEtcdWatchSemantics(t *testing.T) {
 	storagetesting.RunWatchSemantics(ctx, t, store)
 }

+func TestEtcdWatchSemanticsWithConcurrentDecode(t *testing.T) { // Runs the shared watch semantics suite with the ConcurrentWatchObjectDecode feature gate enabled.
+	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ConcurrentWatchObjectDecode, true)
+	ctx, store, _ := testSetup(t)
+	storagetesting.RunWatchSemantics(ctx, t, store) // the same suite that TestEtcdWatchSemantics runs
+}
+
 func TestEtcdWatchSemanticInitialEventsExtended(t *testing.T) {
 	ctx, store, _ := testSetup(t)
 	storagetesting.RunWatchSemanticInitialEventsExtended(ctx, t, store)