Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-15 23:03:40 +00:00
P&F: update mutating request estimation
This commit is contained in:
parent e262db7a4d
commit 4700cf622b
@@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
 }
 
 func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
-	if (!e.enabled) {
+	if !e.enabled {
 		return WorkEstimate{
 			InitialSeats: 1,
 		}
@@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
 	// - cost of processing an event object for each watcher (e.g. filtering,
 	//   sending data over network)
 	// We're starting simple to get some operational experience with it and
-	// we will work on tuning the algorithm later. As a starting point we
-	// we simply assume that processing 1 event takes 1/Nth of a seat for
-	// M milliseconds and processing different events is infinitely parallelizable.
-	// We simply record the appropriate values here and rely on potential
-	// reshaping of the request if the concurrency limit for a given priority
-	// level will not allow to run request with that many seats.
-	//
-	// TODO: As described in the KEP, we should take into account that not all
-	// events are equal and try to estimate the cost of a single event based on
-	// some historical data about size of events.
+	// we will work on tuning the algorithm later. Given that the actual work
+	// associated with processing watch events is happening in multiple
+	// goroutines (proportional to the number of watchers) that are all
+	// resumed at once, as a starting point we assume that each such goroutine
+	// is taking 1/Nth of a seat for M milliseconds.
+	// We allow the accounting of that work in P&F to be reshaped into another
+	// rectangle of equal area for practical reasons.
 	var finalSeats uint
 	var additionalLatency time.Duration
 
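The replacement comment states the model concretely: the fan-out for one event with watchCount watchers is treated as ceil(watchCount / watchesPerSeat) seats held for eventAdditionalDuration. Below is a minimal standalone sketch of that starting-point computation; the constant values (10 watchers per seat, 5ms per event) are assumptions consistent with the test expectations further down, not guaranteed by this diff.

package main

import (
	"fmt"
	"math"
	"time"
)

// Assumed values; the real constants live in the apiserver flowcontrol
// request package and may differ across releases.
const (
	watchesPerSeat          = 10.0                 // N: watchers served by one seat
	eventAdditionalDuration = 5 * time.Millisecond // M: time each goroutine holds its share
)

// uncappedEstimate models the new comment: each watcher resumes a goroutine
// taking 1/watchesPerSeat of a seat for eventAdditionalDuration, so the
// whole fan-out occupies ceil(watchCount/watchesPerSeat) seats for that time.
func uncappedEstimate(watchCount int) (seats uint, latency time.Duration) {
	seats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	return seats, eventAdditionalDuration
}

func main() {
	seats, latency := uncappedEstimate(199)
	fmt.Printf("199 watchers -> %d seats for %v\n", seats, latency) // 20 seats for 5ms
}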
@@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
 	// However, until we tune the estimation we want to stay on the safe side
 	// and avoid introducing additional latency for almost every single request.
 	if watchCount >= watchesPerSeat {
+		// TODO: As described in the KEP, we should take into account that not all
+		// events are equal and try to estimate the cost of a single event based on
+		// some historical data about size of events.
 		finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
-		additionalLatency = eventAdditionalDuration
+		finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
+
+		// While processing individual events is highly parallel,
+		// the design/implementation of P&F has a couple of limitations that
+		// make using this assumption in the P&F implementation very
+		// inefficient because:
+		// - we reserve max(initialSeats, finalSeats) for the time of executing
+		//   both phases of the request
+		// - even more importantly, when a given `wide` request is the one to
+		//   be dispatched, we are not dispatching any other request until
+		//   we accumulate enough seats to dispatch the nominated one, even
+		//   if currently unoccupied seats would allow for dispatching some
+		//   other requests in the meantime
+		// As a consequence of these, the wider the request, the more capacity
+		// will effectively be blocked and unused during dispatching and
+		// executing this request.
+		//
+		// To mitigate the impact of it, we're capping the maximum number of
+		// seats that can be assigned to a given request. Thanks to it:
+		// 1) we reduce the amount of seat-seconds that are "wasted" during
+		//    dispatching and executing the initial phase of the request
+		// 2) we are not changing the finalWork estimate - just potentially
+		//    reshaping it to be narrower and longer. As long as the maximum
+		//    seats setting will prevent dispatching too many requests at once
+		//    to prevent overloading kube-apiserver (and/or etcd or the VM or
+		//    a physical machine it is running on), we believe the relaxed
+		//    version should be good enough to achieve the P&F goals.
+		//
+		// TODO: Confirm that the current cap of maximumSeats allows us to
+		// achieve the above.
+		if finalSeats > maximumSeats {
+			finalSeats = maximumSeats
+		}
+		additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
 	}
 
 	return WorkEstimate{
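The key arithmetic here: finalWork is the rectangle's area (seats x duration) computed before the cap, and DurationPerSeat spreads that same area over the capped width. Below is a standalone sketch of the cap-and-reshape step, with plain time.Duration arithmetic standing in for the package's SeatSeconds type; maximumSeats = 10 is an assumption consistent with the test expectations below.

package main

import (
	"fmt"
	"math"
	"time"
)

// Assumed constants, consistent with the test expectations below.
const (
	watchesPerSeat          = 10.0
	eventAdditionalDuration = 5 * time.Millisecond
	maximumSeats            = 10
)

// capAndReshape mirrors the new logic: compute the uncapped width, record
// the total work (seats x duration), cap the width, then stretch the
// duration so the seat-seconds area stays the same. Only meaningful for
// watchCount >= watchesPerSeat, matching the guard in the real code.
func capAndReshape(watchCount int) (seats uint, latency time.Duration) {
	seats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	// Stand-in for SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration).
	finalWork := time.Duration(seats) * eventAdditionalDuration
	if seats > maximumSeats {
		seats = maximumSeats
	}
	// Stand-in for finalWork.DurationPerSeat(float64(finalSeats)).
	latency = finalWork / time.Duration(seats)
	return seats, latency
}

func main() {
	seats, latency := capAndReshape(199)
	// ceil(199/10) = 20 seats -> capped at 10; 20 * 5ms = 100 seat-ms of work
	// spread over 10 seats = 10ms of additional latency.
	fmt.Printf("199 watchers -> %d seats, %v additional latency\n", seats, latency)
}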
@@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
 			additionalLatencyExpected: 0,
 		},
 		{
-			name:       "request verb is create, watches registered, maximum is exceeded",
+			name:       "request verb is create, watches registered, maximum is capped",
 			requestURI: "http://server/apis/foo.bar/v1/foos",
 			requestInfo: &apirequest.RequestInfo{
 				Verb: "create",
@@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
 			},
 			watchCount:                199,
 			initialSeatsExpected:      1,
-			finalSeatsExpected:        20,
-			additionalLatencyExpected: 5 * time.Millisecond,
+			finalSeatsExpected:        10,
+			additionalLatencyExpected: 10 * time.Millisecond,
 		},
 		{
 			name: "request verb is update, no watches",
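The updated expectations follow directly from the reshaping: 199 watches yield ceil(199/10) = 20 uncapped seats and 20 x 5ms = 100 seat-milliseconds of final work; capping at the implied maximumSeats of 10 spreads the same work over half as many seats, doubling the additional latency to 10ms.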