P&F: update mutating request estimation

Wojciech Tyczyński 2021-10-27 10:25:38 +02:00
parent e262db7a4d
commit 4700cf622b
2 changed files with 48 additions and 15 deletions

View File

@@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
 }
 
 func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
-    if (!e.enabled) {
+    if !e.enabled {
         return WorkEstimate{
             InitialSeats: 1,
         }
@@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
     // - cost of processing an event object for each watcher (e.g. filtering,
     //   sending data over network)
     // We're starting simple to get some operational experience with it and
-    // we will work on tuning the algorithm later. As a starting point we
-    // we simply assume that processing 1 event takes 1/Nth of a seat for
-    // M milliseconds and processing different events is infinitely parallelizable.
-    // We simply record the appropriate values here and rely on potential
-    // reshaping of the request if the concurrency limit for a given priority
-    // level will not allow to run request with that many seats.
-    //
-    // TODO: As described in the KEP, we should take into account that not all
-    // events are equal and try to estimate the cost of a single event based on
-    // some historical data about size of events.
+    // we will work on tuning the algorithm later. Given that the actual work
+    // associated with processing watch events is happening in multiple
+    // goroutines (proportional to the number of watchers) that are all
+    // resumed at once, as a starting point we assume that each such goroutine
+    // is taking 1/Nth of a seat for M milliseconds.
+    // We allow the accounting of that work in P&F to be reshaped into another
+    // rectangle of equal area for practical reasons.
     var finalSeats uint
     var additionalLatency time.Duration
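To make the new comment concrete: with W the number of watchers observing the mutated resource, N the number of watchers assumed to be served by one seat, and M the per-event processing time, the estimate described above is

    finalSeats = ceil(W / N)
    finalWork  = finalSeats × M    (a rectangle: finalSeats seats wide, M long)

and P&F is then allowed to account the same finalWork as fewer seats occupied for proportionally longer, since only the area of that rectangle is preserved. N and M correspond to watchesPerSeat and eventAdditionalDuration in the code below; their concrete values are not part of this excerpt.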
@@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
     // However, until we tune the estimation we want to stay on the safe side
     // an avoid introducing additional latency for almost every single request.
     if watchCount >= watchesPerSeat {
+        // TODO: As described in the KEP, we should take into account that not all
+        // events are equal and try to estimate the cost of a single event based on
+        // some historical data about size of events.
         finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
-        additionalLatency = eventAdditionalDuration
+        finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
+
+        // While processing individual events is highly parallel,
+        // the design/implementation of P&F has a couple limitations that
+        // make using this assumption in the P&F implementation very
+        // inefficient because:
+        // - we reserve max(initialSeats, finalSeats) for time of executing
+        //   both phases of the request
+        // - even more importantly, when a given `wide` request is the one to
+        //   be dispatched, we are not dispatching any other request until
+        //   we accumulate enough seats to dispatch the nominated one, even
+        //   if currently unoccupied seats would allow for dispatching some
+        //   other requests in the meantime
+        // As a consequence of these, the wider the request, the more capacity
+        // will effectively be blocked and unused during dispatching and
+        // executing this request.
+        //
+        // To mitigate the impact of it, we're capping the maximum number of
+        // seats that can be assigned to a given request. Thanks to it:
+        // 1) we reduce the amount of seat-seconds that are "wasted" during
+        //    dispatching and executing initial phase of the request
+        // 2) we are not changing the finalWork estimate - just potentially
+        //    reshaping it to be narrower and longer. As long as the maximum
+        //    seats setting will prevent dispatching too many requests at once
+        //    to prevent overloading kube-apiserver (and/or etcd or the VM or
+        //    a physical machine it is running on), we believe the relaxed
+        //    version should be good enough to achieve the P&F goals.
+        //
+        // TODO: Confirm that the current cap of maximumSeats allow us to
+        // achieve the above.
+        if finalSeats > maximumSeats {
+            finalSeats = maximumSeats
+        }
+        additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
     }
 
     return WorkEstimate{
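A standalone sketch of the new code path may help readers who don't have the surrounding file handy. This is not the upstream implementation: the helper below only mimics what the SeatsTimesDuration(...).DurationPerSeat(...) pair appears to compute (total seat-time spread evenly over the final seats), and the three constants are assumptions inferred from the test expectations further down rather than values shown in this excerpt.

```go
package main

import (
	"fmt"
	"math"
	"time"
)

// Assumed constants, inferred from the test expectations below; the real
// defaults live elsewhere in the flowcontrol request package.
const (
	watchesPerSeat          = 10.0                 // watchers served by a single seat
	eventAdditionalDuration = 5 * time.Millisecond // per-seat event-processing time
	maximumSeats            = 10                   // cap on seats for a single request
)

// estimateFinal mirrors the shape of the capped estimate above: compute the
// uncapped width, turn it into total seat-time, cap the width, and stretch
// the duration so the area stays the same.
func estimateFinal(watchCount int) (finalSeats uint, additionalLatency time.Duration) {
	if float64(watchCount) < watchesPerSeat {
		return 0, 0
	}
	finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	finalWork := time.Duration(finalSeats) * eventAdditionalDuration // total seat-time
	if finalSeats > maximumSeats {
		finalSeats = maximumSeats
	}
	additionalLatency = finalWork / time.Duration(finalSeats)
	return finalSeats, additionalLatency
}

func main() {
	seats, latency := estimateFinal(199)
	fmt.Println(seats, latency) // 10 10ms
}
```

Running it with the 199 watches from the test case below prints 10 and 10ms, matching the updated expectations.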

View File

@@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
         additionalLatencyExpected: 0,
     },
     {
-        name: "request verb is create, watches registered, maximum is exceeded",
+        name: "request verb is create, watches registered, maximum is capped",
         requestURI: "http://server/apis/foo.bar/v1/foos",
         requestInfo: &apirequest.RequestInfo{
             Verb: "create",
@@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
         },
         watchCount: 199,
         initialSeatsExpected: 1,
-        finalSeatsExpected: 20,
-        additionalLatencyExpected: 5 * time.Millisecond,
+        finalSeatsExpected: 10,
+        additionalLatencyExpected: 10 * time.Millisecond,
     },
     {
         name: "request verb is update, no watches",
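For reference, the new numbers in the capped test case follow directly from the arithmetic above, assuming watchesPerSeat = 10, eventAdditionalDuration = 5ms and maximumSeats = 10 (values implied by the old and new expectations, not shown in this excerpt): 199 registered watches give ceil(199 / 10) = 20 uncapped seats and 20 × 5ms = 100 seat-milliseconds of final work; capping at 10 seats keeps those 100 seat-milliseconds but spreads them over half as many seats, so the per-seat duration doubles to 10ms. Hence finalSeatsExpected drops from 20 to 10 while additionalLatencyExpected rises from 5ms to 10ms.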