diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/mutating_work_estimator.go b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/mutating_work_estimator.go
index f448f819cf2..ff0dd357149 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/mutating_work_estimator.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/mutating_work_estimator.go
@@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
 }
 
 func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
-	if (!e.enabled) {
+	if !e.enabled {
 		return WorkEstimate{
 			InitialSeats: 1,
 		}
@@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
 	//   - cost of processing an event object for each watcher (e.g. filtering,
 	//     sending data over network)
 	// We're starting simple to get some operational experience with it and
-	// we will work on tuning the algorithm later. As a starting point we
-	// we simply assume that processing 1 event takes 1/Nth of a seat for
-	// M milliseconds and processing different events is infinitely parallelizable.
-	// We simply record the appropriate values here and rely on potential
-	// reshaping of the request if the concurrency limit for a given priority
-	// level will not allow to run request with that many seats.
-	//
-	// TODO: As described in the KEP, we should take into account that not all
-	// events are equal and try to estimate the cost of a single event based on
-	// some historical data about size of events.
+	// we will work on tuning the algorithm later. Given that the actual work
+	// associated with processing watch events is happening in multiple
+	// goroutines (proportional to the number of watchers) that are all
+	// resumed at once, as a starting point we assume that each such goroutine
+	// is taking 1/Nth of a seat for M milliseconds.
+	// We allow the accounting of that work in P&F to be reshaped into another
+	// rectangle of equal area for practical reasons.
 	var finalSeats uint
 	var additionalLatency time.Duration
 
@@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
 	// However, until we tune the estimation we want to stay on the safe side
 	// an avoid introducing additional latency for almost every single request.
 	if watchCount >= watchesPerSeat {
+		// TODO: As described in the KEP, we should take into account that not all
+		// events are equal and try to estimate the cost of a single event based on
+		// some historical data about size of events.
 		finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
-		additionalLatency = eventAdditionalDuration
+		finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
+
+		// While processing individual events is highly parallel,
+		// the design/implementation of P&F has a couple limitations that
+		// make using this assumption in the P&F implementation very
+		// inefficient because:
+		// - we reserve max(initialSeats, finalSeats) for time of executing
+		//   both phases of the request
+		// - even more importantly, when a given `wide` request is the one to
+		//   be dispatched, we are not dispatching any other request until
+		//   we accumulate enough seats to dispatch the nominated one, even
+		//   if currently unoccupied seats would allow for dispatching some
+		//   other requests in the meantime
+		// As a consequence of these, the wider the request, the more capacity
+		// will effectively be blocked and unused during dispatching and
+		// executing this request.
+		//
+		// To mitigate the impact of it, we're capping the maximum number of
+		// seats that can be assigned to a given request. Thanks to it:
+		// 1) we reduce the amount of seat-seconds that are "wasted" during
+		//    dispatching and executing initial phase of the request
+		// 2) we are not changing the finalWork estimate - just potentially
+		//    reshaping it to be narrower and longer. As long as the maximum
+		//    seats setting will prevent dispatching too many requests at once
+		//    to prevent overloading kube-apiserver (and/or etcd or the VM or
+		//    a physical machine it is running on), we believe the relaxed
+		//    version should be good enough to achieve the P&F goals.
+		//
+		// TODO: Confirm that the current cap of maximumSeats allow us to
+		// achieve the above.
+		if finalSeats > maximumSeats {
+			finalSeats = maximumSeats
+		}
+		additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
 	}
 
 	return WorkEstimate{
diff --git a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/width_test.go b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/width_test.go
index ff1200f103a..caf2e53b739 100644
--- a/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/width_test.go
+++ b/staging/src/k8s.io/apiserver/pkg/util/flowcontrol/request/width_test.go
@@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
 			additionalLatencyExpected: 0,
 		},
 		{
-			name:       "request verb is create, watches registered, maximum is exceeded",
+			name:       "request verb is create, watches registered, maximum is capped",
 			requestURI: "http://server/apis/foo.bar/v1/foos",
 			requestInfo: &apirequest.RequestInfo{
 				Verb:     "create",
@@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
 			},
 			watchCount:                 199,
 			initialSeatsExpected:       1,
-			finalSeatsExpected:         20,
-			additionalLatencyExpected:  5 * time.Millisecond,
+			finalSeatsExpected:         10,
+			additionalLatencyExpected:  10 * time.Millisecond,
 		},
 		{
 			name: "request verb is update, no watches",
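For illustration only (not part of the patch), the following standalone Go sketch walks through the "reshape into a rectangle of equal area" idea from the comments above. It replaces the SeatsTimesDuration/DurationPerSeat helpers with plain time.Duration arithmetic and assumes watchesPerSeat = 10, eventAdditionalDuration = 5ms and maximumSeats = 10; those assumed values are chosen to be consistent with the updated test expectation (199 watches -> 10 final seats, 10ms additional latency).

package main

import (
	"fmt"
	"math"
	"time"
)

const (
	watchesPerSeat          = 10.0                 // assumed: watchers accounted per seat
	eventAdditionalDuration = 5 * time.Millisecond // assumed: per-event processing duration
	maximumSeats            = 10                   // assumed: cap on seats per request
)

// estimateFinal mirrors the capping logic above: compute the total work first,
// cap the seat count, then spread the same work over the remaining seats as
// additional latency.
func estimateFinal(watchCount int) (finalSeats uint, additionalLatency time.Duration) {
	// The real estimator only takes this path when watchCount >= watchesPerSeat;
	// keep that precondition here to avoid dividing by zero below.
	if float64(watchCount) < watchesPerSeat {
		return 0, 0
	}
	finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	// Total seat-time before reshaping: width (seats) times height (duration).
	finalWork := time.Duration(float64(finalSeats) * float64(eventAdditionalDuration))
	if finalSeats > maximumSeats {
		finalSeats = maximumSeats
	}
	// Same area, narrower and longer: divide the work by the capped seat count.
	additionalLatency = time.Duration(float64(finalWork) / float64(finalSeats))
	return finalSeats, additionalLatency
}

func main() {
	seats, latency := estimateFinal(199)
	fmt.Println(seats, latency) // prints: 10 10ms
}

In this example the total work stays at 100ms of seat-time either way; the cap only trades "20 seats for 5ms" into "10 seats for 10ms", which is exactly what the updated test case asserts.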