P&F: update mutating request estimation

Wojciech Tyczyński 2021-10-27 10:25:38 +02:00
parent e262db7a4d
commit 4700cf622b
2 changed files with 48 additions and 15 deletions

View File

@@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
 }
 
 func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
-    if (!e.enabled) {
+    if !e.enabled {
         return WorkEstimate{
             InitialSeats: 1,
         }
@@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
     // - cost of processing an event object for each watcher (e.g. filtering,
     //   sending data over network)
     // We're starting simple to get some operational experience with it and
-    // we will work on tuning the algorithm later. As a starting point we
-    // we simply assume that processing 1 event takes 1/Nth of a seat for
-    // M milliseconds and processing different events is infinitely parallelizable.
-    // We simply record the appropriate values here and rely on potential
-    // reshaping of the request if the concurrency limit for a given priority
-    // level will not allow to run request with that many seats.
-    //
-    // TODO: As described in the KEP, we should take into account that not all
-    // events are equal and try to estimate the cost of a single event based on
-    // some historical data about size of events.
+    // we will work on tuning the algorithm later. Given that the actual work
+    // associated with processing watch events is happening in multiple
+    // goroutines (proportional to the number of watchers) that are all
+    // resumed at once, as a starting point we assume that each such goroutine
+    // is taking 1/Nth of a seat for M milliseconds.
+    // We allow the accounting of that work in P&F to be reshaped into another
+    // rectangle of equal area for practical reasons.
     var finalSeats uint
     var additionalLatency time.Duration
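To make the new comment concrete: with W the number of watchers observing the mutated resource, N the number of watchers assumed to be served by one seat, and M the per-event processing time, the estimate described above is

    finalSeats = ceil(W / N)
    finalWork  = finalSeats × M    (a rectangle: finalSeats seats wide, M long)

and P&F is then allowed to account the same finalWork as fewer seats occupied for proportionally longer, since only the area of that rectangle is preserved. N and M correspond to watchesPerSeat and eventAdditionalDuration in the code below; their concrete values are not part of this excerpt.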
@@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
     // However, until we tune the estimation we want to stay on the safe side
     // an avoid introducing additional latency for almost every single request.
     if watchCount >= watchesPerSeat {
+        // TODO: As described in the KEP, we should take into account that not all
+        // events are equal and try to estimate the cost of a single event based on
+        // some historical data about size of events.
         finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
-        additionalLatency = eventAdditionalDuration
+        finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
+
+        // While processing individual events is highly parallel,
+        // the design/implementation of P&F has a couple limitations that
+        // make using this assumption in the P&F implementation very
+        // inefficient because:
+        // - we reserve max(initialSeats, finalSeats) for time of executing
+        //   both phases of the request
+        // - even more importantly, when a given `wide` request is the one to
+        //   be dispatched, we are not dispatching any other request until
+        //   we accumulate enough seats to dispatch the nominated one, even
+        //   if currently unoccupied seats would allow for dispatching some
+        //   other requests in the meantime
+        // As a consequence of these, the wider the request, the more capacity
+        // will effectively be blocked and unused during dispatching and
+        // executing this request.
+        //
+        // To mitigate the impact of it, we're capping the maximum number of
+        // seats that can be assigned to a given request. Thanks to it:
+        // 1) we reduce the amount of seat-seconds that are "wasted" during
+        //    dispatching and executing initial phase of the request
+        // 2) we are not changing the finalWork estimate - just potentially
+        //    reshaping it to be narrower and longer. As long as the maximum
+        //    seats setting will prevent dispatching too many requests at once
+        //    to prevent overloading kube-apiserver (and/or etcd or the VM or
+        //    a physical machine it is running on), we believe the relaxed
+        //    version should be good enough to achieve the P&F goals.
+        //
+        // TODO: Confirm that the current cap of maximumSeats allow us to
+        // achieve the above.
+        if finalSeats > maximumSeats {
+            finalSeats = maximumSeats
+        }
+        additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
     }
 
     return WorkEstimate{
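A standalone sketch of the new code path may help readers who don't have the surrounding file handy. This is not the upstream implementation: the helper below only mimics what the SeatsTimesDuration(...).DurationPerSeat(...) pair appears to compute (total seat-time spread evenly over the final seats), and the three constants are assumptions inferred from the test expectations further down rather than values shown in this excerpt.

```go
package main

import (
	"fmt"
	"math"
	"time"
)

// Assumed constants, inferred from the test expectations below; the real
// defaults live elsewhere in the flowcontrol request package.
const (
	watchesPerSeat          = 10.0                 // watchers served by a single seat
	eventAdditionalDuration = 5 * time.Millisecond // per-seat event-processing time
	maximumSeats            = 10                   // cap on seats for a single request
)

// estimateFinal mirrors the shape of the capped estimate above: compute the
// uncapped width, turn it into total seat-time, cap the width, and stretch
// the duration so the area stays the same.
func estimateFinal(watchCount int) (finalSeats uint, additionalLatency time.Duration) {
	if float64(watchCount) < watchesPerSeat {
		return 0, 0
	}
	finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	finalWork := time.Duration(finalSeats) * eventAdditionalDuration // total seat-time
	if finalSeats > maximumSeats {
		finalSeats = maximumSeats
	}
	additionalLatency = finalWork / time.Duration(finalSeats)
	return finalSeats, additionalLatency
}

func main() {
	seats, latency := estimateFinal(199)
	fmt.Println(seats, latency) // 10 10ms
}
```

Running it with the 199 watches from the test case below prints 10 and 10ms, matching the updated expectations.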

View File

@@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
         additionalLatencyExpected: 0,
     },
     {
-        name: "request verb is create, watches registered, maximum is exceeded",
+        name: "request verb is create, watches registered, maximum is capped",
         requestURI: "http://server/apis/foo.bar/v1/foos",
         requestInfo: &apirequest.RequestInfo{
             Verb: "create",
@@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
         },
         watchCount: 199,
         initialSeatsExpected: 1,
-        finalSeatsExpected: 20,
-        additionalLatencyExpected: 5 * time.Millisecond,
+        finalSeatsExpected: 10,
+        additionalLatencyExpected: 10 * time.Millisecond,
     },
     {
         name: "request verb is update, no watches",
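For reference, the new numbers in the capped test case follow directly from the arithmetic above, assuming watchesPerSeat = 10, eventAdditionalDuration = 5ms and maximumSeats = 10 (values implied by the old and new expectations, not shown in this excerpt): 199 registered watches give ceil(199 / 10) = 20 uncapped seats and 20 × 5ms = 100 seat-milliseconds of final work; capping at 10 seats keeps those 100 seat-milliseconds but spreads them over half as many seats, so the per-seat duration doubles to 10ms. Hence finalSeatsExpected drops from 20 to 10 while additionalLatencyExpected rises from 5ms to 10ms.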