P&F: update mutating request estimation

This commit is contained in:
Wojciech Tyczyński 2021-10-27 10:25:38 +02:00
parent e262db7a4d
commit 4700cf622b
2 changed files with 48 additions and 15 deletions

View File

@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
}
func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
if (!e.enabled) {
if !e.enabled {
return WorkEstimate{
InitialSeats: 1,
}
@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
// - cost of processing an event object for each watcher (e.g. filtering,
// sending data over network)
// We're starting simple to get some operational experience with it and
// we will work on tuning the algorithm later. As a starting point
// we simply assume that processing 1 event takes 1/Nth of a seat for
// M milliseconds and processing different events is infinitely parallelizable.
// We simply record the appropriate values here and rely on potential
// reshaping of the request if the concurrency limit for a given priority
// level will not allow to run request with that many seats.
//
// TODO: As described in the KEP, we should take into account that not all
// events are equal and try to estimate the cost of a single event based on
// some historical data about size of events.
// we will work on tuning the algorithm later. Given that the actual work
// associated with processing watch events is happening in multiple
// goroutines (proportional to the number of watchers) that are all
// resumed at once, as a starting point we assume that each such goroutine
// is taking 1/Nth of a seat for M milliseconds.
// We allow the accounting of that work in P&F to be reshaped into another
// rectangle of equal area for practical reasons.
var finalSeats uint
var additionalLatency time.Duration
@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
// However, until we tune the estimation we want to stay on the safe side
// and avoid introducing additional latency for almost every single request.
if watchCount >= watchesPerSeat {
// TODO: As described in the KEP, we should take into account that not all
// events are equal and try to estimate the cost of a single event based on
// some historical data about size of events.
finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
additionalLatency = eventAdditionalDuration
finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
// While processing individual events is highly parallel,
// the design/implementation of P&F has a couple limitations that
// make using this assumption in the P&F implementation very
// inefficient because:
// - we reserve max(initialSeats, finalSeats) for time of executing
// both phases of the request
// - even more importantly, when a given `wide` request is the one to
// be dispatched, we are not dispatching any other request until
// we accumulate enough seats to dispatch the nominated one, even
// if currently unoccupied seats would allow for dispatching some
// other requests in the meantime
// As a consequence of these, the wider the request, the more capacity
// will effectively be blocked and unused during dispatching and
// executing this request.
//
// To mitigate the impact of it, we're capping the maximum number of
// seats that can be assigned to a given request. Thanks to it:
// 1) we reduce the amount of seat-seconds that are "wasted" during
// dispatching and executing initial phase of the request
// 2) we are not changing the finalWork estimate - just potentially
// reshaping it to be narrower and longer. As long as the maximum
// seats setting will prevent dispatching too many requests at once
// to prevent overloading kube-apiserver (and/or etcd or the VM or
// a physical machine it is running on), we believe the relaxed
// version should be good enough to achieve the P&F goals.
//
// TODO: Confirm that the current cap of maximumSeats allows us to
// achieve the above.
if finalSeats > maximumSeats {
finalSeats = maximumSeats
}
additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
}
return WorkEstimate{

View File

@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
additionalLatencyExpected: 0,
},
{
name: "request verb is create, watches registered, maximum is exceeded",
name: "request verb is create, watches registered, maximum is capped",
requestURI: "http://server/apis/foo.bar/v1/foos",
requestInfo: &apirequest.RequestInfo{
Verb: "create",
@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
},
watchCount: 199,
initialSeatsExpected: 1,
finalSeatsExpected: 20,
additionalLatencyExpected: 5 * time.Millisecond,
finalSeatsExpected: 10,
additionalLatencyExpected: 10 * time.Millisecond,
},
{
name: "request verb is update, no watches",