Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-15 23:03:40 +00:00
P&F: update mutating request estimation
This commit is contained in:
parent e262db7a4d
commit 4700cf622b
@@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
 }
 
 func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
-	if (!e.enabled) {
+	if !e.enabled {
 		return WorkEstimate{
 			InitialSeats: 1,
 		}
@@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
 	// - cost of processing an event object for each watcher (e.g. filtering,
 	//   sending data over network)
 	// We're starting simple to get some operational experience with it and
-	// we will work on tuning the algorithm later. As a starting point we
-	// we simply assume that processing 1 event takes 1/Nth of a seat for
-	// M milliseconds and processing different events is infinitely parallelizable.
-	// We simply record the appropriate values here and rely on potential
-	// reshaping of the request if the concurrency limit for a given priority
-	// level will not allow to run request with that many seats.
-	//
-	// TODO: As described in the KEP, we should take into account that not all
-	// events are equal and try to estimate the cost of a single event based on
-	// some historical data about size of events.
+	// we will work on tuning the algorithm later. Given that the actual work
+	// associated with processing watch events is happening in multiple
+	// goroutines (proportional to the number of watchers) that are all
+	// resumed at once, as a starting point we assume that each such goroutine
+	// is taking 1/Nth of a seat for M milliseconds.
+	// We allow the accounting of that work in P&F to be reshaped into another
+	// rectangle of equal area for practical reasons.
 	var finalSeats uint
 	var additionalLatency time.Duration
 
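The replacement comment states the model concretely: the fan-out for one event with watchCount watchers is treated as ceil(watchCount / watchesPerSeat) seats held for eventAdditionalDuration. Below is a minimal standalone sketch of that starting-point computation; the constant values (10 watchers per seat, 5ms per event) are assumptions consistent with the test expectations further down, not guaranteed by this diff.

package main

import (
	"fmt"
	"math"
	"time"
)

// Assumed values; the real constants live in the apiserver flowcontrol
// request package and may differ across releases.
const (
	watchesPerSeat          = 10.0                 // N: watchers served by one seat
	eventAdditionalDuration = 5 * time.Millisecond // M: time each goroutine holds its share
)

// uncappedEstimate models the new comment: each watcher resumes a goroutine
// taking 1/watchesPerSeat of a seat for eventAdditionalDuration, so the
// whole fan-out occupies ceil(watchCount/watchesPerSeat) seats for that time.
func uncappedEstimate(watchCount int) (seats uint, latency time.Duration) {
	seats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	return seats, eventAdditionalDuration
}

func main() {
	seats, latency := uncappedEstimate(199)
	fmt.Printf("199 watchers -> %d seats for %v\n", seats, latency) // 20 seats for 5ms
}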
@@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
 	// However, until we tune the estimation we want to stay on the safe side
 	// and avoid introducing additional latency for almost every single request.
 	if watchCount >= watchesPerSeat {
+		// TODO: As described in the KEP, we should take into account that not all
+		// events are equal and try to estimate the cost of a single event based on
+		// some historical data about size of events.
 		finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
-		additionalLatency = eventAdditionalDuration
+		finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
+
+		// While processing individual events is highly parallel,
+		// the design/implementation of P&F has a couple of limitations that
+		// make using this assumption in the P&F implementation very
+		// inefficient because:
+		// - we reserve max(initialSeats, finalSeats) for the time of executing
+		//   both phases of the request
+		// - even more importantly, when a given `wide` request is the one to
+		//   be dispatched, we are not dispatching any other request until
+		//   we accumulate enough seats to dispatch the nominated one, even
+		//   if currently unoccupied seats would allow for dispatching some
+		//   other requests in the meantime
+		// As a consequence of these, the wider the request, the more capacity
+		// will effectively be blocked and unused during dispatching and
+		// executing this request.
+		//
+		// To mitigate the impact of it, we're capping the maximum number of
+		// seats that can be assigned to a given request. Thanks to it:
+		// 1) we reduce the amount of seat-seconds that are "wasted" during
+		//    dispatching and executing the initial phase of the request
+		// 2) we are not changing the finalWork estimate - just potentially
+		//    reshaping it to be narrower and longer. As long as the maximum
+		//    seats setting will prevent dispatching too many requests at once
+		//    to prevent overloading kube-apiserver (and/or etcd or the VM or
+		//    a physical machine it is running on), we believe the relaxed
+		//    version should be good enough to achieve the P&F goals.
+		//
+		// TODO: Confirm that the current cap of maximumSeats allows us to
+		// achieve the above.
+		if finalSeats > maximumSeats {
+			finalSeats = maximumSeats
+		}
+		additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
 	}
 
 	return WorkEstimate{
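The key arithmetic here: finalWork is the rectangle's area (seats x duration) computed before the cap, and DurationPerSeat spreads that same area over the capped width. Below is a standalone sketch of the cap-and-reshape step, with plain time.Duration arithmetic standing in for the package's SeatSeconds type; maximumSeats = 10 is an assumption consistent with the test expectations below.

package main

import (
	"fmt"
	"math"
	"time"
)

// Assumed constants, consistent with the test expectations below.
const (
	watchesPerSeat          = 10.0
	eventAdditionalDuration = 5 * time.Millisecond
	maximumSeats            = 10
)

// capAndReshape mirrors the new logic: compute the uncapped width, record
// the total work (seats x duration), cap the width, then stretch the
// duration so the seat-seconds area stays the same. Only meaningful for
// watchCount >= watchesPerSeat, matching the guard in the real code.
func capAndReshape(watchCount int) (seats uint, latency time.Duration) {
	seats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
	// Stand-in for SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration).
	finalWork := time.Duration(seats) * eventAdditionalDuration
	if seats > maximumSeats {
		seats = maximumSeats
	}
	// Stand-in for finalWork.DurationPerSeat(float64(finalSeats)).
	latency = finalWork / time.Duration(seats)
	return seats, latency
}

func main() {
	seats, latency := capAndReshape(199)
	// ceil(199/10) = 20 seats -> capped at 10; 20 * 5ms = 100 seat-ms of work
	// spread over 10 seats = 10ms of additional latency.
	fmt.Printf("199 watchers -> %d seats, %v additional latency\n", seats, latency)
}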
@@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
 			additionalLatencyExpected: 0,
 		},
 		{
-			name:       "request verb is create, watches registered, maximum is exceeded",
+			name:       "request verb is create, watches registered, maximum is capped",
 			requestURI: "http://server/apis/foo.bar/v1/foos",
 			requestInfo: &apirequest.RequestInfo{
 				Verb: "create",
@@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
 			},
 			watchCount:                199,
 			initialSeatsExpected:      1,
-			finalSeatsExpected:        20,
-			additionalLatencyExpected: 5 * time.Millisecond,
+			finalSeatsExpected:        10,
+			additionalLatencyExpected: 10 * time.Millisecond,
 		},
 		{
 			name: "request verb is update, no watches",
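The updated expectations follow directly from the reshaping: 199 watches yield ceil(199/10) = 20 uncapped seats and 20 x 5ms = 100 seat-milliseconds of final work; capping at the implied maximumSeats of 10 spreads the same work over half as many seats, doubling the additional latency to 10ms.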