mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-17 15:50:10 +00:00
P&F: update mutating request estimation
This commit is contained in:
parent
e262db7a4d
commit
4700cf622b
@ -50,7 +50,7 @@ type mutatingWorkEstimator struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
|
func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
|
||||||
if (!e.enabled) {
|
if !e.enabled {
|
||||||
return WorkEstimate{
|
return WorkEstimate{
|
||||||
InitialSeats: 1,
|
InitialSeats: 1,
|
||||||
}
|
}
|
||||||
@ -75,16 +75,13 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
|
|||||||
// - cost of processing an event object for each watcher (e.g. filtering,
|
// - cost of processing an event object for each watcher (e.g. filtering,
|
||||||
// sending data over network)
|
// sending data over network)
|
||||||
// We're starting simple to get some operational experience with it and
|
// We're starting simple to get some operational experience with it and
|
||||||
// we will work on tuning the algorithm later. As a starting point we
|
// we will work on tuning the algorithm later. Given that the actual work
|
||||||
// we simply assume that processing 1 event takes 1/Nth of a seat for
|
// associated with processing watch events is happening in multiple
|
||||||
// M milliseconds and processing different events is infinitely parallelizable.
|
// goroutines (proportional to the number of watchers) that are all
|
||||||
// We simply record the appropriate values here and rely on potential
|
// resumed at once, as a starting point we assume that each such goroutine
|
||||||
// reshaping of the request if the concurrency limit for a given priority
|
// is taking 1/Nth of a seat for M milliseconds.
|
||||||
// level will not allow to run request with that many seats.
|
// We allow the accounting of that work in P&F to be reshaped into another
|
||||||
//
|
// rectangle of equal area for practical reasons.
|
||||||
// TODO: As described in the KEP, we should take into account that not all
|
|
||||||
// events are equal and try to estimate the cost of a single event based on
|
|
||||||
// some historical data about size of events.
|
|
||||||
var finalSeats uint
|
var finalSeats uint
|
||||||
var additionalLatency time.Duration
|
var additionalLatency time.Duration
|
||||||
|
|
||||||
@ -94,8 +91,44 @@ func (e *mutatingWorkEstimator) estimate(r *http.Request) WorkEstimate {
|
|||||||
// However, until we tune the estimation we want to stay on the safe side
|
// However, until we tune the estimation we want to stay on the safe side
|
||||||
// and avoid introducing additional latency for almost every single request.
|
// and avoid introducing additional latency for almost every single request.
|
||||||
if watchCount >= watchesPerSeat {
|
if watchCount >= watchesPerSeat {
|
||||||
|
// TODO: As described in the KEP, we should take into account that not all
|
||||||
|
// events are equal and try to estimate the cost of a single event based on
|
||||||
|
// some historical data about size of events.
|
||||||
finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
|
finalSeats = uint(math.Ceil(float64(watchCount) / watchesPerSeat))
|
||||||
additionalLatency = eventAdditionalDuration
|
finalWork := SeatsTimesDuration(float64(finalSeats), eventAdditionalDuration)
|
||||||
|
|
||||||
|
// While processing individual events is highly parallel,
|
||||||
|
// the design/implementation of P&F has a couple limitations that
|
||||||
|
// make using this assumption in the P&F implementation very
|
||||||
|
// inefficient because:
|
||||||
|
// - we reserve max(initialSeats, finalSeats) for time of executing
|
||||||
|
// both phases of the request
|
||||||
|
// - even more importantly, when a given `wide` request is the one to
|
||||||
|
// be dispatched, we are not dispatching any other request until
|
||||||
|
// we accumulate enough seats to dispatch the nominated one, even
|
||||||
|
// if currently unoccupied seats would allow for dispatching some
|
||||||
|
// other requests in the meantime
|
||||||
|
// As a consequence of these, the wider the request, the more capacity
|
||||||
|
// will effectively be blocked and unused during dispatching and
|
||||||
|
// executing this request.
|
||||||
|
//
|
||||||
|
// To mitigate the impact of it, we're capping the maximum number of
|
||||||
|
// seats that can be assigned to a given request. Thanks to it:
|
||||||
|
// 1) we reduce the amount of seat-seconds that are "wasted" during
|
||||||
|
// dispatching and executing initial phase of the request
|
||||||
|
// 2) we are not changing the finalWork estimate - just potentially
|
||||||
|
// reshaping it to be narrower and longer. As long as the maximum
|
||||||
|
// seats setting will prevent dispatching too many requests at once
|
||||||
|
// to prevent overloading kube-apiserver (and/or etcd or the VM or
|
||||||
|
// a physical machine it is running on), we believe the relaxed
|
||||||
|
// version should be good enough to achieve the P&F goals.
|
||||||
|
//
|
||||||
|
// TODO: Confirm that the current cap of maximumSeats allows us to
|
||||||
|
// achieve the above.
|
||||||
|
if finalSeats > maximumSeats {
|
||||||
|
finalSeats = maximumSeats
|
||||||
|
}
|
||||||
|
additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
|
||||||
}
|
}
|
||||||
|
|
||||||
return WorkEstimate{
|
return WorkEstimate{
|
||||||
|
@ -291,7 +291,7 @@ func TestWorkEstimator(t *testing.T) {
|
|||||||
additionalLatencyExpected: 0,
|
additionalLatencyExpected: 0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "request verb is create, watches registered, maximum is exceeded",
|
name: "request verb is create, watches registered, maximum is capped",
|
||||||
requestURI: "http://server/apis/foo.bar/v1/foos",
|
requestURI: "http://server/apis/foo.bar/v1/foos",
|
||||||
requestInfo: &apirequest.RequestInfo{
|
requestInfo: &apirequest.RequestInfo{
|
||||||
Verb: "create",
|
Verb: "create",
|
||||||
@ -300,8 +300,8 @@ func TestWorkEstimator(t *testing.T) {
|
|||||||
},
|
},
|
||||||
watchCount: 199,
|
watchCount: 199,
|
||||||
initialSeatsExpected: 1,
|
initialSeatsExpected: 1,
|
||||||
finalSeatsExpected: 20,
|
finalSeatsExpected: 10,
|
||||||
additionalLatencyExpected: 5 * time.Millisecond,
|
additionalLatencyExpected: 10 * time.Millisecond,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "request verb is update, no watches",
|
name: "request verb is update, no watches",
|
||||||
|
Loading…
Reference in New Issue
Block a user