mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 04:06:03 +00:00
Merge pull request #88714 from MikeSpreitzer/apf-finer-metrics2
Extend API Priority and Fairness metrics
This commit is contained in:
commit
d90b37f16e
@ -648,7 +648,7 @@ func (cfgCtl *configController) startRequest(ctx context.Context, rd RequestDige
|
||||
}
|
||||
startWaitingTime = time.Now()
|
||||
klog.V(7).Infof("startRequest(%#+v) => fsName=%q, distMethod=%#+v, plName=%q, numQueues=%d", rd, fs.Name, fs.Spec.DistinguisherMethod, plName, numQueues)
|
||||
req, idle := plState.queues.StartRequest(ctx, hashValue, rd.RequestInfo, rd.User)
|
||||
req, idle := plState.queues.StartRequest(ctx, hashValue, fs.Name, rd.RequestInfo, rd.User)
|
||||
if idle {
|
||||
cfgCtl.maybeReapLocked(plName, plState)
|
||||
}
|
||||
|
@ -21,13 +21,6 @@ import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
// TODO: decide whether to use the existing metrics, which
|
||||
// categorize according to mutating vs readonly, or make new
|
||||
// metrics because this filter does not pay attention to that
|
||||
// distinction
|
||||
|
||||
// "k8s.io/apiserver/pkg/endpoints/metrics"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/clock"
|
||||
"k8s.io/apiserver/pkg/util/flowcontrol/counter"
|
||||
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
|
||||
@ -109,6 +102,7 @@ func (cfgCtl *configController) Handle(ctx context.Context, requestDigest Reques
|
||||
if queued {
|
||||
metrics.ObserveWaitingDuration(pl.Name, fs.Name, strconv.FormatBool(req != nil), time.Since(startWaitingTime))
|
||||
}
|
||||
metrics.AddDispatch(pl.Name, fs.Name)
|
||||
executed = true
|
||||
startExecutionTime := time.Now()
|
||||
execFn()
|
||||
|
@ -115,11 +115,11 @@ func (cqs *ctlTestQueueSet) IsIdle() bool {
|
||||
return cqs.countActive == 0
|
||||
}
|
||||
|
||||
func (cqs *ctlTestQueueSet) StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (req fq.Request, idle bool) {
|
||||
func (cqs *ctlTestQueueSet) StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (req fq.Request, idle bool) {
|
||||
cqs.cts.lock.Lock()
|
||||
defer cqs.cts.lock.Unlock()
|
||||
cqs.countActive++
|
||||
cqs.cts.t.Logf("Queued %#+v %#+v for %p QS=%s, countActive:=%d", descr1, descr2, cqs, cqs.qc.Name, cqs.countActive)
|
||||
cqs.cts.t.Logf("Queued %q %#+v %#+v for %p QS=%s, countActive:=%d", fsName, descr1, descr2, cqs, cqs.qc.Name, cqs.countActive)
|
||||
return &ctlTestRequest{cqs, cqs.qc.Name, descr1, descr2}, false
|
||||
}
|
||||
|
||||
|
@ -76,7 +76,7 @@ type QueueSet interface {
|
||||
// returned bool indicates whether the QueueSet was idle at the
|
||||
// moment of the return. Otherwise idle==false and the client
|
||||
// must call the Wait method of the Request exactly once.
|
||||
StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (req Request, idle bool)
|
||||
StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (req Request, idle bool)
|
||||
}
|
||||
|
||||
// Request represents the remainder of the handling of one request
|
||||
|
@ -33,6 +33,7 @@ go_test(
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/testing:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/testing/clock:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/flowcontrol/metrics:go_default_library",
|
||||
"//vendor/k8s.io/klog:go_default_library",
|
||||
],
|
||||
)
|
||||
|
@ -218,10 +218,10 @@ const (
|
||||
|
||||
// StartRequest begins the process of handling a request. We take the
|
||||
// approach of updating the metrics about total requests queued and
|
||||
// executing on each path out of this method and Request::Wait. We do
|
||||
// not update those metrics in lower level functions because there can
|
||||
// be multiple lower level changes in one invocation here.
|
||||
func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (fq.Request, bool) {
|
||||
// executing at each point where there is a change in that quantity,
|
||||
// because the metrics --- and only the metrics --- track that
|
||||
// quantity per FlowSchema.
|
||||
func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (fq.Request, bool) {
|
||||
qs.lockAndSyncTime()
|
||||
defer qs.lock.Unlock()
|
||||
var req *request
|
||||
@ -231,11 +231,11 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
|
||||
// Apply only concurrency limit, if zero queues desired
|
||||
if qs.qCfg.DesiredNumQueues < 1 {
|
||||
if qs.totRequestsExecuting >= qs.dCfg.ConcurrencyLimit {
|
||||
klog.V(5).Infof("QS(%s): rejecting request %#+v %#+v because %d are executing and the limit is %d", qs.qCfg.Name, descr1, descr2, qs.totRequestsExecuting, qs.dCfg.ConcurrencyLimit)
|
||||
klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v because %d are executing and the limit is %d", qs.qCfg.Name, fsName, descr1, descr2, qs.totRequestsExecuting, qs.dCfg.ConcurrencyLimit)
|
||||
metrics.AddReject(qs.qCfg.Name, fsName, "concurrency-limit")
|
||||
return nil, qs.isIdleLocked()
|
||||
}
|
||||
req = qs.dispatchSansQueueLocked(ctx, descr1, descr2)
|
||||
metrics.UpdateFlowControlRequestsExecuting(qs.qCfg.Name, qs.totRequestsExecuting)
|
||||
req = qs.dispatchSansQueueLocked(ctx, fsName, descr1, descr2)
|
||||
return req, false
|
||||
}
|
||||
|
||||
@ -246,13 +246,12 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
|
||||
// 3) Reject current request if there is not enough concurrency shares and
|
||||
// we are at max queue length
|
||||
// 4) If not rejected, create a request and enqueue
|
||||
req = qs.timeoutOldRequestsAndRejectOrEnqueueLocked(ctx, hashValue, descr1, descr2)
|
||||
defer metrics.UpdateFlowControlRequestsInQueue(qs.qCfg.Name, qs.totRequestsWaiting)
|
||||
req = qs.timeoutOldRequestsAndRejectOrEnqueueLocked(ctx, hashValue, fsName, descr1, descr2)
|
||||
// req == nil means that the request was rejected - no remaining
|
||||
// concurrency shares and at max queue length already
|
||||
if req == nil {
|
||||
klog.V(5).Infof("QS(%s): rejecting request %#+v %#+v due to queue full", qs.qCfg.Name, descr1, descr2)
|
||||
metrics.AddReject(qs.qCfg.Name, "queue-full")
|
||||
klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v due to queue full", qs.qCfg.Name, fsName, descr1, descr2)
|
||||
metrics.AddReject(qs.qCfg.Name, fsName, "queue-full")
|
||||
return nil, qs.isIdleLocked()
|
||||
}
|
||||
|
||||
@ -266,7 +265,6 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
|
||||
// fair queuing technique to pick a queue and dispatch a
|
||||
// request from that queue.
|
||||
qs.dispatchAsMuchAsPossibleLocked()
|
||||
defer metrics.UpdateFlowControlRequestsExecuting(qs.qCfg.Name, qs.totRequestsExecuting)
|
||||
|
||||
// ========================================================================
|
||||
// Step 3:
|
||||
@ -288,7 +286,7 @@ func (qs *queueSet) StartRequest(ctx context.Context, hashValue uint64, descr1,
|
||||
// known that the count does not need to be accurate.
|
||||
// BTW, the count only needs to be accurate in a test that
|
||||
// uses FakeEventClock::Run().
|
||||
klog.V(6).Infof("QS(%s): Context of request %#+v %#+v is Done", qs.qCfg.Name, descr1, descr2)
|
||||
klog.V(6).Infof("QS(%s): Context of request %q %#+v %#+v is Done", qs.qCfg.Name, fsName, descr1, descr2)
|
||||
qs.cancelWait(req)
|
||||
qs.goroutineDoneOrBlocked()
|
||||
}()
|
||||
@ -329,7 +327,7 @@ func (req *request) wait() (bool, bool) {
|
||||
switch decision {
|
||||
case decisionReject:
|
||||
klog.V(5).Infof("QS(%s): request %#+v %#+v timed out after being enqueued\n", qs.qCfg.Name, req.descr1, req.descr2)
|
||||
metrics.AddReject(qs.qCfg.Name, "time-out")
|
||||
metrics.AddReject(qs.qCfg.Name, req.fsName, "time-out")
|
||||
return false, qs.isIdleLocked()
|
||||
case decisionCancel:
|
||||
// TODO(aaron-prindle) add metrics for this case
|
||||
@ -400,12 +398,12 @@ func (qs *queueSet) getVirtualTimeRatioLocked() float64 {
|
||||
// returns the enqueud request on a successful enqueue
|
||||
// returns nil in the case that there is no available concurrency or
|
||||
// the queuelengthlimit has been reached
|
||||
func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) *request {
|
||||
func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) *request {
|
||||
// Start with the shuffle sharding, to pick a queue.
|
||||
queueIdx := qs.chooseQueueIndexLocked(hashValue, descr1, descr2)
|
||||
queue := qs.queues[queueIdx]
|
||||
// The next step is the logic to reject requests that have been waiting too long
|
||||
qs.removeTimedOutRequestsFromQueueLocked(queue)
|
||||
qs.removeTimedOutRequestsFromQueueLocked(queue, fsName)
|
||||
// NOTE: currently timeout is only checked for each new request. This means that there can be
|
||||
// requests that are in the queue longer than the timeout if there are no new requests
|
||||
// We prefer the simplicity over the promptness, at least for now.
|
||||
@ -413,6 +411,7 @@ func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Conte
|
||||
// Create a request and enqueue
|
||||
req := &request{
|
||||
qs: qs,
|
||||
fsName: fsName,
|
||||
ctx: ctx,
|
||||
decision: lockingpromise.NewWriteOnce(&qs.lock, qs.counter),
|
||||
arrivalTime: qs.clock.Now(),
|
||||
@ -423,7 +422,7 @@ func (qs *queueSet) timeoutOldRequestsAndRejectOrEnqueueLocked(ctx context.Conte
|
||||
if ok := qs.rejectOrEnqueueLocked(req); !ok {
|
||||
return nil
|
||||
}
|
||||
metrics.ObserveQueueLength(qs.qCfg.Name, len(queue.requests))
|
||||
metrics.ObserveQueueLength(qs.qCfg.Name, fsName, len(queue.requests))
|
||||
return req
|
||||
}
|
||||
|
||||
@ -446,7 +445,7 @@ func (qs *queueSet) chooseQueueIndexLocked(hashValue uint64, descr1, descr2 inte
|
||||
|
||||
// removeTimedOutRequestsFromQueueLocked rejects old requests that have been enqueued
|
||||
// past the requestWaitLimit
|
||||
func (qs *queueSet) removeTimedOutRequestsFromQueueLocked(queue *queue) {
|
||||
func (qs *queueSet) removeTimedOutRequestsFromQueueLocked(queue *queue, fsName string) {
|
||||
timeoutIdx := -1
|
||||
now := qs.clock.Now()
|
||||
reqs := queue.requests
|
||||
@ -461,6 +460,7 @@ func (qs *queueSet) removeTimedOutRequestsFromQueueLocked(queue *queue) {
|
||||
req.decision.SetLocked(decisionReject)
|
||||
// get index for timed out requests
|
||||
timeoutIdx = i
|
||||
metrics.AddRequestsInQueues(qs.qCfg.Name, req.fsName, -1)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
@ -505,6 +505,7 @@ func (qs *queueSet) enqueueLocked(request *request) {
|
||||
}
|
||||
queue.Enqueue(request)
|
||||
qs.totRequestsWaiting++
|
||||
metrics.AddRequestsInQueues(qs.qCfg.Name, request.fsName, 1)
|
||||
}
|
||||
|
||||
// dispatchAsMuchAsPossibleLocked runs a loop, as long as there
|
||||
@ -522,10 +523,11 @@ func (qs *queueSet) dispatchAsMuchAsPossibleLocked() {
|
||||
}
|
||||
}
|
||||
|
||||
func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, descr1, descr2 interface{}) *request {
|
||||
func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, fsName string, descr1, descr2 interface{}) *request {
|
||||
now := qs.clock.Now()
|
||||
req := &request{
|
||||
qs: qs,
|
||||
fsName: fsName,
|
||||
ctx: ctx,
|
||||
startTime: now,
|
||||
decision: lockingpromise.NewWriteOnce(&qs.lock, qs.counter),
|
||||
@ -535,8 +537,9 @@ func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, descr1, descr2
|
||||
}
|
||||
req.decision.SetLocked(decisionExecute)
|
||||
qs.totRequestsExecuting++
|
||||
metrics.AddRequestsExecuting(qs.qCfg.Name, fsName, 1)
|
||||
if klog.V(5) {
|
||||
klog.Infof("QS(%s) at r=%s v=%.9fs: immediate dispatch of request %#+v %#+v, qs will have %d executing", qs.qCfg.Name, now.Format(nsTimeFmt), qs.virtualTime, descr1, descr2, qs.totRequestsExecuting)
|
||||
klog.Infof("QS(%s) at r=%s v=%.9fs: immediate dispatch of request %q %#+v %#+v, qs will have %d executing", qs.qCfg.Name, now.Format(nsTimeFmt), qs.virtualTime, fsName, descr1, descr2, qs.totRequestsExecuting)
|
||||
}
|
||||
return req
|
||||
}
|
||||
@ -563,6 +566,8 @@ func (qs *queueSet) dispatchLocked() bool {
|
||||
qs.totRequestsWaiting--
|
||||
qs.totRequestsExecuting++
|
||||
queue.requestsExecuting++
|
||||
metrics.AddRequestsInQueues(qs.qCfg.Name, request.fsName, -1)
|
||||
metrics.AddRequestsExecuting(qs.qCfg.Name, request.fsName, 1)
|
||||
if klog.V(6) {
|
||||
klog.Infof("QS(%s) at r=%s v=%.9fs: dispatching request %#+v %#+v from queue %d with virtual start time %.9fs, queue will have %d waiting & %d executing", qs.qCfg.Name, request.startTime.Format(nsTimeFmt), qs.virtualTime, request.descr1, request.descr2, queue.index, queue.virtualStart, len(queue.requests), queue.requestsExecuting)
|
||||
}
|
||||
@ -590,6 +595,7 @@ func (qs *queueSet) cancelWait(req *request) {
|
||||
// remove the request
|
||||
queue.requests = append(queue.requests[:i], queue.requests[i+1:]...)
|
||||
qs.totRequestsWaiting--
|
||||
metrics.AddRequestsInQueues(qs.qCfg.Name, req.fsName, -1)
|
||||
break
|
||||
}
|
||||
}
|
||||
@ -634,8 +640,6 @@ func (qs *queueSet) finishRequestAndDispatchAsMuchAsPossible(req *request) bool
|
||||
|
||||
qs.finishRequestLocked(req)
|
||||
qs.dispatchAsMuchAsPossibleLocked()
|
||||
metrics.UpdateFlowControlRequestsInQueue(qs.qCfg.Name, qs.totRequestsWaiting)
|
||||
metrics.UpdateFlowControlRequestsExecuting(qs.qCfg.Name, qs.totRequestsExecuting)
|
||||
return qs.isIdleLocked()
|
||||
}
|
||||
|
||||
@ -644,6 +648,7 @@ func (qs *queueSet) finishRequestAndDispatchAsMuchAsPossible(req *request) bool
|
||||
// callback updates important state in the queueSet
|
||||
func (qs *queueSet) finishRequestLocked(r *request) {
|
||||
qs.totRequestsExecuting--
|
||||
metrics.AddRequestsExecuting(qs.qCfg.Name, r.fsName, -1)
|
||||
|
||||
if r.queue == nil {
|
||||
if klog.V(6) {
|
||||
|
@ -18,6 +18,7 @@ package queueset
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
@ -27,6 +28,7 @@ import (
|
||||
fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
|
||||
test "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/testing"
|
||||
"k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/testing/clock"
|
||||
"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
@ -52,24 +54,36 @@ type uniformClient struct {
|
||||
// expectPass indicates whether the QueueSet is expected to be fair.
|
||||
// expectedAllRequests indicates whether all requests are expected to get dispatched.
|
||||
func exerciseQueueSetUniformScenario(t *testing.T, name string, qs fq.QueueSet, sc uniformScenario,
|
||||
evalDuration time.Duration, expectPass bool, expectedAllRequests bool,
|
||||
evalDuration time.Duration,
|
||||
expectPass, expectedAllRequests, expectInqueueMetrics, expectExecutingMetrics bool,
|
||||
rejectReason string,
|
||||
clk *clock.FakeEventClock, counter counter.GoRoutineCounter) {
|
||||
|
||||
now := time.Now()
|
||||
t.Logf("%s: Start %s, clk=%p, grc=%p", clk.Now().Format(nsTimeFmt), name, clk, counter)
|
||||
integrators := make([]test.Integrator, len(sc))
|
||||
var failedCount uint64
|
||||
expectedInqueue := ""
|
||||
expectedExecuting := ""
|
||||
if expectInqueueMetrics || expectExecutingMetrics {
|
||||
metrics.Reset()
|
||||
}
|
||||
executions := make([]int32, len(sc))
|
||||
rejects := make([]int32, len(sc))
|
||||
for i, uc := range sc {
|
||||
integrators[i] = test.NewIntegrator(clk)
|
||||
fsName := fmt.Sprintf("client%d", i)
|
||||
expectedInqueue = expectedInqueue + fmt.Sprintf(` apiserver_flowcontrol_current_inqueue_requests{flowSchema=%q,priorityLevel=%q} 0%s`, fsName, name, "\n")
|
||||
for j := 0; j < uc.nThreads; j++ {
|
||||
counter.Add(1)
|
||||
go func(i, j int, uc uniformClient, igr test.Integrator) {
|
||||
for k := 0; k < uc.nCalls; k++ {
|
||||
ClockWait(clk, counter, uc.thinkDuration)
|
||||
req, idle := qs.StartRequest(context.Background(), uc.hash, name, []int{i, j, k})
|
||||
req, idle := qs.StartRequest(context.Background(), uc.hash, fsName, name, []int{i, j, k})
|
||||
t.Logf("%s: %d, %d, %d got req=%p, idle=%v", clk.Now().Format(nsTimeFmt), i, j, k, req, idle)
|
||||
if req == nil {
|
||||
atomic.AddUint64(&failedCount, 1)
|
||||
atomic.AddInt32(&rejects[i], 1)
|
||||
break
|
||||
}
|
||||
if idle {
|
||||
@ -79,6 +93,7 @@ func exerciseQueueSetUniformScenario(t *testing.T, name string, qs fq.QueueSet,
|
||||
idle2 := req.Finish(func() {
|
||||
executed = true
|
||||
t.Logf("%s: %d, %d, %d executing", clk.Now().Format(nsTimeFmt), i, j, k)
|
||||
atomic.AddInt32(&executions[i], 1)
|
||||
igr.Add(1)
|
||||
ClockWait(clk, counter, uc.execDuration)
|
||||
igr.Add(-1)
|
||||
@ -86,6 +101,7 @@ func exerciseQueueSetUniformScenario(t *testing.T, name string, qs fq.QueueSet,
|
||||
t.Logf("%s: %d, %d, %d got executed=%v, idle2=%v", clk.Now().Format(nsTimeFmt), i, j, k, executed, idle2)
|
||||
if !executed {
|
||||
atomic.AddUint64(&failedCount, 1)
|
||||
atomic.AddInt32(&rejects[i], 1)
|
||||
}
|
||||
}
|
||||
counter.Add(-1)
|
||||
@ -124,6 +140,52 @@ func exerciseQueueSetUniformScenario(t *testing.T, name string, qs fq.QueueSet,
|
||||
} else if !expectedAllRequests && failedCount == 0 {
|
||||
t.Errorf("Expected failed requests but all requests succeeded")
|
||||
}
|
||||
if expectInqueueMetrics {
|
||||
e := `
|
||||
# HELP apiserver_flowcontrol_current_inqueue_requests [ALPHA] Number of requests currently pending in queues of the API Priority and Fairness system
|
||||
# TYPE apiserver_flowcontrol_current_inqueue_requests gauge
|
||||
` + expectedInqueue
|
||||
err := metrics.GatherAndCompare(e, "apiserver_flowcontrol_current_inqueue_requests")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
t.Log("Success with" + e)
|
||||
}
|
||||
}
|
||||
expectedRejects := ""
|
||||
for i := range sc {
|
||||
fsName := fmt.Sprintf("client%d", i)
|
||||
if atomic.AddInt32(&executions[i], 0) > 0 {
|
||||
expectedExecuting = expectedExecuting + fmt.Sprintf(` apiserver_flowcontrol_current_executing_requests{flowSchema=%q,priorityLevel=%q} 0%s`, fsName, name, "\n")
|
||||
}
|
||||
if atomic.AddInt32(&rejects[i], 0) > 0 {
|
||||
expectedRejects = expectedRejects + fmt.Sprintf(` apiserver_flowcontrol_rejected_requests_total{flowSchema=%q,priorityLevel=%q,reason=%q} %d%s`, fsName, name, rejectReason, rejects[i], "\n")
|
||||
}
|
||||
}
|
||||
if expectExecutingMetrics && len(expectedExecuting) > 0 {
|
||||
e := `
|
||||
# HELP apiserver_flowcontrol_current_executing_requests [ALPHA] Number of requests currently executing in the API Priority and Fairness system
|
||||
# TYPE apiserver_flowcontrol_current_executing_requests gauge
|
||||
` + expectedExecuting
|
||||
err := metrics.GatherAndCompare(e, "apiserver_flowcontrol_current_executing_requests")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
t.Log("Success with" + e)
|
||||
}
|
||||
}
|
||||
if expectExecutingMetrics && len(expectedRejects) > 0 {
|
||||
e := `
|
||||
# HELP apiserver_flowcontrol_rejected_requests_total [ALPHA] Number of requests rejected by API Priority and Fairness system
|
||||
# TYPE apiserver_flowcontrol_rejected_requests_total counter
|
||||
` + expectedRejects
|
||||
err := metrics.GatherAndCompare(e, "apiserver_flowcontrol_rejected_requests_total")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
t.Log("Success with" + e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ClockWait(clk *clock.FakeEventClock, counter counter.GoRoutineCounter, duration time.Duration) {
|
||||
@ -144,6 +206,7 @@ func init() {
|
||||
|
||||
// TestNoRestraint should fail because the dummy QueueSet exercises no control
|
||||
func TestNoRestraint(t *testing.T) {
|
||||
metrics.Register()
|
||||
now := time.Now()
|
||||
clk, counter := clock.NewFakeEventClock(now, 0, nil)
|
||||
nrc, err := test.NewNoRestraintFactory().BeginConstruction(fq.QueuingConfig{})
|
||||
@ -154,10 +217,11 @@ func TestNoRestraint(t *testing.T) {
|
||||
exerciseQueueSetUniformScenario(t, "NoRestraint", nr, []uniformClient{
|
||||
{1001001001, 5, 10, time.Second, time.Second},
|
||||
{2002002002, 2, 10, time.Second, time.Second / 2},
|
||||
}, time.Second*10, false, true, clk, counter)
|
||||
}, time.Second*10, false, true, false, false, "", clk, counter)
|
||||
}
|
||||
|
||||
func TestUniformFlows(t *testing.T) {
|
||||
metrics.Register()
|
||||
now := time.Now()
|
||||
|
||||
clk, counter := clock.NewFakeEventClock(now, 0, nil)
|
||||
@ -175,13 +239,14 @@ func TestUniformFlows(t *testing.T) {
|
||||
}
|
||||
qs := qsc.Complete(fq.DispatchingConfig{ConcurrencyLimit: 4})
|
||||
|
||||
exerciseQueueSetUniformScenario(t, "UniformFlows", qs, []uniformClient{
|
||||
exerciseQueueSetUniformScenario(t, qCfg.Name, qs, []uniformClient{
|
||||
{1001001001, 5, 10, time.Second, time.Second},
|
||||
{2002002002, 5, 10, time.Second, time.Second},
|
||||
}, time.Second*20, true, true, clk, counter)
|
||||
}, time.Second*20, true, true, true, true, "", clk, counter)
|
||||
}
|
||||
|
||||
func TestDifferentFlows(t *testing.T) {
|
||||
metrics.Register()
|
||||
now := time.Now()
|
||||
|
||||
clk, counter := clock.NewFakeEventClock(now, 0, nil)
|
||||
@ -199,13 +264,14 @@ func TestDifferentFlows(t *testing.T) {
|
||||
}
|
||||
qs := qsc.Complete(fq.DispatchingConfig{ConcurrencyLimit: 4})
|
||||
|
||||
exerciseQueueSetUniformScenario(t, "DifferentFlows", qs, []uniformClient{
|
||||
exerciseQueueSetUniformScenario(t, qCfg.Name, qs, []uniformClient{
|
||||
{1001001001, 6, 10, time.Second, time.Second},
|
||||
{2002002002, 5, 15, time.Second, time.Second / 2},
|
||||
}, time.Second*20, true, true, clk, counter)
|
||||
}, time.Second*20, true, true, true, true, "", clk, counter)
|
||||
}
|
||||
|
||||
func TestDifferentFlowsWithoutQueuing(t *testing.T) {
|
||||
metrics.Register()
|
||||
now := time.Now()
|
||||
|
||||
clk, counter := clock.NewFakeEventClock(now, 0, nil)
|
||||
@ -220,13 +286,24 @@ func TestDifferentFlowsWithoutQueuing(t *testing.T) {
|
||||
}
|
||||
qs := qsc.Complete(fq.DispatchingConfig{ConcurrencyLimit: 4})
|
||||
|
||||
exerciseQueueSetUniformScenario(t, "DifferentFlowsWithoutQueuing", qs, []uniformClient{
|
||||
exerciseQueueSetUniformScenario(t, qCfg.Name, qs, []uniformClient{
|
||||
{1001001001, 6, 10, time.Second, 57 * time.Millisecond},
|
||||
{2002002002, 4, 15, time.Second, 750 * time.Millisecond},
|
||||
}, time.Second*13, false, false, clk, counter)
|
||||
}, time.Second*13, false, false, false, true, "concurrency-limit", clk, counter)
|
||||
err = metrics.GatherAndCompare(`
|
||||
# HELP apiserver_flowcontrol_rejected_requests_total [ALPHA] Number of requests rejected by API Priority and Fairness system
|
||||
# TYPE apiserver_flowcontrol_rejected_requests_total counter
|
||||
apiserver_flowcontrol_rejected_requests_total{flowSchema="client0",priorityLevel="TestDifferentFlowsWithoutQueuing",reason="concurrency-limit"} 2
|
||||
apiserver_flowcontrol_rejected_requests_total{flowSchema="client1",priorityLevel="TestDifferentFlowsWithoutQueuing",reason="concurrency-limit"} 4
|
||||
`,
|
||||
"apiserver_flowcontrol_rejected_requests_total")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTimeout(t *testing.T) {
|
||||
metrics.Register()
|
||||
now := time.Now()
|
||||
|
||||
clk, counter := clock.NewFakeEventClock(now, 0, nil)
|
||||
@ -244,17 +321,19 @@ func TestTimeout(t *testing.T) {
|
||||
}
|
||||
qs := qsc.Complete(fq.DispatchingConfig{ConcurrencyLimit: 1})
|
||||
|
||||
exerciseQueueSetUniformScenario(t, "Timeout", qs, []uniformClient{
|
||||
exerciseQueueSetUniformScenario(t, qCfg.Name, qs, []uniformClient{
|
||||
{1001001001, 5, 100, time.Second, time.Second},
|
||||
}, time.Second*10, true, false, clk, counter)
|
||||
}, time.Second*10, true, false, true, true, "time-out", clk, counter)
|
||||
}
|
||||
|
||||
func TestContextCancel(t *testing.T) {
|
||||
metrics.Register()
|
||||
metrics.Reset()
|
||||
now := time.Now()
|
||||
clk, counter := clock.NewFakeEventClock(now, 0, nil)
|
||||
qsf := NewQueueSetFactory(clk, counter)
|
||||
qCfg := fq.QueuingConfig{
|
||||
Name: "TestTimeout",
|
||||
Name: "TestContextCancel",
|
||||
DesiredNumQueues: 11,
|
||||
QueueLengthLimit: 11,
|
||||
HandSize: 1,
|
||||
@ -267,7 +346,7 @@ func TestContextCancel(t *testing.T) {
|
||||
qs := qsc.Complete(fq.DispatchingConfig{ConcurrencyLimit: 1})
|
||||
counter.Add(1) // account for the goroutine running this test
|
||||
ctx1 := context.Background()
|
||||
req1, _ := qs.StartRequest(ctx1, 1, "test", "one")
|
||||
req1, _ := qs.StartRequest(ctx1, 1, "fs1", "test", "one")
|
||||
if req1 == nil {
|
||||
t.Error("Request rejected")
|
||||
return
|
||||
@ -283,7 +362,7 @@ func TestContextCancel(t *testing.T) {
|
||||
counter.Add(1)
|
||||
cancel2()
|
||||
}()
|
||||
req2, idle2a := qs.StartRequest(ctx2, 2, "test", "two")
|
||||
req2, idle2a := qs.StartRequest(ctx2, 2, "fs2", "test", "two")
|
||||
if idle2a {
|
||||
t.Error("2nd StartRequest returned idle")
|
||||
}
|
||||
|
@ -26,8 +26,9 @@ import (
|
||||
// request is a temporary container for "requests" with additional
|
||||
// tracking fields required for the functionality FQScheduler
|
||||
type request struct {
|
||||
qs *queueSet
|
||||
ctx context.Context
|
||||
qs *queueSet
|
||||
fsName string
|
||||
ctx context.Context
|
||||
|
||||
// The relevant queue. Is nil if this request did not go through
|
||||
// a queue.
|
||||
|
@ -53,7 +53,7 @@ func (noRestraint) IsIdle() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (noRestraint) StartRequest(ctx context.Context, hashValue uint64, descr1, descr2 interface{}) (fq.Request, bool) {
|
||||
func (noRestraint) StartRequest(ctx context.Context, hashValue uint64, fsName string, descr1, descr2 interface{}) (fq.Request, bool) {
|
||||
return noRestraintRequest{}, false
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ go_library(
|
||||
deps = [
|
||||
"//staging/src/k8s.io/component-base/metrics:go_default_library",
|
||||
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
|
||||
"//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -17,11 +17,13 @@ limitations under the License.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
compbasemetrics "k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
basemetricstestutil "k8s.io/component-base/metrics/testutil"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -50,41 +52,67 @@ func Register() {
|
||||
})
|
||||
}
|
||||
|
||||
type resettable interface {
|
||||
Reset()
|
||||
}
|
||||
|
||||
// Reset all metrics to zero
|
||||
func Reset() {
|
||||
for _, metric := range metrics {
|
||||
rm := metric.(resettable)
|
||||
rm.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
// GatherAndCompare the given metrics with the given Prometheus syntax expected value
|
||||
func GatherAndCompare(expected string, metricNames ...string) error {
|
||||
return basemetricstestutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), metricNames...)
|
||||
}
|
||||
|
||||
var (
|
||||
apiserverRejectedRequestsTotal = compbasemetrics.NewCounterVec(
|
||||
&compbasemetrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "rejected_requests_total",
|
||||
Help: "Number of rejected requests by api priority and fairness system",
|
||||
Help: "Number of requests rejected by API Priority and Fairness system",
|
||||
},
|
||||
[]string{priorityLevel, "reason"},
|
||||
[]string{priorityLevel, flowSchema, "reason"},
|
||||
)
|
||||
apiserverDispatchedRequestsTotal = compbasemetrics.NewCounterVec(
|
||||
&compbasemetrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "dispatched_requests_total",
|
||||
Help: "Number of requests released by API Priority and Fairness system for service",
|
||||
},
|
||||
[]string{priorityLevel, flowSchema},
|
||||
)
|
||||
apiserverCurrentInqueueRequests = compbasemetrics.NewGaugeVec(
|
||||
&compbasemetrics.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "current_inqueue_requests",
|
||||
Help: "Number of requests currently pending in the queue by the api priority and fairness system",
|
||||
Help: "Number of requests currently pending in queues of the API Priority and Fairness system",
|
||||
},
|
||||
[]string{priorityLevel},
|
||||
[]string{priorityLevel, flowSchema},
|
||||
)
|
||||
apiserverRequestQueueLength = compbasemetrics.NewHistogramVec(
|
||||
&compbasemetrics.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_queue_length",
|
||||
Help: "Length of queue in the api priority and fairness system",
|
||||
Name: "request_queue_length_after_enqueue",
|
||||
Help: "Length of queue in the API Priority and Fairness system, as seen by each request after it is enqueued",
|
||||
Buckets: queueLengthBuckets,
|
||||
},
|
||||
[]string{priorityLevel},
|
||||
[]string{priorityLevel, flowSchema},
|
||||
)
|
||||
apiserverRequestConcurrencyLimit = compbasemetrics.NewGaugeVec(
|
||||
&compbasemetrics.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_concurrency_limit",
|
||||
Help: "Shared concurrency limit in the api priority and fairness system",
|
||||
Help: "Shared concurrency limit in the API Priority and Fairness system",
|
||||
},
|
||||
[]string{priorityLevel},
|
||||
)
|
||||
@ -93,9 +121,9 @@ var (
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "current_executing_requests",
|
||||
Help: "Number of requests currently executing in the api priority and fairness system",
|
||||
Help: "Number of requests currently executing in the API Priority and Fairness system",
|
||||
},
|
||||
[]string{priorityLevel},
|
||||
[]string{priorityLevel, flowSchema},
|
||||
)
|
||||
apiserverRequestWaitingSeconds = compbasemetrics.NewHistogramVec(
|
||||
&compbasemetrics.HistogramOpts{
|
||||
@ -112,13 +140,14 @@ var (
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_execution_seconds",
|
||||
Help: "Time of request executing in the api priority and fairness system",
|
||||
Help: "Duration of request execution in the API Priority and Fairness system",
|
||||
Buckets: requestDurationSecondsBuckets,
|
||||
},
|
||||
[]string{priorityLevel, flowSchema},
|
||||
)
|
||||
metrics = []compbasemetrics.Registerable{
|
||||
apiserverRejectedRequestsTotal,
|
||||
apiserverDispatchedRequestsTotal,
|
||||
apiserverCurrentInqueueRequests,
|
||||
apiserverRequestQueueLength,
|
||||
apiserverRequestConcurrencyLimit,
|
||||
@ -128,14 +157,14 @@ var (
|
||||
}
|
||||
)
|
||||
|
||||
// UpdateFlowControlRequestsInQueue updates the value for the # of requests in the specified queues in flow control
|
||||
func UpdateFlowControlRequestsInQueue(priorityLevel string, inqueue int) {
|
||||
apiserverCurrentInqueueRequests.WithLabelValues(priorityLevel).Set(float64(inqueue))
|
||||
// AddRequestsInQueues adds the given delta to the gauge of the # of requests in the queues of the specified flowSchema and priorityLevel
|
||||
func AddRequestsInQueues(priorityLevel, flowSchema string, delta int) {
|
||||
apiserverCurrentInqueueRequests.WithLabelValues(priorityLevel, flowSchema).Add(float64(delta))
|
||||
}
|
||||
|
||||
// UpdateFlowControlRequestsExecuting updates the value for the # of requests executing in flow control
|
||||
func UpdateFlowControlRequestsExecuting(priorityLevel string, executing int) {
|
||||
apiserverCurrentExecutingRequests.WithLabelValues(priorityLevel).Set(float64(executing))
|
||||
// AddRequestsExecuting adds the given delta to the gauge of executing requests of the given flowSchema and priorityLevel
|
||||
func AddRequestsExecuting(priorityLevel, flowSchema string, delta int) {
|
||||
apiserverCurrentExecutingRequests.WithLabelValues(priorityLevel, flowSchema).Add(float64(delta))
|
||||
}
|
||||
|
||||
// UpdateSharedConcurrencyLimit updates the value for the concurrency limit in flow control
|
||||
@ -144,13 +173,18 @@ func UpdateSharedConcurrencyLimit(priorityLevel string, limit int) {
|
||||
}
|
||||
|
||||
// AddReject increments the # of rejected requests for flow control
|
||||
func AddReject(priorityLevel string, reason string) {
|
||||
apiserverRejectedRequestsTotal.WithLabelValues(priorityLevel, reason).Add(1)
|
||||
func AddReject(priorityLevel, flowSchema, reason string) {
|
||||
apiserverRejectedRequestsTotal.WithLabelValues(priorityLevel, flowSchema, reason).Add(1)
|
||||
}
|
||||
|
||||
// AddDispatch increments the # of dispatched requests for flow control
|
||||
func AddDispatch(priorityLevel, flowSchema string) {
|
||||
apiserverDispatchedRequestsTotal.WithLabelValues(priorityLevel, flowSchema).Add(1)
|
||||
}
|
||||
|
||||
// ObserveQueueLength observes the queue length for flow control
|
||||
func ObserveQueueLength(priorityLevel string, length int) {
|
||||
apiserverRequestQueueLength.WithLabelValues(priorityLevel).Observe(float64(length))
|
||||
func ObserveQueueLength(priorityLevel, flowSchema string, length int) {
|
||||
apiserverRequestQueueLength.WithLabelValues(priorityLevel, flowSchema).Observe(float64(length))
|
||||
}
|
||||
|
||||
// ObserveWaitingDuration observes the queue length for flow control
|
||||
|
Loading…
Reference in New Issue
Block a user