mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 07:47:56 +00:00
pkg/storage: cache timers
A previous change here replaced time.After with an explicit timer that can be stopped, to avoid filling up the active timer list with timers that are no longer needed. But an even better fix is to reuse the timers across calls, to avoid filling the allocated heap with work for the garbage collector. On top of that, try a quick non-blocking send to avoid the timer entirely.

For the e2e 1000-node kubemark test, basically everything gets faster, some things significantly so. The 90th and 99th percentile for LIST nodes in particular are the worst case that has caused SLO/SLA problems in the past, and this reduces 99th percentile by 10%.

name                                old ms/op  new ms/op   delta
LIST_nodes_p50                      127 ±16%   124 ±13%    ~        (p=0.136 n=29+29)
LIST_nodes_p90                      326 ±12%   278 ±15%    -14.85%  (p=0.000 n=29+29)
LIST_nodes_p99                      453 ±11%   405 ±19%    -10.70%  (p=0.000 n=29+28)
LIST_replicationcontrollers_p50    29.4 ±49%  26.6 ±43%    ~        (p=0.176 n=30+29)
LIST_replicationcontrollers_p90    83.0 ±78%  68.7 ±63%    -17.30%  (p=0.020 n=30+29)
LIST_replicationcontrollers_p99     216 ±43%   173 ±41%    -19.53%  (p=0.000 n=29+28)
DELETE_pods_p50                    24.5 ±14%  24.3 ±17%    ~        (p=0.562 n=30+28)
DELETE_pods_p90                    30.7 ± 1%  30.6 ± 0%    -0.44%   (p=0.000 n=29+28)
DELETE_pods_p99                    77.2 ±34%  56.3 ±27%    -26.99%  (p=0.000 n=30+28)
PUT_replicationcontrollers_p50     5.86 ±26%  5.83 ±36%    ~        (p=1.000 n=29+28)
PUT_replicationcontrollers_p90     15.8 ± 7%  15.9 ± 6%    ~        (p=0.936 n=29+28)
PUT_replicationcontrollers_p99     57.8 ±35%  56.7 ±41%    ~        (p=0.725 n=29+28)
PUT_nodes_p50                      14.9 ± 2%  14.9 ± 1%    -0.55%   (p=0.020 n=30+28)
PUT_nodes_p90                      16.5 ± 1%  16.4 ± 2%    -0.60%   (p=0.040 n=27+28)
PUT_nodes_p99                      57.9 ±47%  44.6 ±42%    -23.02%  (p=0.000 n=30+29)
POST_replicationcontrollers_p50    6.35 ±29%  6.33 ±23%    ~        (p=0.957 n=30+28)
POST_replicationcontrollers_p90    15.4 ± 5%  15.2 ± 6%    -1.14%   (p=0.034 n=29+28)
POST_replicationcontrollers_p99    52.2 ±71%  53.4 ±52%    ~        (p=0.720 n=29+27)
POST_pods_p50                      8.99 ±13%  9.33 ±13%    +3.79%   (p=0.023 n=30+29)
POST_pods_p90                      16.2 ± 4%  16.3 ± 4%    ~        (p=0.113 n=29+29)
POST_pods_p99                      30.9 ±21%  28.4 ±23%    -8.26%   (p=0.001 n=28+29)
POST_bindings_p50                  9.34 ±12%  8.98 ±17%    ~        (p=0.083 n=30+29)
POST_bindings_p90                  16.6 ± 1%  16.5 ± 2%    -0.76%   (p=0.000 n=28+26)
POST_bindings_p99                  23.5 ± 9%  21.4 ± 5%    -8.98%   (p=0.000 n=27+27)
PUT_pods_p50                       10.8 ±11%  10.3 ± 5%    -4.67%   (p=0.000 n=30+28)
PUT_pods_p90                       16.1 ± 1%  16.0 ± 1%    -0.55%   (p=0.003 n=29+29)
PUT_pods_p99                       23.4 ± 9%  21.6 ±14%    -8.03%   (p=0.000 n=28+28)
DELETE_replicationcontrollers_p50  2.42 ±16%  2.50 ±13%    ~        (p=0.072 n=29+29)
DELETE_replicationcontrollers_p90  11.5 ±12%  11.7 ±10%    ~        (p=0.190 n=30+28)
DELETE_replicationcontrollers_p99  19.5 ±21%  19.0 ±22%    ~        (p=0.298 n=29+28)
GET_nodes_p90                      1.20 ±16%  1.18 ±19%    ~        (p=0.626 n=28+29)
GET_nodes_p99                      11.4 ±48%   8.3 ±40%    -27.31%  (p=0.000 n=28+28)
GET_replicationcontrollers_p90     1.04 ±25%  1.03 ±21%    ~        (p=0.682 n=30+29)
GET_replicationcontrollers_p99     12.1 ±81%  10.0 ±123%   ~        (p=0.135 n=28+28)
GET_pods_p90                       1.06 ±19%  1.08 ±21%    ~        (p=0.597 n=29+29)
GET_pods_p99                       3.92 ±43%  2.81 ±39%    -28.39%  (p=0.000 n=27+28)
LIST_pods_p50                      68.0 ±16%  65.3 ±13%    ~        (p=0.066 n=29+29)
LIST_pods_p90                       119 ±19%   115 ±12%    ~        (p=0.091 n=28+27)
LIST_pods_p99                       230 ±18%   226 ±21%    ~        (p=0.251 n=27+28)
This commit is contained in:
parent
de3ce4f465
commit
6a19e46ed6
@ -497,14 +497,39 @@ func (c *cacheWatcher) stop() {
|
||||
}
|
||||
}
|
||||
|
||||
// timerPool caches *time.Timer values so cacheWatcher.add can reuse
// timers across calls instead of allocating a new one per event send.
// Timers are returned to the pool after being stopped (or after their
// channel has been drained), so a pooled timer is always safe to Reset.
var timerPool sync.Pool
|
||||
|
||||
func (c *cacheWatcher) add(event watchCacheEvent) {
|
||||
t := time.NewTimer(5 * time.Second)
|
||||
defer t.Stop()
|
||||
// Try to send the event immediately, without blocking.
|
||||
select {
|
||||
case c.input <- event:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
// OK, block sending, but only for up to 5 seconds.
|
||||
// cacheWatcher.add is called very often, so arrange
|
||||
// to reuse timers instead of constantly allocating.
|
||||
const timeout = 5 * time.Second
|
||||
t, ok := timerPool.Get().(*time.Timer)
|
||||
if ok {
|
||||
t.Reset(timeout)
|
||||
} else {
|
||||
t = time.NewTimer(timeout)
|
||||
}
|
||||
defer timerPool.Put(t)
|
||||
|
||||
select {
|
||||
case c.input <- event:
|
||||
stopped := t.Stop()
|
||||
if !stopped {
|
||||
// Consume triggered (but not yet received) timer event
|
||||
// so that future reuse does not get a spurious timeout.
|
||||
<-t.C
|
||||
}
|
||||
case <-t.C:
|
||||
// This means that we couldn't send event to that watcher.
|
||||
// Since we don't want to blockin on it infinitely,
|
||||
// Since we don't want to block on it infinitely,
|
||||
// we simply terminate it.
|
||||
c.forget(false)
|
||||
c.stop()
|
||||
|
Loading…
Reference in New Issue
Block a user