apiserver: make watch termination during shutdown configurable

This commit is contained in:
Abu Kashem 2023-02-10 11:43:06 -05:00
parent 6385b86a9b
commit 791fcd6fb4
No known key found for this signature in database
GPG Key ID: 33A4FA7088DB68A9
5 changed files with 140 additions and 21 deletions

View File

@ -276,6 +276,23 @@ type Config struct {
// AggregatedDiscoveryGroupManager serves /apis in an aggregated form.
AggregatedDiscoveryGroupManager discoveryendpoint.ResourceManager
// ShutdownWatchTerminationGracePeriod, if set to a positive value,
// is the maximum duration the apiserver will wait for all active
// watch request(s) to drain.
// Once this grace period elapses, the apiserver will no longer
// wait for any active watch request(s) in flight to drain, it will
// proceed to the next step in the graceful server shutdown process.
// If set to a positive value, the apiserver will keep track of the
// number of active watch request(s) in flight and during shutdown
// it will wait, at most, for the specified duration and allow these
// active watch requests to drain with some rate limiting in effect.
// The default is zero, which implies the apiserver will not keep
// track of active watch request(s) in flight and will not wait
// for them to drain, this maintains backward compatibility.
// This grace period is orthogonal to other grace periods, and
// it is not overridden by any other grace period.
ShutdownWatchTerminationGracePeriod time.Duration
}
type RecommendedConfig struct {
@ -416,6 +433,7 @@ func NewConfig(codecs serializer.CodecFactory) *Config {
LongRunningFunc: genericfilters.BasicLongRunningRequestCheck(sets.NewString("watch"), sets.NewString()),
lifecycleSignals: lifecycleSignals,
StorageObjectCountTracker: flowcontrolrequest.NewStorageObjectCountTracker(),
ShutdownWatchTerminationGracePeriod: time.Duration(0),
APIServerID: id,
StorageVersionManager: storageversion.NewDefaultManager(),
@ -684,6 +702,7 @@ func (c completedConfig) New(name string, delegationTarget DelegationTarget) (*G
minRequestTimeout: time.Duration(c.MinRequestTimeout) * time.Second,
ShutdownTimeout: c.RequestTimeout,
ShutdownDelayDuration: c.ShutdownDelayDuration,
ShutdownWatchTerminationGracePeriod: c.ShutdownWatchTerminationGracePeriod,
SecureServingInfo: c.SecureServing,
ExternalAddress: c.ExternalAddress,
@ -913,7 +932,9 @@ func DefaultBuildHandlerChain(apiHandler http.Handler, c *Config) http.Handler {
handler = genericapifilters.WithRequestDeadline(handler, c.AuditBackend, c.AuditPolicyRuleEvaluator,
c.LongRunningFunc, c.Serializer, c.RequestTimeout)
handler = genericfilters.WithWaitGroup(handler, c.LongRunningFunc, c.NonLongRunningRequestWaitGroup)
if c.ShutdownWatchTerminationGracePeriod > 0 {
handler = genericfilters.WithWatchTerminationDuringShutdown(handler, c.lifecycleSignals, c.WatchRequestWaitGroup)
}
if c.SecureServing != nil && !c.SecureServing.DisableHTTP2 && c.GoawayChance > 0 {
handler = genericfilters.WithProbabilisticGoaway(handler, c.GoawayChance)
}

View File

@ -266,6 +266,23 @@ type GenericAPIServer struct {
// If enabled, after ShutdownDelayDuration elapses, any incoming request is
// rejected with a 429 status code and a 'Retry-After' response.
ShutdownSendRetryAfter bool
// ShutdownWatchTerminationGracePeriod, if set to a positive value,
// is the maximum duration the apiserver will wait for all active
// watch request(s) to drain.
// Once this grace period elapses, the apiserver will no longer
// wait for any active watch request(s) in flight to drain, it will
// proceed to the next step in the graceful server shutdown process.
// If set to a positive value, the apiserver will keep track of the
// number of active watch request(s) in flight and during shutdown
// it will wait, at most, for the specified duration and allow these
// active watch requests to drain with some rate limiting in effect.
// The default is zero, which implies the apiserver will not keep
// track of active watch request(s) in flight and will not wait
// for them to drain, this maintains backward compatibility.
// This grace period is orthogonal to other grace periods, and
// it is not overridden by any other grace period.
ShutdownWatchTerminationGracePeriod time.Duration
}
// DelegationTarget is an interface which allows for composition of API servers with top level handling that works
@ -617,10 +634,13 @@ func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
defer close(activeWatchesDrainedCh)
<-notAcceptingNewRequestCh.Signaled()
if s.ShutdownWatchTerminationGracePeriod <= time.Duration(0) {
klog.V(1).InfoS("[graceful-termination] not going to wait for active watch request(s) to drain")
return
}
// Wait for all active watches to finish
// TODO(tkashem): make the grace period configurable
grace := 10 * time.Second
grace := s.ShutdownWatchTerminationGracePeriod
activeBefore, activeAfter, err := s.WatchRequestWaitGroup.Wait(func(count int) (utilwaitgroup.RateLimiter, context.Context, context.CancelFunc) {
qps := float64(count) / grace.Seconds()
// TODO: we don't want the QPS (max requests drained per second) to

View File

@ -1011,6 +1011,8 @@ func newGenericAPIServer(t *testing.T, fAudit *fakeAudit, keepListening bool) *G
config, _ := setUp(t)
config.ShutdownDelayDuration = 100 * time.Millisecond
config.ShutdownSendRetryAfter = keepListening
// we enable watch draining, any positive value will do that
config.ShutdownWatchTerminationGracePeriod = 2 * time.Second
config.AuditPolicyRuleEvaluator = fAudit
config.AuditBackend = fAudit

View File

@ -73,6 +73,23 @@ type ServerRunOptions struct {
// If enabled, after ShutdownDelayDuration elapses, any incoming request is
// rejected with a 429 status code and a 'Retry-After' response.
ShutdownSendRetryAfter bool
// ShutdownWatchTerminationGracePeriod, if set to a positive value,
// is the maximum duration the apiserver will wait for all active
// watch request(s) to drain.
// Once this grace period elapses, the apiserver will no longer
// wait for any active watch request(s) in flight to drain, it will
// proceed to the next step in the graceful server shutdown process.
// If set to a positive value, the apiserver will keep track of the
// number of active watch request(s) in flight and during shutdown
// it will wait, at most, for the specified duration and allow these
// active watch requests to drain with some rate limiting in effect.
// The default is zero, which implies the apiserver will not keep
// track of active watch request(s) in flight and will not wait
// for them to drain, this maintains backward compatibility.
// This grace period is orthogonal to other grace periods, and
// it is not overridden by any other grace period.
ShutdownWatchTerminationGracePeriod time.Duration
}
func NewServerRunOptions() *ServerRunOptions {
@ -84,6 +101,7 @@ func NewServerRunOptions() *ServerRunOptions {
LivezGracePeriod: defaults.LivezGracePeriod,
MinRequestTimeout: defaults.MinRequestTimeout,
ShutdownDelayDuration: defaults.ShutdownDelayDuration,
ShutdownWatchTerminationGracePeriod: defaults.ShutdownWatchTerminationGracePeriod,
JSONPatchMaxCopyBytes: defaults.JSONPatchMaxCopyBytes,
MaxRequestBodyBytes: defaults.MaxRequestBodyBytes,
EnablePriorityAndFairness: true,
@ -107,6 +125,7 @@ func (s *ServerRunOptions) ApplyTo(c *server.Config) error {
c.MaxRequestBodyBytes = s.MaxRequestBodyBytes
c.PublicAddress = s.AdvertiseAddress
c.ShutdownSendRetryAfter = s.ShutdownSendRetryAfter
c.ShutdownWatchTerminationGracePeriod = s.ShutdownWatchTerminationGracePeriod
return nil
}
@ -160,6 +179,10 @@ func (s *ServerRunOptions) Validate() []error {
errors = append(errors, fmt.Errorf("--shutdown-delay-duration can not be negative value"))
}
if s.ShutdownWatchTerminationGracePeriod < 0 {
errors = append(errors, fmt.Errorf("shutdown-watch-termination-grace-period, if provided, can not be a negative value"))
}
if s.JSONPatchMaxCopyBytes < 0 {
errors = append(errors, fmt.Errorf("ServerRunOptions.JSONPatchMaxCopyBytes can not be negative value"))
}
@ -315,5 +338,9 @@ func (s *ServerRunOptions) AddUniversalFlags(fs *pflag.FlagSet) {
"during this window all incoming requests will be rejected with a status code 429 and a 'Retry-After' response header, "+
"in addition 'Connection: close' response header is set in order to tear down the TCP connection when idle.")
fs.DurationVar(&s.ShutdownWatchTerminationGracePeriod, "shutdown-watch-termination-grace-period", s.ShutdownWatchTerminationGracePeriod, ""+
"This option, if set, represents the maximum amount of grace period the apiserver will wait "+
"for active watch request(s) to drain during the graceful server shutdown window.")
utilfeature.DefaultMutableFeatureGate.AddFlag(fs)
}

View File

@ -261,3 +261,52 @@ func TestValidateCorsAllowedOriginList(t *testing.T) {
}
}
}
func TestServerRunOptionsWithShutdownWatchTerminationGracePeriod(t *testing.T) {
tests := []struct {
name string
optionsFn func() *ServerRunOptions
errShouldContain string
}{
{
name: "default should be valid",
optionsFn: func() *ServerRunOptions {
return NewServerRunOptions()
},
},
{
name: "negative not allowed",
optionsFn: func() *ServerRunOptions {
o := NewServerRunOptions()
o.ShutdownWatchTerminationGracePeriod = -time.Second
return o
},
errShouldContain: "shutdown-watch-termination-grace-period, if provided, can not be a negative value",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
options := test.optionsFn()
errsGot := options.Validate()
switch {
case len(test.errShouldContain) == 0:
if len(errsGot) != 0 {
t.Errorf("expected no error, but got: %v", errsGot)
}
default:
if len(errsGot) == 0 ||
!strings.Contains(utilerrors.NewAggregate(errsGot).Error(), test.errShouldContain) {
t.Errorf("expected error to contain: %s, but got: %v", test.errShouldContain, errsGot)
}
}
})
}
t.Run("default should be zero", func(t *testing.T) {
options := NewServerRunOptions()
if options.ShutdownWatchTerminationGracePeriod != time.Duration(0) {
t.Errorf("expected default of ShutdownWatchTerminationGracePeriod to be zero, but got: %s", options.ShutdownWatchTerminationGracePeriod)
}
})
}