mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 18:00:08 +00:00
Merge pull request #125086 from oxxenix/exponential-backoff
add exponential backoff in NodeResourceSlices controller
This commit is contained in:
commit
fad52aedfc
@ -37,6 +37,7 @@ import (
|
|||||||
resourceinformers "k8s.io/client-go/informers/resource/v1alpha2"
|
resourceinformers "k8s.io/client-go/informers/resource/v1alpha2"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/tools/cache"
|
"k8s.io/client-go/tools/cache"
|
||||||
|
"k8s.io/client-go/util/flowcontrol"
|
||||||
"k8s.io/client-go/util/workqueue"
|
"k8s.io/client-go/util/workqueue"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
|
drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
|
||||||
@ -46,7 +47,10 @@ import (
|
|||||||
const (
|
const (
|
||||||
// resyncPeriod for informer
|
// resyncPeriod for informer
|
||||||
// TODO (https://github.com/kubernetes/kubernetes/issues/123688): disable?
|
// TODO (https://github.com/kubernetes/kubernetes/issues/123688): disable?
|
||||||
resyncPeriod = time.Duration(10 * time.Minute)
|
resyncPeriod = time.Duration(10 * time.Minute)
|
||||||
|
retryPeriod = 5 * time.Second
|
||||||
|
maxRetryPeriod = 180 * time.Second
|
||||||
|
backoffFactor = 2.0 // jitter factor passed to flowcontrol.NewBackOffWithJitter (not the growth multiplier)
|
||||||
)
|
)
|
||||||
|
|
||||||
// nodeResourcesController collects resource information from all registered
|
// nodeResourcesController collects resource information from all registered
|
||||||
@ -185,6 +189,9 @@ func (c *nodeResourcesController) monitorPlugin(ctx context.Context, active *act
|
|||||||
logger.Info("Stopping to monitor node resources of the plugin", "reason", context.Cause(ctx), "err", ctx.Err(), "recover", r)
|
logger.Info("Stopping to monitor node resources of the plugin", "reason", context.Cause(ctx), "err", ctx.Err(), "recover", r)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
backOff := flowcontrol.NewBackOffWithJitter(retryPeriod, maxRetryPeriod, backoffFactor)
|
||||||
|
backOffID := "retry"
|
||||||
|
|
||||||
// Keep trying until canceled.
|
// Keep trying until canceled.
|
||||||
for ctx.Err() == nil {
|
for ctx.Err() == nil {
|
||||||
logger.V(5).Info("Calling NodeListAndWatchResources")
|
logger.V(5).Info("Calling NodeListAndWatchResources")
|
||||||
@ -197,9 +204,9 @@ func (c *nodeResourcesController) monitorPlugin(ctx context.Context, active *act
|
|||||||
default:
|
default:
|
||||||
// This is a problem, report it and retry.
|
// This is a problem, report it and retry.
|
||||||
logger.Error(err, "Creating gRPC stream for node resources failed")
|
logger.Error(err, "Creating gRPC stream for node resources failed")
|
||||||
// TODO (https://github.com/kubernetes/kubernetes/issues/123689): exponential backoff?
|
|
||||||
select {
|
select {
|
||||||
case <-time.After(5 * time.Second):
|
case <-time.After(backOff.Get(backOffID)):
|
||||||
|
backOff.Next(backOffID, time.Now())
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -219,9 +226,9 @@ func (c *nodeResourcesController) monitorPlugin(ctx context.Context, active *act
|
|||||||
case ctx.Err() == nil:
|
case ctx.Err() == nil:
|
||||||
// This is a problem, report it and retry.
|
// This is a problem, report it and retry.
|
||||||
logger.Error(err, "Reading node resources from gRPC stream failed")
|
logger.Error(err, "Reading node resources from gRPC stream failed")
|
||||||
// TODO (https://github.com/kubernetes/kubernetes/issues/123689): exponential backoff?
|
|
||||||
select {
|
select {
|
||||||
case <-time.After(5 * time.Second):
|
case <-time.After(backOff.Get(backOffID)):
|
||||||
|
backOff.Next(backOffID, time.Now())
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user