Merge pull request #130711 from lauralorenz/crashloopbackoff-featuregate-reducedefaultcrashloopbackoffdelay
KEP-4603: Add ReduceDefaultCrashLoopBackoffDecay featuregate, feature and unit and e2e tests
Commit: 396c3ebe4c
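Orientation for the diff below (not part of the commit itself): with the new alpha gate ReduceDefaultCrashLoopBackOffDecay enabled, the kubelet's container-restart backoff defaults drop from a 10s initial delay capped at 300s to a 1s initial delay capped at 60s, while the existing per-node KubeletCrashLoopBackOffMax configuration keeps precedence. As a rough illustration, the self-contained Go sketch below prints the nominal doubling-and-cap delay progression under both default sets; the delays helper and the restart count are made up for the example, and real delays also depend on kubelet sync-loop timing and the 600s backoff-expiry rule visible later in this diff.

// Illustrative sketch only; not code from this PR.
package main

import (
    "fmt"
    "time"
)

// delays returns the nominal per-restart backoff sequence: start at
// initial, double after each consecutive crash, never exceed max.
func delays(initial, max time.Duration, restarts int) []time.Duration {
    out := make([]time.Duration, 0, restarts)
    d := initial
    for i := 0; i < restarts; i++ {
        out = append(out, d)
        d *= 2
        if d > max {
            d = max
        }
    }
    return out
}

func main() {
    // Prior defaults: 10s initial, 300s cap.
    fmt.Println("prior defaults:  ", delays(10*time.Second, 300*time.Second, 8))
    // prints: prior defaults:   [10s 20s 40s 1m20s 2m40s 5m0s 5m0s 5m0s]

    // Reduced defaults from this PR: 1s initial, 60s cap.
    fmt.Println("reduced defaults:", delays(1*time.Second, 60*time.Second, 8))
    // prints: reduced defaults: [1s 2s 4s 8s 16s 32s 1m0s 1m0s]
}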
@@ -565,6 +565,13 @@ const (
     // Allows recursive read-only mounts.
     RecursiveReadOnlyMounts featuregate.Feature = "RecursiveReadOnlyMounts"
 
+    // owner: @lauralorenz
+    // kep: https://kep.k8s.io/4603
+    //
+    // Enables support for a lower internal cluster-wide backoff maximum for restarting
+    // containers (aka containers in CrashLoopBackOff)
+    ReduceDefaultCrashLoopBackOffDecay featuregate.Feature = "ReduceDefaultCrashLoopBackOffDecay"
+
     // owner: @adrianmoisey
     // kep: https://kep.k8s.io/4427
     //
@@ -625,6 +625,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{
         {Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.36
     },
 
+    ReduceDefaultCrashLoopBackOffDecay: {
+        {Version: version.MustParse("1.33"), Default: false, PreRelease: featuregate.Alpha},
+    },
+
     RelaxedDNSSearchValidation: {
         {Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha},
         {Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Beta},
@@ -154,8 +154,19 @@ const (
     // DefaultContainerLogsDir is the location of container logs.
     DefaultContainerLogsDir = "/var/log/containers"
 
-    // MaxContainerBackOff is the max backoff period for container restarts, exported for the e2e test
-    MaxContainerBackOff = v1beta1.MaxContainerBackOff
+    // MaxCrashLoopBackOff is the max backoff period for container restarts, exported for the e2e test
+    MaxCrashLoopBackOff = v1beta1.MaxContainerBackOff
+
+    // reducedMaxCrashLoopBackOff is the default max backoff period for container restarts when the alpha feature
+    // gate ReduceDefaultCrashLoopBackOffDecay is enabled
+    reducedMaxCrashLoopBackOff = 60 * time.Second
+
+    // Initial period for the exponential backoff for container restarts.
+    initialCrashLoopBackOff = time.Second * 10
+
+    // reducedInitialCrashLoopBackOff is the default initial backoff period for container restarts when the alpha feature
+    // gate ReduceDefaultCrashLoopBackOffDecay is enabled
+    reducedInitialCrashLoopBackOff = 1 * time.Second
 
     // MaxImageBackOff is the max backoff period for image pulls, exported for the e2e test
     MaxImageBackOff = 300 * time.Second
@@ -205,9 +216,6 @@ const (
     // error.
     backOffPeriod = time.Second * 10
 
-    // Initial period for the exponential backoff for container restarts.
-    containerBackOffPeriod = time.Second * 10
-
     // Initial period for the exponential backoff for image pulls.
     imageBackOffPeriod = time.Second * 10
 
@@ -322,6 +330,27 @@ type Dependencies struct {
     useLegacyCadvisorStats bool
 }
 
+// newCrashLoopBackOff configures the backoff maximum to be used
+// by kubelet for container restarts depending on the alpha gates
+// and kubelet configuration set
+func newCrashLoopBackOff(kubeCfg *kubeletconfiginternal.KubeletConfiguration) (time.Duration, time.Duration) {
+    boMax := MaxCrashLoopBackOff
+    boInitial := initialCrashLoopBackOff
+    if utilfeature.DefaultFeatureGate.Enabled(features.ReduceDefaultCrashLoopBackOffDecay) {
+        boMax = reducedMaxCrashLoopBackOff
+        boInitial = reducedInitialCrashLoopBackOff
+    }
+
+    if utilfeature.DefaultFeatureGate.Enabled(features.KubeletCrashLoopBackOffMax) {
+        // operator-invoked configuration always has precedence if valid
+        boMax = kubeCfg.CrashLoopBackOff.MaxContainerRestartPeriod.Duration
+        if boMax < boInitial {
+            boInitial = boMax
+        }
+    }
+    return boMax, boInitial
+}
+
 // makePodSourceConfig creates a config.PodConfig from the given
 // KubeletConfiguration or returns an error.
 func makePodSourceConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps *Dependencies, nodeName types.NodeName, nodeHasSynced func() bool) (*config.PodConfig, error) {
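For orientation (again, not part of the commit): a minimal standalone sketch of how the (max, initial) pair returned by newCrashLoopBackOff is consumed, using the same client-go flowcontrol helpers the kubelet wires up in the NewMainKubelet hunk that follows. The hard-coded 1s/60s values stand in for the reduced defaults, and the backoff key is a made-up placeholder.

// Standalone sketch, assuming the reduced defaults and no per-node override.
package main

import (
    "fmt"
    "time"

    "k8s.io/client-go/util/flowcontrol"
)

func main() {
    // Stand-ins for what newCrashLoopBackOff would return with
    // ReduceDefaultCrashLoopBackOffDecay enabled.
    boInitial := 1 * time.Second
    boMax := 60 * time.Second

    backoff := flowcontrol.NewBackOff(boInitial, boMax)
    // Same expiry rule as the kubelet below: forget a container's backoff
    // state once it has stayed up for more than 600s since the last update.
    backoff.HasExpiredFunc = func(eventTime, lastUpdate time.Time, maxDuration time.Duration) bool {
        return eventTime.Sub(lastUpdate) > 600*time.Second
    }

    // Simulate a container crashing repeatedly under the same backoff key.
    key := "example-pod-uid_example-container" // hypothetical key for illustration
    now := time.Now()
    for i := 0; i < 8; i++ {
        backoff.Next(key, now)
        fmt.Printf("crash %d -> wait %v\n", i+1, backoff.Get(key))
        now = now.Add(backoff.Get(key)) // pretend the next crash happens right after the wait
    }
}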
@@ -939,16 +968,10 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
         kubeDeps.Recorder,
         volumepathhandler.NewBlockVolumePathHandler())
 
-    boMax := MaxContainerBackOff
-    base := containerBackOffPeriod
-    if utilfeature.DefaultFeatureGate.Enabled(features.KubeletCrashLoopBackOffMax) {
-        boMax = kubeCfg.CrashLoopBackOff.MaxContainerRestartPeriod.Duration
-        if boMax < containerBackOffPeriod {
-            base = boMax
-        }
-    }
-    klet.backOff = flowcontrol.NewBackOff(base, boMax)
-    klet.backOff.HasExpiredFunc = func(eventTime time.Time, lastUpdate time.Time, maxDuration time.Duration) bool {
+    boMax, base := newCrashLoopBackOff(kubeCfg)
+
+    klet.crashLoopBackOff = flowcontrol.NewBackOff(base, boMax)
+    klet.crashLoopBackOff.HasExpiredFunc = func(eventTime time.Time, lastUpdate time.Time, maxDuration time.Duration) bool {
         return eventTime.Sub(lastUpdate) > 600*time.Second
     }
 
@@ -1349,7 +1372,7 @@ type Kubelet struct {
     syncLoopMonitor atomic.Value
 
     // Container restart Backoff
-    backOff *flowcontrol.Backoff
+    crashLoopBackOff *flowcontrol.Backoff
 
     // Information about the ports which are opened by daemons on Node running this Kubelet server.
     daemonEndpoints *v1.NodeDaemonEndpoints
@@ -2039,7 +2062,7 @@ func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType,
     // Use WithoutCancel instead of a new context.TODO() to propagate trace context
     // Call the container runtime's SyncPod callback
     sctx := context.WithoutCancel(ctx)
-    result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, kl.backOff)
+    result := kl.containerRuntime.SyncPod(sctx, pod, podStatus, pullSecrets, kl.crashLoopBackOff)
     kl.reasonCache.Update(pod.UID, result)
     if err := result.Error(); err != nil {
         // Do not return error if the only failures were pods in backoff
@@ -2928,14 +2951,14 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontainer.PodStatus)
     for i, container := range pod.Spec.Containers {
         if !apiequality.Semantic.DeepEqual(container.Resources, allocatedPod.Spec.Containers[i].Resources) {
             key := kuberuntime.GetStableKey(pod, &container)
-            kl.backOff.Reset(key)
+            kl.crashLoopBackOff.Reset(key)
         }
     }
     for i, container := range pod.Spec.InitContainers {
         if podutil.IsRestartableInitContainer(&container) {
             if !apiequality.Semantic.DeepEqual(container.Resources, allocatedPod.Spec.InitContainers[i].Resources) {
                 key := kuberuntime.GetStableKey(pod, &container)
-                kl.backOff.Reset(key)
+                kl.crashLoopBackOff.Reset(key)
             }
         }
     }
@@ -1383,7 +1383,7 @@ func (kl *Kubelet) HandlePodCleanups(ctx context.Context) error {
     }
 
     // Cleanup any backoff entries.
-    kl.backOff.GC()
+    kl.crashLoopBackOff.GC()
     return nil
 }
 
@@ -55,6 +55,7 @@ import (
     "k8s.io/client-go/kubernetes/fake"
     "k8s.io/client-go/tools/record"
     "k8s.io/client-go/util/flowcontrol"
+    "k8s.io/component-base/featuregate"
     featuregatetesting "k8s.io/component-base/featuregate/testing"
     "k8s.io/component-base/metrics/testutil"
     internalapi "k8s.io/cri-api/pkg/apis"
@@ -338,8 +339,8 @@ func newTestKubeletWithImageList(
     kubelet.containerGC = containerGC
 
     fakeClock := testingclock.NewFakeClock(time.Now())
-    kubelet.backOff = flowcontrol.NewBackOff(time.Second, time.Minute)
-    kubelet.backOff.Clock = fakeClock
+    kubelet.crashLoopBackOff = flowcontrol.NewBackOff(time.Second, time.Minute)
+    kubelet.crashLoopBackOff.Clock = fakeClock
     kubelet.resyncInterval = 10 * time.Second
     kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock)
     // Relist period does not affect the tests.
@@ -2900,7 +2901,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
             now := kubelet.clock.Now()
             // Put the container in backoff so we can confirm backoff is reset.
             backoffKey := kuberuntime.GetStableKey(originalPod, originalCtr)
-            kubelet.backOff.Next(backoffKey, now)
+            kubelet.crashLoopBackOff.Next(backoffKey, now)
 
             updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)
             require.NoError(t, err)
@@ -2922,7 +2923,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
             resizeStatus := kubelet.statusManager.GetPodResizeStatus(newPod.UID)
             assert.Equal(t, tt.expectedResize, resizeStatus)
 
-            isInBackoff := kubelet.backOff.IsInBackOffSince(backoffKey, now)
+            isInBackoff := kubelet.crashLoopBackOff.IsInBackOffSince(backoffKey, now)
             if tt.expectBackoffReset {
                 assert.False(t, isInBackoff, "container backoff should be reset")
             } else {
@@ -3408,7 +3409,7 @@ func TestSyncPodSpans(t *testing.T) {
         kubelet.os,
         kubelet,
         nil,
-        kubelet.backOff,
+        kubelet.crashLoopBackOff,
         kubeCfg.SerializeImagePulls,
         kubeCfg.MaxParallelImagePulls,
         float32(kubeCfg.RegistryPullQPS),
@@ -3938,3 +3939,86 @@ func TestIsPodResizeInProgress(t *testing.T) {
         })
     }
 }
+
+func TestCrashLoopBackOffConfiguration(t *testing.T) {
+    testCases := []struct {
+        name            string
+        featureGates    []featuregate.Feature
+        nodeDecay       metav1.Duration
+        expectedInitial time.Duration
+        expectedMax     time.Duration
+    }{
+        {
+            name:            "Prior behavior",
+            expectedMax:     time.Duration(300 * time.Second),
+            expectedInitial: time.Duration(10 * time.Second),
+        },
+        {
+            name:            "New default only",
+            featureGates:    []featuregate.Feature{features.ReduceDefaultCrashLoopBackOffDecay},
+            expectedMax:     time.Duration(60 * time.Second),
+            expectedInitial: time.Duration(1 * time.Second),
+        },
+        {
+            name:            "Faster per node config; only node config configured",
+            featureGates:    []featuregate.Feature{features.KubeletCrashLoopBackOffMax},
+            nodeDecay:       metav1.Duration{Duration: 2 * time.Second},
+            expectedMax:     time.Duration(2 * time.Second),
+            expectedInitial: time.Duration(2 * time.Second),
+        },
+        {
+            name:            "Faster per node config; new default and node config configured",
+            featureGates:    []featuregate.Feature{features.KubeletCrashLoopBackOffMax, features.ReduceDefaultCrashLoopBackOffDecay},
+            nodeDecay:       metav1.Duration{Duration: 2 * time.Second},
+            expectedMax:     time.Duration(2 * time.Second),
+            expectedInitial: time.Duration(1 * time.Second),
+        },
+        {
+            name:            "Slower per node config; new default and node config configured, set A",
+            featureGates:    []featuregate.Feature{features.KubeletCrashLoopBackOffMax, features.ReduceDefaultCrashLoopBackOffDecay},
+            nodeDecay:       metav1.Duration{Duration: 10 * time.Second},
+            expectedMax:     time.Duration(10 * time.Second),
+            expectedInitial: time.Duration(1 * time.Second),
+        },
+        {
+            name:            "Slower per node config; new default and node config configured, set B",
+            featureGates:    []featuregate.Feature{features.KubeletCrashLoopBackOffMax, features.ReduceDefaultCrashLoopBackOffDecay},
+            nodeDecay:       metav1.Duration{Duration: 300 * time.Second},
+            expectedMax:     time.Duration(300 * time.Second),
+            expectedInitial: time.Duration(1 * time.Second),
+        },
+        {
+            name:            "Slower per node config; only node config configured, set A",
+            featureGates:    []featuregate.Feature{features.KubeletCrashLoopBackOffMax},
+            nodeDecay:       metav1.Duration{Duration: 11 * time.Second},
+            expectedMax:     time.Duration(11 * time.Second),
+            expectedInitial: time.Duration(10 * time.Second),
+        },
+        {
+            name:            "Slower per node config; only node config configured, set B",
+            featureGates:    []featuregate.Feature{features.KubeletCrashLoopBackOffMax},
+            nodeDecay:       metav1.Duration{Duration: 300 * time.Second},
+            expectedMax:     time.Duration(300 * time.Second),
+            expectedInitial: time.Duration(10 * time.Second),
+        },
+    }
+
+    for _, tc := range testCases {
+        t.Run(tc.name, func(t *testing.T) {
+            kubeCfg := &kubeletconfiginternal.KubeletConfiguration{}
+
+            for _, f := range tc.featureGates {
+                featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, f, true)
+            }
+            if tc.nodeDecay.Duration > 0 {
+                kubeCfg.CrashLoopBackOff.MaxContainerRestartPeriod = &tc.nodeDecay
+            }
+
+            resultMax, resultInitial := newCrashLoopBackOff(kubeCfg)
+
+            assert.Equalf(t, tc.expectedMax, resultMax, "wrong max calculated, want: %v, got %v", tc.expectedMax, resultMax)
+            assert.Equalf(t, tc.expectedInitial, resultInitial, "wrong base calculated, want: %v, got %v", tc.expectedInitial, resultInitial)
+        })
+    }
+
+}
@@ -64,7 +64,7 @@ import (
 const (
     buildBackOffDuration = time.Minute
     syncLoopFrequency    = 10 * time.Second
-    maxBackOffTolerance  = time.Duration(1.3 * float64(kubelet.MaxContainerBackOff))
+    maxBackOffTolerance  = time.Duration(1.3 * float64(kubelet.MaxCrashLoopBackOff))
     podRetryPeriod       = 1 * time.Second
 )
 
@ -739,7 +739,7 @@ var _ = SIGDescribe("Pods", func() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
podClient.CreateSync(ctx, pod)
|
podClient.CreateSync(ctx, pod)
|
||||||
time.Sleep(2 * kubelet.MaxContainerBackOff) // it takes slightly more than 2*x to get to a back-off of x
|
time.Sleep(2 * kubelet.MaxCrashLoopBackOff) // it takes slightly more than 2*x to get to a back-off of x
|
||||||
|
|
||||||
// wait for a delay == capped delay of MaxContainerBackOff
|
// wait for a delay == capped delay of MaxContainerBackOff
|
||||||
ginkgo.By("getting restart delay when capped")
|
ginkgo.By("getting restart delay when capped")
|
||||||
@@ -753,13 +753,13 @@ var _ = SIGDescribe("Pods", func() {
                framework.Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
            }
 
-           if delay1 < kubelet.MaxContainerBackOff {
+           if delay1 < kubelet.MaxCrashLoopBackOff {
                continue
            }
        }
 
-       if (delay1 < kubelet.MaxContainerBackOff) || (delay1 > maxBackOffTolerance) {
-           framework.Failf("expected %s back-off got=%s in delay1", kubelet.MaxContainerBackOff, delay1)
+       if (delay1 < kubelet.MaxCrashLoopBackOff) || (delay1 > maxBackOffTolerance) {
+           framework.Failf("expected %s back-off got=%s in delay1", kubelet.MaxCrashLoopBackOff, delay1)
        }
 
        ginkgo.By("getting restart delay after a capped delay")
@@ -768,8 +768,8 @@ var _ = SIGDescribe("Pods", func() {
            framework.Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
        }
 
-       if delay2 < kubelet.MaxContainerBackOff || delay2 > maxBackOffTolerance { // syncloop cumulative drift
-           framework.Failf("expected %s back-off got=%s on delay2", kubelet.MaxContainerBackOff, delay2)
+       if delay2 < kubelet.MaxCrashLoopBackOff || delay2 > maxBackOffTolerance { // syncloop cumulative drift
+           framework.Failf("expected %s back-off got=%s on delay2", kubelet.MaxCrashLoopBackOff, delay2)
        }
    })
 
@ -85,6 +85,54 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial(
|
|||||||
doTest(ctx, f, 3, containerName, 13)
|
doTest(ctx, f, 3, containerName, 13)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
ginkgo.Context("Reduced default container restart backs off as expected", func() {
|
||||||
|
|
||||||
|
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||||
|
initialConfig.FeatureGates = map[string]bool{"ReduceDefaultCrashLoopBackOffDecay": true}
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.BeforeEach(func() {
|
||||||
|
if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
|
||||||
|
ginkgo.Skip("Skip the test since the CRI Proxy is undefined.")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.AfterEach(func() {
|
||||||
|
err := resetCRIProxyInjector(e2eCriProxy)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.It("Reduced default restart backs off.", func(ctx context.Context) {
|
||||||
|
// 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150s, 180s, 210s, 240s, 270s, 300s
|
||||||
|
doTest(ctx, f, 3, containerName, 13)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.Context("Lower node config container restart takes precedence", func() {
|
||||||
|
|
||||||
|
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
|
||||||
|
initialConfig.FeatureGates = map[string]bool{"ReduceDefaultCrashLoopBackOffDecay": true}
|
||||||
|
initialConfig.CrashLoopBackOff.MaxContainerRestartPeriod = &metav1.Duration{Duration: time.Duration(1 * time.Second)}
|
||||||
|
initialConfig.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true}
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.BeforeEach(func() {
|
||||||
|
if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
|
||||||
|
ginkgo.Skip("Skip the test since the CRI Proxy is undefined.")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.AfterEach(func() {
|
||||||
|
err := resetCRIProxyInjector(e2eCriProxy)
|
||||||
|
framework.ExpectNoError(err)
|
||||||
|
})
|
||||||
|
|
||||||
|
ginkgo.It("Reduced default restart backs off.", func(ctx context.Context) {
|
||||||
|
// 0s, 0s, 1s, 2s, 3s, 4s, 5s, 6s, 7s, and so on
|
||||||
|
doTest(ctx, f, 3, containerName, 298)
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, containerName string, maxRestarts int) {
|
func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, containerName string, maxRestarts int) {
|
||||||
|
@@ -1060,6 +1060,12 @@
     lockToDefault: true
     preRelease: GA
     version: "1.33"
+- name: ReduceDefaultCrashLoopBackOffDecay
+  versionedSpecs:
+  - default: false
+    lockToDefault: false
+    preRelease: Alpha
+    version: "1.33"
 - name: RelaxedDNSSearchValidation
   versionedSpecs:
   - default: false