Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-23 19:56:01 +00:00
Merge pull request #128559 from lauralorenz/crashloopbackoff-refactorimagepullbackoff-e2enodecriproxytest
E2E Node tests for image pull backoff and crashloopbackoff behavior
This commit is contained in:
commit 5ee686b6cf

test/e2e_node/container_restart_test.go | 155 lines (new file)
@@ -0,0 +1,155 @@
//go:build linux
// +build linux

/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"time"

	podv1util "k8s.io/kubernetes/pkg/api/v1/pod"
	imageutils "k8s.io/kubernetes/test/utils/image"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	"github.com/pkg/errors"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	admissionapi "k8s.io/pod-security-admission/api"
)

const containerName = "restarts"

var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial(), func() {
	f := framework.NewDefaultFramework("container-restart")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged

	ginkgo.Context("Container restart backs off", func() {

		ginkgo.BeforeEach(func() {
			if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
				ginkgo.Skip("Skip the test since the CRI Proxy is undefined.")
			}
		})

		ginkgo.AfterEach(func() {
			err := resetCRIProxyInjector(e2eCriProxy)
			framework.ExpectNoError(err)
		})

		ginkgo.It("Container restart backs off.", func(ctx context.Context) {
			// 0s, 0s, 10s, 30s, 70s, 150s, 310s
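			// (cumulative restart times under kubelet's default crash-loop backoff:
			// a 10s initial delay that doubles after each restart, capped at 300s;
			// see the schedule sketch after this file)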
			doTest(ctx, f, 3, containerName, 7)
		})
	})

	ginkgo.Context("Alternate container restart backs off as expected", func() {

		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
			initialConfig.CrashLoopBackOff.MaxContainerRestartPeriod = &metav1.Duration{Duration: 30 * time.Second}
			initialConfig.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true}
		})

		ginkgo.BeforeEach(func() {
			if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
				ginkgo.Skip("Skip the test since the CRI Proxy is undefined.")
			}
		})

		ginkgo.AfterEach(func() {
			err := resetCRIProxyInjector(e2eCriProxy)
			framework.ExpectNoError(err)
		})

		ginkgo.It("Alternate restart backs off.", func(ctx context.Context) {
			// 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150s, 180s, 210s, 240s, 270s, 300s
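			// (with MaxContainerRestartPeriod=30s the per-restart delay steps through
			// 0s, 0s, 10s, 20s and then holds at 30s, so 13 restarts fit in ~300s)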
			doTest(ctx, f, 3, containerName, 13)
		})
	})
})

// doTest creates a pod whose container always exits, lets it fall into
// CrashLoopBackOff, requires at least targetRestarts restarts within the test
// window, and then checks that backoff kept the total at or below maxRestarts.
func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, containerName string, maxRestarts int) {

	pod := e2epod.NewPodClient(f).Create(ctx, newFailAlwaysPod())
	podErr := e2epod.WaitForPodContainerToFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, "CrashLoopBackOff", 1*time.Minute)
	gomega.Expect(podErr).To(gomega.HaveOccurred())

	// Hard wait 30 seconds for targetRestarts in the best case; the longer timeout below handles slow infra.
	time.Sleep(30 * time.Second)
	podErr = waitForContainerRestartedNTimes(ctx, f, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts)
	gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures")

	r, err := extractObservedBackoff(ctx, f, pod.Name, containerName)
	framework.ExpectNoError(err)

	gomega.Expect(r).Should(gomega.BeNumerically("<=", maxRestarts))
}

// extractObservedBackoff returns the restart count recorded for containerName
// in any of the pod's container status lists.
func extractObservedBackoff(ctx context.Context, f *framework.Framework, podName string, containerName string) (int32, error) {
	var r int32
	pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, podName, metav1.GetOptions{})
	if err != nil {
		return r, err
	}
	for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} {
		for _, cs := range statuses {
			if cs.Name == containerName {
				return cs.RestartCount, nil
			}
		}
	}
	return r, errors.Errorf("Could not find container status for container %s in pod %s", containerName, podName)
}

// newFailAlwaysPod returns a pod whose single busybox container exits
// immediately after starting, so the kubelet restarts it indefinitely under
// the default Always restart policy.
func newFailAlwaysPod() *v1.Pod {
	podName := "container-restart" + string(uuid.NewUUID())
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:            containerName,
					Image:           imageutils.GetE2EImage(imageutils.BusyBox),
					ImagePullPolicy: v1.PullIfNotPresent,
				},
			},
		},
	}
	return pod
}

// waitForContainerRestartedNTimes polls the pod until the named container's
// restart count reaches target, or the timeout expires.
func waitForContainerRestartedNTimes(ctx context.Context, f *framework.Framework, namespace string, podName string, containerName string, timeout time.Duration, target int) error {
	conditionDesc := fmt.Sprintf("A container in pod %s restarted at least %d times", podName, target)
	return e2epod.WaitForPodCondition(ctx, f.ClientSet, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
		cs, found := podv1util.GetContainerStatus(pod.Status.ContainerStatuses, containerName)
		if !found {
			return false, fmt.Errorf("could not find container %s in pod %s", containerName, podName)
		}
		return cs.RestartCount >= int32(target), nil
	})
}
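
The restart sequences asserted in the comments above follow from a doubling backoff. Below is a minimal, self-contained Go sketch (not part of this commit; it assumes the 10s initial delay, the 300s default cap, and the two immediate first starts that the test comments imply) that reproduces the cumulative schedule for both configurations:

package main

import (
	"fmt"
	"time"
)

// restartSchedule returns cumulative restart times for a container that exits
// immediately, given the initial backoff delay and the cap on any single delay.
// The first two starts are modeled as immediate, matching the test comments.
func restartSchedule(initial, maxDelay time.Duration, restarts int) []time.Duration {
	schedule := make([]time.Duration, 0, restarts)
	var elapsed, delay time.Duration
	for i := 0; i < restarts; i++ {
		elapsed += delay
		schedule = append(schedule, elapsed)
		switch {
		case i == 0:
			// the second start is also immediate; keep delay at 0
		case delay == 0:
			delay = initial
		default:
			delay *= 2
		}
		if delay > maxDelay {
			delay = maxDelay
		}
	}
	return schedule
}

func main() {
	// Default 300s cap: [0s 0s 10s 30s 1m10s 2m30s 5m10s] -> 7 restarts by ~310s.
	fmt.Println(restartSchedule(10*time.Second, 300*time.Second, 7))
	// 30s cap: 0s, 0s, 10s, 30s, then +30s per restart -> 13 restarts by ~300s.
	fmt.Println(restartSchedule(10*time.Second, 30*time.Second, 13))
}

This is why the default test treats 7 as the restart ceiling for the window it observes, while the 30s-cap test allows 13: the same wall-clock budget admits roughly twice as many restarts once the per-restart delay stops doubling at 30s.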

@@ -84,6 +84,20 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() {
		})
	})

	ginkgo.Context("Image pull backoff", func() {
		ginkgo.BeforeEach(func() {
			if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
				ginkgo.Skip("Skip the test since the CRI Proxy is undefined.")
			}
		})

		ginkgo.AfterEach(func() {
			err := resetCRIProxyInjector(e2eCriProxy)
			framework.ExpectNoError(err)
		})

	})

	ginkgo.Context("Inject a pull image timeout exception into the CriProxy", func() {
		ginkgo.BeforeEach(func() {
			if err := resetCRIProxyInjector(e2eCriProxy); err != nil {
@@ -34,6 +34,7 @@ import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	kubeletevents "k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/pkg/kubelet/images"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -230,6 +231,44 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func
		})

	})

	ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) {
		// Inject a PullImage failure to trigger image pull backoff.
		expectedErr := fmt.Errorf("PullImage failed")
		err := addCRIProxyInjector(e2eCriProxy, func(apiName string) error {
			if apiName == criproxy.PullImage {
				return expectedErr
			}
			return nil
		})
		framework.ExpectNoError(err)

		pod := e2epod.NewPodClient(f).Create(ctx, newPullImageAlwaysPod())
		podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) {
			if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() {
				return true, nil
			}
			return false, nil
		})
		gomega.Expect(podErr).To(gomega.HaveOccurred())

		eventMsg, err := getFailedToPullImageMsg(ctx, f, pod.Name)
		framework.ExpectNoError(err)
		isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error())
		gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy"))

		// Hard wait 30 seconds for image pulls to repeatedly back off.
		time.Sleep(30 * time.Second)

		e, err := getImagePullAttempts(ctx, f, pod.Name)
		framework.ExpectNoError(err)
		// 3 attempts would take 10s in the best case.
		gomega.Expect(e.Count).Should(gomega.BeNumerically(">=", 3))
		// 7 attempts would take 310s in the best case, allowing for slow infra.
		gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 7))
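		// (these bounds track the same 0s, 0s, 10s, 30s, 70s, 150s, 310s doubling
		// schedule sketched above for container restarts)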

	})

})

func getPodImagePullDurations(ctx context.Context, f *framework.Framework, testpods []*v1.Pod) (map[string]*pulledStruct, map[string]metav1.Time, map[string]metav1.Time, error) {
@@ -343,3 +382,18 @@ func getDurationsFromPulledEventMsg(msg string) (*pulledStruct, error) {
		pulledIncludeWaitingDuration: pulledIncludeWaitingDuration,
	}, nil
}

// getImagePullAttempts returns the pod's "Pulling" event; the test reads its
// Count as the number of pull attempts the kubelet has made.
func getImagePullAttempts(ctx context.Context, f *framework.Framework, podName string) (v1.Event, error) {
	event := v1.Event{}
	e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{})
	if err != nil {
		return event, err
	}

	for _, event := range e.Items {
		if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.PullingImage {
			return event, nil
		}
	}
	return event, nil
}
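
Both tests drive failures through the e2e node suite's CRI proxy; its helpers (addCRIProxyInjector, resetCRIProxyInjector, criproxy.PullImage) live elsewhere in test/e2e_node and are not shown in this diff. The following is a rough Go sketch of the injection pattern those helpers imply; every name in it is hypothetical, not the repo's actual API:

package main

import (
	"errors"
	"fmt"
	"sync"
)

// injector can veto a CRI call by API name before it reaches the real runtime.
type injector func(apiName string) error

// fakeCRIProxy is a hypothetical stand-in for the suite's CRI proxy.
type fakeCRIProxy struct {
	mu   sync.Mutex
	hook injector
}

// addInjector registers a hook; resetInjector clears it between tests.
func (p *fakeCRIProxy) addInjector(fn injector) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.hook = fn
}

func (p *fakeCRIProxy) resetInjector() {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.hook = nil
}

// PullImage shows where the hook fires: an injected error is returned to the
// kubelet as if the runtime's image pull had failed, triggering backoff.
func (p *fakeCRIProxy) PullImage(image string) error {
	p.mu.Lock()
	fn := p.hook
	p.mu.Unlock()
	if fn != nil {
		if err := fn("PullImage"); err != nil {
			return err
		}
	}
	// ...forward to the real container runtime here...
	return nil
}

func main() {
	proxy := &fakeCRIProxy{}
	proxy.addInjector(func(apiName string) error {
		if apiName == "PullImage" {
			return errors.New("PullImage failed")
		}
		return nil
	})
	fmt.Println(proxy.PullImage("busybox")) // PullImage failed
	proxy.resetInjector()
	fmt.Println(proxy.PullImage("busybox")) // <nil>
}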