Merge pull request #123343 from haircommander/image-gc-e2e-2

KEP-4210: add e2e tests and add small fix for ImageGCMaxAge
Commit d311ce0435 by Kubernetes Prow Robot, 2024-02-20 10:48:15 -08:00, committed by GitHub
5 changed files with 149 additions and 16 deletions
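
For context: ImageMaximumGCAge (KEP-4210) lets the kubelet garbage collect any image that has gone unused for longer than a configured duration, independently of the disk-usage thresholds. The fix in this PR makes the kubelet wait until it has itself been running for at least that duration before age-based GC kicks in, so images are not collected prematurely right after a restart. As a rough sketch, the feature can be enabled through the KubeletConfiguration types the same way the new e2e test below does; the helper name here is illustrative, not part of this PR:

    package example

    import (
        "time"

        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    )

    // enableImageMaxAgeGC is a hypothetical helper (not part of this PR): it
    // turns on the ImageMaximumGCAge feature gate and sets a 1-minute maximum
    // unused-image age, the same settings the new e2e test applies below.
    func enableImageMaxAgeGC(cfg *kubeletconfig.KubeletConfiguration) {
        if cfg.FeatureGates == nil {
            cfg.FeatureGates = make(map[string]bool)
        }
        cfg.FeatureGates["ImageMaximumGCAge"] = true
        cfg.ImageMaximumGCAge = metav1.Duration{Duration: time.Minute}
    }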

pkg/kubelet/images/image_gc_manager.go

@@ -69,7 +69,7 @@ type StatsProvider interface {
 type ImageGCManager interface {
     // Applies the garbage collection policy. Errors include being unable to free
     // enough space as per the garbage collection policy.
-    GarbageCollect(ctx context.Context) error
+    GarbageCollect(ctx context.Context, beganGC time.Time) error
 
     // Start async garbage collection of images.
     Start()
@@ -308,7 +308,7 @@ func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.
     return imagesInUse, nil
 }
 
-func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
+func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.Time) error {
     ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
     defer otelSpan.End()
@@ -318,7 +318,7 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
         return err
     }
 
-    images, err = im.freeOldImages(ctx, images, freeTime)
+    images, err = im.freeOldImages(ctx, images, freeTime, beganGC)
     if err != nil {
         return err
     }
@@ -369,10 +369,16 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
     return nil
 }
 
-func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime time.Time) ([]evictionInfo, error) {
+func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime, beganGC time.Time) ([]evictionInfo, error) {
     if im.policy.MaxAge == 0 {
         return images, nil
     }
+
+    // Wait until the MaxAge has passed since the Kubelet has started,
+    // or else we risk prematurely garbage collecting images.
+    if freeTime.Sub(beganGC) <= im.policy.MaxAge {
+        return images, nil
+    }
     var deletionErrors []error
     remainingImages := make([]evictionInfo, 0)
     for _, image := range images {

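The guard above is the whole of the "small fix": freeOldImages now compares the GC pass's freeTime against beganGC, the time the kubelet began collecting, and bails out until MaxAge has elapsed. A standalone sketch of the same check (function and variable names here are illustrative, not from the kubelet):

    package main

    import (
        "fmt"
        "time"
    )

    // skipAgeBasedGC mirrors the guard above: age-based image GC is a no-op
    // when MaxAge is unset, or until MaxAge has elapsed since GC began.
    func skipAgeBasedGC(freeTime, beganGC time.Time, maxAge time.Duration) bool {
        return maxAge == 0 || freeTime.Sub(beganGC) <= maxAge
    }

    func main() {
        began := time.Now()
        fmt.Println(skipAgeBasedGC(began.Add(30*time.Second), began, time.Minute)) // true: kubelet too young, skip
        fmt.Println(skipAgeBasedGC(began.Add(2*time.Minute), began, time.Minute))  // false: GC may proceed
    }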
pkg/kubelet/images/image_gc_manager_test.go

@@ -659,7 +659,7 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
     }
     mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageStats, imageStats, nil)
 
-    assert.NoError(t, manager.GarbageCollect(ctx))
+    assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
 }
 
 func TestGarbageCollectCadvisorFailure(t *testing.T) {
@@ -674,7 +674,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
     manager, _ := newRealImageGCManager(policy, mockStatsProvider)
     mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, &statsapi.FsStats{}, fmt.Errorf("error"))
 
-    assert.NotNil(t, manager.GarbageCollect(ctx))
+    assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
 }
 
 func TestGarbageCollectBelowSuccess(t *testing.T) {
@@ -699,7 +699,7 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
         makeImage(0, 450),
     }
 
-    assert.NoError(t, manager.GarbageCollect(ctx))
+    assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
 }
 
 func TestGarbageCollectNotEnoughFreed(t *testing.T) {
@@ -723,7 +723,7 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
         makeImage(0, 50),
     }
 
-    assert.NotNil(t, manager.GarbageCollect(ctx))
+    assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
 }
 
 func TestGarbageCollectImageNotOldEnough(t *testing.T) {
@@ -824,14 +824,15 @@ func TestGarbageCollectImageTooOld(t *testing.T) {
     // First GC round should not GC remaining image, as it was used too recently.
     assert := assert.New(t)
-    images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
+    oldStartTime := fakeClock.Now()
+    images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
     require.NoError(t, err)
     assert.Len(images, 1)
     assert.Len(fakeRuntime.ImageList, 2)
 
     // move clock by a millisecond past maxAge duration, then 1 image will be garbage collected
     fakeClock.Step(policy.MaxAge + 1)
-    images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
+    images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
     require.NoError(t, err)
     assert.Len(images, 0)
     assert.Len(fakeRuntime.ImageList, 1)
@@ -879,8 +880,10 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
     require.Equal(t, len(images), 1)
     assert.Len(fakeRuntime.ImageList, 2)
 
+    oldStartTime := fakeClock.Now()
+
     // First GC round should not GC remaining image, as it was used too recently.
-    images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
+    images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
     require.NoError(t, err)
     assert.Len(images, 1)
     assert.Len(fakeRuntime.ImageList, 2)
@@ -888,7 +891,7 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
     // Move clock by a lot, and the images should continue to not be garbage collected
     // See https://stackoverflow.com/questions/25065055/what-is-the-maximum-time-time-in-go
     fakeClock.SetTime(time.Unix(1<<63-62135596801, 999999999))
-    images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
+    images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
     require.NoError(t, err)
     assert.Len(images, 1)
     assert.Len(fakeRuntime.ImageList, 2)

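The tests pin oldStartTime before moving the clock, so the start-time guard stays satisfied even when the clock jumps to the maximum representable time. A minimal sketch of the fake-clock pattern these tests rely on, assuming k8s.io/utils/clock/testing as a stand-in for the fake clock used in the kubelet tests:

    package main

    import (
        "fmt"
        "time"

        testingclock "k8s.io/utils/clock/testing"
    )

    func main() {
        maxAge := time.Minute
        fakeClock := testingclock.NewFakeClock(time.Now())
        beganGC := fakeClock.Now()

        // Right after start: the guard keeps age-based GC from running.
        fmt.Println(fakeClock.Now().Sub(beganGC) <= maxAge) // true: skip GC

        // Step just past MaxAge, as TestGarbageCollectImageTooOld does.
        fakeClock.Step(maxAge + time.Millisecond)
        fmt.Println(fakeClock.Now().Sub(beganGC) <= maxAge) // false: GC may run
    }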
pkg/kubelet/kubelet.go

@@ -1433,16 +1433,19 @@ func (kl *Kubelet) StartGarbageCollection() {
         }
     }, ContainerGCPeriod, wait.NeverStop)
 
-    // when the high threshold is set to 100, stub the image GC manager
-    if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
-        klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100, Disable image GC")
+    // when the high threshold is set to 100, and the max age is 0 (or the max age feature is disabled)
+    // stub the image GC manager
+    if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 &&
+        (!utilfeature.DefaultFeatureGate.Enabled(features.ImageMaximumGCAge) || kl.kubeletConfiguration.ImageMaximumGCAge.Duration == 0) {
+        klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100 and ImageMaximumGCAge is 0, Disable image GC")
         return
     }
 
     prevImageGCFailed := false
+    beganGC := time.Now()
     go wait.Until(func() {
         ctx := context.Background()
-        if err := kl.imageManager.GarbageCollect(ctx); err != nil {
+        if err := kl.imageManager.GarbageCollect(ctx, beganGC); err != nil {
             if prevImageGCFailed {
                 klog.ErrorS(err, "Image garbage collection failed multiple times in a row")
                 // Only create an event for repeated failures

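Note that beganGC is captured once, before the GC loop starts, so every periodic pass measures the kubelet's uptime against the same origin. A minimal sketch of that pattern with the apimachinery wait package (the 5-minute period stands in for ImageGCPeriod, and the print stands in for the real GC call; the kubelet runs this in a goroutine with wait.NeverStop):

    package main

    import (
        "fmt"
        "time"

        "k8s.io/apimachinery/pkg/util/wait"
    )

    func main() {
        beganGC := time.Now() // captured once, outside the closure

        // wait.Until invokes the func every period until the stop channel
        // closes; with wait.NeverStop this sketch loops forever.
        wait.Until(func() {
            fmt.Println("GC pass; uptime:", time.Since(beganGC).Round(time.Second))
        }, 5*time.Minute, wait.NeverStop)
    }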
test/e2e_node/image_gc_test.go

@@ -0,0 +1,117 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2enode
+
+import (
+    "context"
+    "time"
+
+    v1 "k8s.io/api/core/v1"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    internalapi "k8s.io/cri-api/pkg/apis"
+    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+    kubefeatures "k8s.io/kubernetes/pkg/features"
+    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
+    "k8s.io/kubernetes/test/e2e/framework"
+    e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+    admissionapi "k8s.io/pod-security-admission/api"
+
+    "github.com/onsi/ginkgo/v2"
+    "github.com/onsi/gomega"
+)
+
+const (
+    // Kubelet GC's on a frequency of once every 5 minutes
+    // Add a little leeway to give it time.
+    checkGCUntil time.Duration = 6 * time.Minute
+    checkGCFreq  time.Duration = 30 * time.Second
+)
+
+var _ = SIGDescribe("ImageGarbageCollect", framework.WithSerial(), framework.WithNodeFeature("GarbageCollect"), func() {
+    f := framework.NewDefaultFramework("image-garbage-collect-test")
+    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+    var is internalapi.ImageManagerService
+    ginkgo.BeforeEach(func() {
+        var err error
+        _, is, err = getCRIClient()
+        framework.ExpectNoError(err)
+    })
+    ginkgo.AfterEach(func() {
+        framework.ExpectNoError(PrePullAllImages())
+    })
+    ginkgo.Context("when ImageMaximumGCAge is set", func() {
+        tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+            initialConfig.ImageMaximumGCAge = metav1.Duration{Duration: time.Duration(time.Minute * 1)}
+            initialConfig.ImageMinimumGCAge = metav1.Duration{Duration: time.Duration(time.Second * 1)}
+            if initialConfig.FeatureGates == nil {
+                initialConfig.FeatureGates = make(map[string]bool)
+            }
+            initialConfig.FeatureGates[string(kubefeatures.ImageMaximumGCAge)] = true
+        })
+        ginkgo.It("should GC unused images", func(ctx context.Context) {
+            pod := innocentPod()
+            e2epod.NewPodClient(f).CreateBatch(ctx, []*v1.Pod{pod})
+
+            _, err := is.PullImage(context.Background(), &runtimeapi.ImageSpec{Image: agnhostImage}, nil, nil)
+            framework.ExpectNoError(err)
+
+            allImages, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
+            framework.ExpectNoError(err)
+
+            e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)
+
+            // Even though the image gc max timing is less, we are bound by the kubelet's
+            // ImageGCPeriod, which is hardcoded to 5 minutes.
+            gomega.Eventually(ctx, func() int {
+                gcdImageList, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
+                framework.ExpectNoError(err)
+                return len(gcdImageList)
+            }, checkGCUntil, checkGCFreq).Should(gomega.BeNumerically("<", len(allImages)))
+        })
+        ginkgo.It("should not GC unused images prematurely", func(ctx context.Context) {
+            pod := innocentPod()
+            e2epod.NewPodClient(f).CreateBatch(ctx, []*v1.Pod{pod})
+
+            _, err := is.PullImage(context.Background(), &runtimeapi.ImageSpec{Image: agnhostImage}, nil, nil)
+            framework.ExpectNoError(err)
+
+            allImages, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
+            framework.ExpectNoError(err)
+
+            e2epod.NewPodClient(f).DeleteSync(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)
+
+            restartKubelet(true)
+            waitForKubeletToStart(ctx, f)
+
+            // Wait until the maxAge of the image after the kubelet is restarted to ensure it doesn't
+            // GC too early.
+            gomega.Consistently(ctx, func() int {
+                gcdImageList, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
+                framework.ExpectNoError(err)
+                return len(gcdImageList)
+            }, 50*time.Second, 10*time.Second).Should(gomega.Equal(len(allImages)))
+
+            // Even though the image gc max timing is less, we are bound by the kubelet's
+            // ImageGCPeriod, which is hardcoded to 5 minutes.
+            gomega.Eventually(ctx, func() int {
+                gcdImageList, err := is.ListImages(context.Background(), &runtimeapi.ImageFilter{})
+                framework.ExpectNoError(err)
+                return len(gcdImageList)
+            }, checkGCUntil, checkGCFreq).Should(gomega.BeNumerically("<", len(allImages)))
+        })
+    })
+})

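Note the shape of the second spec: Consistently first proves the image count holds steady for a window after the kubelet restart (exactly where the old behavior would have collected images immediately), and only then Eventually asserts that the count drops. Together the two assertions show the new start-time guard delays age-based GC without disabling it.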
test/e2e_node/util.go

@@ -74,6 +74,7 @@ import (
 var startServices = flag.Bool("start-services", true, "If true, start local node services")
 var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
 var busyboxImage = imageutils.GetE2EImage(imageutils.BusyBox)
+var agnhostImage = imageutils.GetE2EImage(imageutils.Agnhost)
 
 const (
     // Kubelet internal cgroup name for node allocatable cgroup.
@@ -233,7 +234,10 @@ func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletCon
     ginkgo.By("Starting the kubelet")
     startKubelet()
-
+    waitForKubeletToStart(ctx, f)
+}
+
+func waitForKubeletToStart(ctx context.Context, f *framework.Framework) {
     // wait until the kubelet health check will succeed
     gomega.Eventually(ctx, func() bool {
         return kubeletHealthCheck(kubeletHealthCheckURL)