image gc: don't start until max age has passed since kubelet started

Signed-off-by: Peter Hunt <pehunt@redhat.com>
This commit is contained in:
Peter Hunt 2023-11-15 15:15:54 -05:00
parent 6cd78bc5fc
commit a8ea936364
3 changed files with 23 additions and 13 deletions

View File

@ -62,7 +62,7 @@ type StatsProvider interface {
type ImageGCManager interface {
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect(ctx context.Context) error
GarbageCollect(ctx context.Context, beganGC time.Time) error
// Start async garbage collection of images.
Start()
@ -301,7 +301,7 @@ func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.
return imagesInUse, nil
}
func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.Time) error {
ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
defer otelSpan.End()
@ -311,7 +311,7 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
return err
}
images, err = im.freeOldImages(ctx, images, freeTime)
images, err = im.freeOldImages(ctx, images, freeTime, beganGC)
if err != nil {
return err
}
@ -362,10 +362,16 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
return nil
}
func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime time.Time) ([]evictionInfo, error) {
func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime, beganGC time.Time) ([]evictionInfo, error) {
if im.policy.MaxAge == 0 {
return images, nil
}
// Wait until the MaxAge has passed since the Kubelet has started,
// or else we risk prematurely garbage collecting images.
if freeTime.Sub(beganGC) <= im.policy.MaxAge {
return images, nil
}
var deletionErrors []error
remainingImages := make([]evictionInfo, 0)
for _, image := range images {

View File

@ -659,7 +659,7 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageStats, imageStats, nil)
assert.NoError(t, manager.GarbageCollect(ctx))
assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectCadvisorFailure(t *testing.T) {
@ -674,7 +674,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
manager, _ := newRealImageGCManager(policy, mockStatsProvider)
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, &statsapi.FsStats{}, fmt.Errorf("error"))
assert.NotNil(t, manager.GarbageCollect(ctx))
assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectBelowSuccess(t *testing.T) {
@ -699,7 +699,7 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
makeImage(0, 450),
}
assert.NoError(t, manager.GarbageCollect(ctx))
assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectNotEnoughFreed(t *testing.T) {
@ -723,7 +723,7 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
makeImage(0, 50),
}
assert.NotNil(t, manager.GarbageCollect(ctx))
assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectImageNotOldEnough(t *testing.T) {
@ -824,14 +824,15 @@ func TestGarbageCollectImageTooOld(t *testing.T) {
// First GC round should not GC remaining image, as it was used too recently.
assert := assert.New(t)
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
oldStartTime := fakeClock.Now()
images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)
// move clock by a millisecond past maxAge duration, then 1 image will be garbage collected
fakeClock.Step(policy.MaxAge + 1)
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
require.NoError(t, err)
assert.Len(images, 0)
assert.Len(fakeRuntime.ImageList, 1)
@ -879,8 +880,10 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
require.Equal(t, len(images), 1)
assert.Len(fakeRuntime.ImageList, 2)
oldStartTime := fakeClock.Now()
// First GC round should not GC remaining image, as it was used too recently.
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)
@ -888,7 +891,7 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
// Move clock by a lot, and the images should continue to not be garbage colleced
// See https://stackoverflow.com/questions/25065055/what-is-the-maximum-time-time-in-go
fakeClock.SetTime(time.Unix(1<<63-62135596801, 999999999))
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)

View File

@ -1440,9 +1440,10 @@ func (kl *Kubelet) StartGarbageCollection() {
}
prevImageGCFailed := false
beganGC := time.Now()
go wait.Until(func() {
ctx := context.Background()
if err := kl.imageManager.GarbageCollect(ctx); err != nil {
if err := kl.imageManager.GarbageCollect(ctx, beganGC); err != nil {
if prevImageGCFailed {
klog.ErrorS(err, "Image garbage collection failed multiple times in a row")
// Only create an event for repeated failures