diff --git a/pkg/kubelet/container/container_gc.go b/pkg/kubelet/container/container_gc.go index b0a25d50058..ae1c6eaaeea 100644 --- a/pkg/kubelet/container/container_gc.go +++ b/pkg/kubelet/container/container_gc.go @@ -45,6 +45,8 @@ type GC interface { GarbageCollect(ctx context.Context) error // Deletes all unused containers, including containers belonging to pods that are terminated but not deleted DeleteAllUnusedContainers(ctx context.Context) error + // IsContainerFsSeparateFromImageFs tells if writeable layer and read-only layer are separate. + IsContainerFsSeparateFromImageFs(ctx context.Context) bool } // SourcesReadyProvider knows how to determine if configuration sources are ready @@ -86,3 +88,22 @@ func (cgc *realContainerGC) DeleteAllUnusedContainers(ctx context.Context) error klog.InfoS("Attempting to delete unused containers") return cgc.runtime.GarbageCollect(ctx, cgc.policy, cgc.sourcesReadyProvider.AllReady(), true) } + +func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool { + resp, err := cgc.runtime.ImageFsInfo(ctx) + if err != nil { + return false + } + // These fields can be empty if CRI implementation didn't populate. + if resp.ContainerFilesystems == nil || resp.ImageFilesystems == nil || len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 { + return false + } + // KEP 4191 explains that multiple filesystems for images and containers is not + // supported at the moment. + // See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4191-split-image-filesystem#comment-on-future-extensions + // for work needed to support multiple filesystems. + if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil { + return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint + } + return false +} diff --git a/pkg/kubelet/container/container_gc_test.go b/pkg/kubelet/container/container_gc_test.go new file mode 100644 index 00000000000..7f30883ce0e --- /dev/null +++ b/pkg/kubelet/container/container_gc_test.go @@ -0,0 +1,96 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package container_test + +import ( + "context" + "reflect" + "testing" + + runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" + . "k8s.io/kubernetes/pkg/kubelet/container" + ctest "k8s.io/kubernetes/pkg/kubelet/container/testing" +) + +func TestIsContainerFsSeparateFromImageFs(t *testing.T) { + runtime := &ctest.FakeRuntime{} + fakeSources := ctest.NewFakeReadyProvider() + + gcContainer, err := NewContainerGC(runtime, GCPolicy{}, fakeSources) + if err != nil { + t.Errorf("unexpected error") + } + + cases := []struct { + name string + containerFs []*runtimeapi.FilesystemUsage + imageFs []*runtimeapi.FilesystemUsage + writeableSeparateFromReadOnly bool + }{ + { + name: "Only images", + imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, + writeableSeparateFromReadOnly: false, + }, + { + name: "images and containers", + imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, + containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "container"}}}, + writeableSeparateFromReadOnly: true, + }, + { + name: "same filesystem", + imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, + containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, + writeableSeparateFromReadOnly: false, + }, + + { + name: "Only containers", + containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, + writeableSeparateFromReadOnly: false, + }, + { + name: "neither are specified", + writeableSeparateFromReadOnly: false, + }, + { + name: "both are empty arrays", + writeableSeparateFromReadOnly: false, + containerFs: []*runtimeapi.FilesystemUsage{}, + imageFs: []*runtimeapi.FilesystemUsage{}, + }, + { + name: "FsId does not exist", + writeableSeparateFromReadOnly: false, + containerFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}}, + imageFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}}, + }, + } + + for _, tc := range cases { + runtime.SetContainerFsStats(tc.containerFs) + runtime.SetImageFsStats(tc.imageFs) + actualCommand := gcContainer.IsContainerFsSeparateFromImageFs(context.TODO()) + + if e, a := tc.writeableSeparateFromReadOnly, actualCommand; !reflect.DeepEqual(e, a) { + t.Errorf("%v: unexpected value; expected %v, got %v", tc.name, e, a) + } + runtime.SetContainerFsStats(nil) + runtime.SetImageFsStats(nil) + } +} diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index f6f3930cdb9..9d1b435ab2a 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -252,17 +252,13 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act // build the ranking functions (if not yet known) // TODO: have a function in cadvisor that lets us know if global housekeeping has completed if m.dedicatedImageFs == nil { - hasImageFs, imageFsErr := diskInfoProvider.HasDedicatedImageFs(ctx) - if imageFsErr != nil { - klog.ErrorS(imageFsErr, "Eviction manager: failed to get HasDedicatedImageFs") - return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %w", imageFsErr) + hasImageFs, splitDiskError := diskInfoProvider.HasDedicatedImageFs(ctx) + if splitDiskError != nil { + klog.ErrorS(splitDiskError, "Eviction manager: failed to get HasDedicatedImageFs") + return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %w", splitDiskError) } m.dedicatedImageFs = &hasImageFs - splitContainerImageFs, splitErr := diskInfoProvider.HasDedicatedContainerFs(ctx) - if splitErr != nil { - klog.ErrorS(splitErr, "Eviction manager: failed to get HasDedicatedContainerFs") - return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedContainerFs: %w", splitErr) - } + splitContainerImageFs := m.containerGC.IsContainerFsSeparateFromImageFs(ctx) // If we are a split filesystem but the feature is turned off // we should return an error. diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index 2befa71b945..c8993087246 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -66,8 +66,7 @@ func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride // mockDiskInfoProvider is used to simulate testing. type mockDiskInfoProvider struct { - dedicatedImageFs *bool - dedicatedContainerFs *bool + dedicatedImageFs *bool } // HasDedicatedImageFs returns the mocked value @@ -75,18 +74,14 @@ func (m *mockDiskInfoProvider) HasDedicatedImageFs(_ context.Context) (bool, err return ptr.Deref(m.dedicatedImageFs, false), nil } -// HasDedicatedContainerFs returns the mocked value -func (m *mockDiskInfoProvider) HasDedicatedContainerFs(_ context.Context) (bool, error) { - return ptr.Deref(m.dedicatedContainerFs, false), nil -} - // mockDiskGC is used to simulate invoking image and container garbage collection. type mockDiskGC struct { - err error - imageGCInvoked bool - containerGCInvoked bool - fakeSummaryProvider *fakeSummaryProvider - summaryAfterGC *statsapi.Summary + err error + imageGCInvoked bool + containerGCInvoked bool + readAndWriteSeparate bool + fakeSummaryProvider *fakeSummaryProvider + summaryAfterGC *statsapi.Summary } // DeleteUnusedImages returns the mocked values. @@ -107,6 +102,10 @@ func (m *mockDiskGC) DeleteAllUnusedContainers(_ context.Context) error { return m.err } +func (m *mockDiskGC) IsContainerFsSeparateFromImageFs(_ context.Context) bool { + return m.readAndWriteSeparate +} + func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) { pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), @@ -385,7 +384,7 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false), dedicatedContainerFs: ptr.To(false)} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} diskGC := &mockDiskGC{err: nil} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} @@ -563,8 +562,8 @@ func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} - diskGC := &mockDiskGC{err: nil} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} + diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ @@ -1308,8 +1307,8 @@ func TestDiskPressureNodeFs(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} - diskGC := &mockDiskGC{err: nil} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} + diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ @@ -1830,7 +1829,7 @@ func TestNodeReclaimFuncs(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ @@ -1847,7 +1846,7 @@ func TestNodeReclaimFuncs(t *testing.T) { // This is a constant that we use to test that disk pressure is over. Don't change! diskStatConst := diskStatStart summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)} - diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil} + diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} manager := &managerImpl{ clock: fakeClock, killPodFunc: podKiller.killPodNow, @@ -2293,8 +2292,8 @@ func TestInodePressureFsInodes(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} - diskGC := &mockDiskGC{err: nil} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} + diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ diff --git a/pkg/kubelet/eviction/types.go b/pkg/kubelet/eviction/types.go index 83cdfcdf853..0c3f8a39691 100644 --- a/pkg/kubelet/eviction/types.go +++ b/pkg/kubelet/eviction/types.go @@ -75,8 +75,6 @@ type Manager interface { type DiskInfoProvider interface { // HasDedicatedImageFs returns true if the imagefs is on a separate device from the rootfs. HasDedicatedImageFs(ctx context.Context) (bool, error) - // HasDedicatedContainerFs returns true if the container fs is on a separate device from the rootfs. - HasDedicatedContainerFs(ctx context.Context) (bool, error) } // ImageGC is responsible for performing garbage collection of unused images. @@ -89,6 +87,8 @@ type ImageGC interface { type ContainerGC interface { // DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted. DeleteAllUnusedContainers(ctx context.Context) error + // IsContainerFsSeparateFromImageFs checks if container filesystem is split from image filesystem. + IsContainerFsSeparateFromImageFs(ctx context.Context) bool } // KillPodFunc kills a pod. diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 9949a6d9b73..e4a5d00c55b 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -276,11 +276,11 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *s if imageStats == nil || len(imageStats.ImageFilesystems) == 0 || len(imageStats.ContainerFilesystems) == 0 { return nil, nil, fmt.Errorf("missing image stats: %+v", imageStats) } - splitFileSystem := false imageFs := imageStats.ImageFilesystems[0] containerFs := imageStats.ContainerFilesystems[0] if imageFs.FsId != nil && containerFs.FsId != nil && imageFs.FsId.Mountpoint != containerFs.FsId.Mountpoint { + klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageFs, "ContainerFilesystems", containerFs) splitFileSystem = true } var imageFsInodesUsed *uint64 @@ -312,12 +312,6 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *s if err != nil { return nil, nil, fmt.Errorf("failed to get container fs info: %w", err) } - // ImageFs and ContainerFs could be on different paths on the same device. - if containerFsInfo.Device == imageFsInfo.Device { - return fsStats, fsStats, nil - } - - klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageStats.ImageFilesystems[0], "ContainerFilesystems", imageStats.ContainerFilesystems[0]) var containerFsInodesUsed *uint64 if containerFsInfo.Inodes != nil && containerFsInfo.InodesFree != nil { diff --git a/pkg/kubelet/stats/cadvisor_stats_provider_test.go b/pkg/kubelet/stats/cadvisor_stats_provider_test.go index 54110111f04..4cf988f4fe1 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider_test.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider_test.go @@ -680,7 +680,7 @@ func TestCadvisorSplitImagesFsStats(t *testing.T) { mockRuntime = containertest.NewMockRuntime(t) seed = 1000 - imageFsInfo = getTestFsInfoWithDifferentMount(seed, "image") + imageFsInfo = getTestFsInfo(seed) containerSeed = 1001 containerFsInfo = getTestFsInfo(containerSeed) ) @@ -723,59 +723,7 @@ func TestCadvisorSplitImagesFsStats(t *testing.T) { assert.Equal(containerFsInfo.InodesFree, containerfs.InodesFree) assert.Equal(containerFsInfo.Inodes, containerfs.Inodes) assert.Equal(*containerFsInfo.Inodes-*containerFsInfo.InodesFree, *containerfs.InodesUsed) -} -func TestCadvisorSameDiskDifferentLocations(t *testing.T) { - ctx := context.Background() - var ( - assert = assert.New(t) - mockCadvisor = cadvisortest.NewMockInterface(t) - mockRuntime = containertest.NewMockRuntime(t) - - seed = 1000 - imageFsInfo = getTestFsInfo(seed) - containerSeed = 1001 - containerFsInfo = getTestFsInfo(containerSeed) - ) - imageFsInfoCRI := &runtimeapi.FilesystemUsage{ - Timestamp: imageFsInfo.Timestamp.Unix(), - FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "images"}, - UsedBytes: &runtimeapi.UInt64Value{Value: imageFsInfo.Usage}, - InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes}, - } - containerFsInfoCRI := &runtimeapi.FilesystemUsage{ - Timestamp: containerFsInfo.Timestamp.Unix(), - FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "containers"}, - UsedBytes: &runtimeapi.UInt64Value{Value: containerFsInfo.Usage}, - InodesUsed: &runtimeapi.UInt64Value{Value: *containerFsInfo.Inodes}, - } - imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{ - ImageFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI}, - ContainerFilesystems: []*runtimeapi.FilesystemUsage{containerFsInfoCRI}, - } - - mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil) - mockCadvisor.EXPECT().ContainerFsInfo().Return(containerFsInfo, nil) - mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil) - featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true) - - provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider()) - stats, containerfs, err := provider.ImageFsStats(ctx) - require.NoError(t, err, "imageFsStats should have no error") - - assert.Equal(imageFsInfo.Timestamp, stats.Time.Time) - assert.Equal(imageFsInfo.Available, *stats.AvailableBytes) - assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes) - assert.Equal(imageFsInfo.InodesFree, stats.InodesFree) - assert.Equal(imageFsInfo.Inodes, stats.Inodes) - assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed) - - assert.Equal(imageFsInfo.Timestamp, containerfs.Time.Time) - assert.Equal(imageFsInfo.Available, *containerfs.AvailableBytes) - assert.Equal(imageFsInfo.Capacity, *containerfs.CapacityBytes) - assert.Equal(imageFsInfo.InodesFree, containerfs.InodesFree) - assert.Equal(imageFsInfo.Inodes, containerfs.Inodes) - assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *containerfs.InodesUsed) } func TestCadvisorListPodStatsWhenContainerLogFound(t *testing.T) { diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go index 05b889aabf3..4b747c42a0c 100644 --- a/pkg/kubelet/stats/provider.go +++ b/pkg/kubelet/stats/provider.go @@ -197,20 +197,6 @@ func (p *Provider) HasDedicatedImageFs(ctx context.Context) (bool, error) { return device != rootFsInfo.Device, nil } -// HasDedicatedImageFs returns true if a dedicated image filesystem exists for storing images. -// KEP Issue Number 4191: Enhanced this to allow for the containers to be separate from images. -func (p *Provider) HasDedicatedContainerFs(ctx context.Context) (bool, error) { - imageFs, err := p.cadvisor.ImagesFsInfo() - if err != nil { - return false, err - } - containerFs, err := p.cadvisor.ContainerFsInfo() - if err != nil { - return false, err - } - return imageFs.Device != containerFs.Device, nil -} - func equalFileSystems(a, b *statsapi.FsStats) bool { if a == nil || b == nil { return false diff --git a/pkg/kubelet/stats/provider_test.go b/pkg/kubelet/stats/provider_test.go index 978537fe106..4ab66e33ae7 100644 --- a/pkg/kubelet/stats/provider_test.go +++ b/pkg/kubelet/stats/provider_test.go @@ -347,23 +347,6 @@ func getTestFsInfo(seed int) cadvisorapiv2.FsInfo { } } -func getTestFsInfoWithDifferentMount(seed int, device string) cadvisorapiv2.FsInfo { - var ( - inodes = uint64(seed + offsetFsInodes) - inodesFree = uint64(seed + offsetFsInodesFree) - ) - return cadvisorapiv2.FsInfo{ - Timestamp: time.Now(), - Device: device, - Mountpoint: "test-mount-point", - Capacity: uint64(seed + offsetFsCapacity), - Available: uint64(seed + offsetFsAvailable), - Usage: uint64(seed + offsetFsUsage), - Inodes: &inodes, - InodesFree: &inodesFree, - } -} - func getPodVolumeStats(seed int, volumeName string) statsapi.VolumeStats { availableBytes := uint64(seed + offsetFsAvailable) capacityBytes := uint64(seed + offsetFsCapacity)