diff --git a/pkg/kubelet/container/container_gc.go b/pkg/kubelet/container/container_gc.go index ae1c6eaaeea..b0a25d50058 100644 --- a/pkg/kubelet/container/container_gc.go +++ b/pkg/kubelet/container/container_gc.go @@ -45,8 +45,6 @@ type GC interface { GarbageCollect(ctx context.Context) error // Deletes all unused containers, including containers belonging to pods that are terminated but not deleted DeleteAllUnusedContainers(ctx context.Context) error - // IsContainerFsSeparateFromImageFs tells if writeable layer and read-only layer are separate. - IsContainerFsSeparateFromImageFs(ctx context.Context) bool } // SourcesReadyProvider knows how to determine if configuration sources are ready @@ -88,22 +86,3 @@ func (cgc *realContainerGC) DeleteAllUnusedContainers(ctx context.Context) error klog.InfoS("Attempting to delete unused containers") return cgc.runtime.GarbageCollect(ctx, cgc.policy, cgc.sourcesReadyProvider.AllReady(), true) } - -func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool { - resp, err := cgc.runtime.ImageFsInfo(ctx) - if err != nil { - return false - } - // These fields can be empty if CRI implementation didn't populate. - if resp.ContainerFilesystems == nil || resp.ImageFilesystems == nil || len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 { - return false - } - // KEP 4191 explains that multiple filesystems for images and containers is not - // supported at the moment. - // See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4191-split-image-filesystem#comment-on-future-extensions - // for work needed to support multiple filesystems. - if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil { - return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint - } - return false -} diff --git a/pkg/kubelet/container/container_gc_test.go b/pkg/kubelet/container/container_gc_test.go deleted file mode 100644 index 7f30883ce0e..00000000000 --- a/pkg/kubelet/container/container_gc_test.go +++ /dev/null @@ -1,96 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package container_test - -import ( - "context" - "reflect" - "testing" - - runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" - . "k8s.io/kubernetes/pkg/kubelet/container" - ctest "k8s.io/kubernetes/pkg/kubelet/container/testing" -) - -func TestIsContainerFsSeparateFromImageFs(t *testing.T) { - runtime := &ctest.FakeRuntime{} - fakeSources := ctest.NewFakeReadyProvider() - - gcContainer, err := NewContainerGC(runtime, GCPolicy{}, fakeSources) - if err != nil { - t.Errorf("unexpected error") - } - - cases := []struct { - name string - containerFs []*runtimeapi.FilesystemUsage - imageFs []*runtimeapi.FilesystemUsage - writeableSeparateFromReadOnly bool - }{ - { - name: "Only images", - imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, - writeableSeparateFromReadOnly: false, - }, - { - name: "images and containers", - imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, - containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "container"}}}, - writeableSeparateFromReadOnly: true, - }, - { - name: "same filesystem", - imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, - containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, - writeableSeparateFromReadOnly: false, - }, - - { - name: "Only containers", - containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}}, - writeableSeparateFromReadOnly: false, - }, - { - name: "neither are specified", - writeableSeparateFromReadOnly: false, - }, - { - name: "both are empty arrays", - writeableSeparateFromReadOnly: false, - containerFs: []*runtimeapi.FilesystemUsage{}, - imageFs: []*runtimeapi.FilesystemUsage{}, - }, - { - name: "FsId does not exist", - writeableSeparateFromReadOnly: false, - containerFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}}, - imageFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}}, - }, - } - - for _, tc := range cases { - runtime.SetContainerFsStats(tc.containerFs) - runtime.SetImageFsStats(tc.imageFs) - actualCommand := gcContainer.IsContainerFsSeparateFromImageFs(context.TODO()) - - if e, a := tc.writeableSeparateFromReadOnly, actualCommand; !reflect.DeepEqual(e, a) { - t.Errorf("%v: unexpected value; expected %v, got %v", tc.name, e, a) - } - runtime.SetContainerFsStats(nil) - runtime.SetImageFsStats(nil) - } -} diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index 00df1a773c6..a131fa3c43c 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -252,13 +252,17 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act // build the ranking functions (if not yet known) // TODO: have a function in cadvisor that lets us know if global housekeeping has completed if m.dedicatedImageFs == nil { - hasImageFs, splitDiskError := diskInfoProvider.HasDedicatedImageFs(ctx) - if splitDiskError != nil { - klog.ErrorS(splitDiskError, "Eviction manager: failed to get HasDedicatedImageFs") - return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %v", splitDiskError) + hasImageFs, imageFsErr := diskInfoProvider.HasDedicatedImageFs(ctx) + if imageFsErr != nil { + klog.ErrorS(imageFsErr, "Eviction manager: failed to get HasDedicatedImageFs") + return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %w", imageFsErr) } m.dedicatedImageFs = &hasImageFs - splitContainerImageFs := m.containerGC.IsContainerFsSeparateFromImageFs(ctx) + splitContainerImageFs, splitErr := diskInfoProvider.HasDedicatedContainerFs(ctx) + if splitErr != nil { + klog.ErrorS(splitErr, "Eviction manager: failed to get HasDedicatedContainerFs") + return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedContainerFs: %w", splitErr) + } // If we are a split filesystem but the feature is turned off // we should return an error. diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index c8993087246..2befa71b945 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -66,7 +66,8 @@ func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride // mockDiskInfoProvider is used to simulate testing. type mockDiskInfoProvider struct { - dedicatedImageFs *bool + dedicatedImageFs *bool + dedicatedContainerFs *bool } // HasDedicatedImageFs returns the mocked value @@ -74,14 +75,18 @@ func (m *mockDiskInfoProvider) HasDedicatedImageFs(_ context.Context) (bool, err return ptr.Deref(m.dedicatedImageFs, false), nil } +// HasDedicatedContainerFs returns the mocked value +func (m *mockDiskInfoProvider) HasDedicatedContainerFs(_ context.Context) (bool, error) { + return ptr.Deref(m.dedicatedContainerFs, false), nil +} + // mockDiskGC is used to simulate invoking image and container garbage collection. type mockDiskGC struct { - err error - imageGCInvoked bool - containerGCInvoked bool - readAndWriteSeparate bool - fakeSummaryProvider *fakeSummaryProvider - summaryAfterGC *statsapi.Summary + err error + imageGCInvoked bool + containerGCInvoked bool + fakeSummaryProvider *fakeSummaryProvider + summaryAfterGC *statsapi.Summary } // DeleteUnusedImages returns the mocked values. @@ -102,10 +107,6 @@ func (m *mockDiskGC) DeleteAllUnusedContainers(_ context.Context) error { return m.err } -func (m *mockDiskGC) IsContainerFsSeparateFromImageFs(_ context.Context) bool { - return m.readAndWriteSeparate -} - func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) { pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), @@ -384,7 +385,7 @@ func TestPIDPressure_VerifyPodStatus(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false), dedicatedContainerFs: ptr.To(false)} diskGC := &mockDiskGC{err: nil} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} @@ -562,8 +563,8 @@ func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} - diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} + diskGC := &mockDiskGC{err: nil} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ @@ -1307,8 +1308,8 @@ func TestDiskPressureNodeFs(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} - diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} + diskGC := &mockDiskGC{err: nil} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ @@ -1829,7 +1830,7 @@ func TestNodeReclaimFuncs(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ @@ -1846,7 +1847,7 @@ func TestNodeReclaimFuncs(t *testing.T) { // This is a constant that we use to test that disk pressure is over. Don't change! diskStatConst := diskStatStart summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)} - diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} + diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil} manager := &managerImpl{ clock: fakeClock, killPodFunc: podKiller.killPodNow, @@ -2292,8 +2293,8 @@ func TestInodePressureFsInodes(t *testing.T) { fakeClock := testingclock.NewFakeClock(time.Now()) podKiller := &mockPodKiller{} - diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs} - diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly} + diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs, dedicatedContainerFs: &tc.writeableSeparateFromReadOnly} + diskGC := &mockDiskGC{err: nil} nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""} config := Config{ diff --git a/pkg/kubelet/eviction/types.go b/pkg/kubelet/eviction/types.go index 0c3f8a39691..83cdfcdf853 100644 --- a/pkg/kubelet/eviction/types.go +++ b/pkg/kubelet/eviction/types.go @@ -75,6 +75,8 @@ type Manager interface { type DiskInfoProvider interface { // HasDedicatedImageFs returns true if the imagefs is on a separate device from the rootfs. HasDedicatedImageFs(ctx context.Context) (bool, error) + // HasDedicatedContainerFs returns true if the container fs is on a separate device from the rootfs. + HasDedicatedContainerFs(ctx context.Context) (bool, error) } // ImageGC is responsible for performing garbage collection of unused images. @@ -87,8 +89,6 @@ type ImageGC interface { type ContainerGC interface { // DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted. DeleteAllUnusedContainers(ctx context.Context) error - // IsContainerFsSeparateFromImageFs checks if container filesystem is split from image filesystem. - IsContainerFsSeparateFromImageFs(ctx context.Context) bool } // KillPodFunc kills a pod. diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index e4a5d00c55b..9949a6d9b73 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -276,11 +276,11 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *s if imageStats == nil || len(imageStats.ImageFilesystems) == 0 || len(imageStats.ContainerFilesystems) == 0 { return nil, nil, fmt.Errorf("missing image stats: %+v", imageStats) } + splitFileSystem := false imageFs := imageStats.ImageFilesystems[0] containerFs := imageStats.ContainerFilesystems[0] if imageFs.FsId != nil && containerFs.FsId != nil && imageFs.FsId.Mountpoint != containerFs.FsId.Mountpoint { - klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageFs, "ContainerFilesystems", containerFs) splitFileSystem = true } var imageFsInodesUsed *uint64 @@ -312,6 +312,12 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *s if err != nil { return nil, nil, fmt.Errorf("failed to get container fs info: %w", err) } + // ImageFs and ContainerFs could be on different paths on the same device. + if containerFsInfo.Device == imageFsInfo.Device { + return fsStats, fsStats, nil + } + + klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageStats.ImageFilesystems[0], "ContainerFilesystems", imageStats.ContainerFilesystems[0]) var containerFsInodesUsed *uint64 if containerFsInfo.Inodes != nil && containerFsInfo.InodesFree != nil { diff --git a/pkg/kubelet/stats/cadvisor_stats_provider_test.go b/pkg/kubelet/stats/cadvisor_stats_provider_test.go index 4cf988f4fe1..54110111f04 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider_test.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider_test.go @@ -680,7 +680,7 @@ func TestCadvisorSplitImagesFsStats(t *testing.T) { mockRuntime = containertest.NewMockRuntime(t) seed = 1000 - imageFsInfo = getTestFsInfo(seed) + imageFsInfo = getTestFsInfoWithDifferentMount(seed, "image") containerSeed = 1001 containerFsInfo = getTestFsInfo(containerSeed) ) @@ -723,7 +723,59 @@ func TestCadvisorSplitImagesFsStats(t *testing.T) { assert.Equal(containerFsInfo.InodesFree, containerfs.InodesFree) assert.Equal(containerFsInfo.Inodes, containerfs.Inodes) assert.Equal(*containerFsInfo.Inodes-*containerFsInfo.InodesFree, *containerfs.InodesUsed) +} +func TestCadvisorSameDiskDifferentLocations(t *testing.T) { + ctx := context.Background() + var ( + assert = assert.New(t) + mockCadvisor = cadvisortest.NewMockInterface(t) + mockRuntime = containertest.NewMockRuntime(t) + + seed = 1000 + imageFsInfo = getTestFsInfo(seed) + containerSeed = 1001 + containerFsInfo = getTestFsInfo(containerSeed) + ) + imageFsInfoCRI := &runtimeapi.FilesystemUsage{ + Timestamp: imageFsInfo.Timestamp.Unix(), + FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "images"}, + UsedBytes: &runtimeapi.UInt64Value{Value: imageFsInfo.Usage}, + InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes}, + } + containerFsInfoCRI := &runtimeapi.FilesystemUsage{ + Timestamp: containerFsInfo.Timestamp.Unix(), + FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "containers"}, + UsedBytes: &runtimeapi.UInt64Value{Value: containerFsInfo.Usage}, + InodesUsed: &runtimeapi.UInt64Value{Value: *containerFsInfo.Inodes}, + } + imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{ + ImageFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI}, + ContainerFilesystems: []*runtimeapi.FilesystemUsage{containerFsInfoCRI}, + } + + mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil) + mockCadvisor.EXPECT().ContainerFsInfo().Return(containerFsInfo, nil) + mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true) + + provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider()) + stats, containerfs, err := provider.ImageFsStats(ctx) + require.NoError(t, err, "imageFsStats should have no error") + + assert.Equal(imageFsInfo.Timestamp, stats.Time.Time) + assert.Equal(imageFsInfo.Available, *stats.AvailableBytes) + assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes) + assert.Equal(imageFsInfo.InodesFree, stats.InodesFree) + assert.Equal(imageFsInfo.Inodes, stats.Inodes) + assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed) + + assert.Equal(imageFsInfo.Timestamp, containerfs.Time.Time) + assert.Equal(imageFsInfo.Available, *containerfs.AvailableBytes) + assert.Equal(imageFsInfo.Capacity, *containerfs.CapacityBytes) + assert.Equal(imageFsInfo.InodesFree, containerfs.InodesFree) + assert.Equal(imageFsInfo.Inodes, containerfs.Inodes) + assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *containerfs.InodesUsed) } func TestCadvisorListPodStatsWhenContainerLogFound(t *testing.T) { diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go index 4b747c42a0c..05b889aabf3 100644 --- a/pkg/kubelet/stats/provider.go +++ b/pkg/kubelet/stats/provider.go @@ -197,6 +197,20 @@ func (p *Provider) HasDedicatedImageFs(ctx context.Context) (bool, error) { return device != rootFsInfo.Device, nil } +// HasDedicatedImageFs returns true if a dedicated image filesystem exists for storing images. +// KEP Issue Number 4191: Enhanced this to allow for the containers to be separate from images. +func (p *Provider) HasDedicatedContainerFs(ctx context.Context) (bool, error) { + imageFs, err := p.cadvisor.ImagesFsInfo() + if err != nil { + return false, err + } + containerFs, err := p.cadvisor.ContainerFsInfo() + if err != nil { + return false, err + } + return imageFs.Device != containerFs.Device, nil +} + func equalFileSystems(a, b *statsapi.FsStats) bool { if a == nil || b == nil { return false diff --git a/pkg/kubelet/stats/provider_test.go b/pkg/kubelet/stats/provider_test.go index 4ab66e33ae7..978537fe106 100644 --- a/pkg/kubelet/stats/provider_test.go +++ b/pkg/kubelet/stats/provider_test.go @@ -347,6 +347,23 @@ func getTestFsInfo(seed int) cadvisorapiv2.FsInfo { } } +func getTestFsInfoWithDifferentMount(seed int, device string) cadvisorapiv2.FsInfo { + var ( + inodes = uint64(seed + offsetFsInodes) + inodesFree = uint64(seed + offsetFsInodesFree) + ) + return cadvisorapiv2.FsInfo{ + Timestamp: time.Now(), + Device: device, + Mountpoint: "test-mount-point", + Capacity: uint64(seed + offsetFsCapacity), + Available: uint64(seed + offsetFsAvailable), + Usage: uint64(seed + offsetFsUsage), + Inodes: &inodes, + InodesFree: &inodesFree, + } +} + func getPodVolumeStats(seed int, volumeName string) statsapi.VolumeStats { availableBytes := uint64(seed + offsetFsAvailable) capacityBytes := uint64(seed + offsetFsCapacity)