Merge pull request #120616 from kannon92/kubelet-disk-api-changes

Kubelet disk api changes
Kubernetes Prow Robot, 2023-11-02 16:11:20 +01:00, committed by GitHub
commit 6c64593ba1
39 changed files with 3455 additions and 686 deletions


@ -454,6 +454,12 @@ const (
// Enable POD resources API to return allocatable resources
KubeletPodResourcesGetAllocatable featuregate.Feature = "KubeletPodResourcesGetAllocatable"
// KubeletSeparateDiskGC enables Kubelet to garbage collect images/containers on different filesystems
// owner: @kannon92
// kep: https://kep.k8s.io/4191
// alpha: v1.29
KubeletSeparateDiskGC featuregate.Feature = "KubeletSeparateDiskGC"
// owner: @sallyom
// kep: https://kep.k8s.io/2832
// alpha: v1.25
@ -1088,6 +1094,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
KubeletPodResourcesGetAllocatable: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.28, remove in 1.30
KubeletSeparateDiskGC: {Default: false, PreRelease: featuregate.Alpha},
KubeletTracing: {Default: true, PreRelease: featuregate.Beta},
KubeProxyDrainingTerminatingNodes: {Default: false, PreRelease: featuregate.Alpha},
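For reference, a minimal sketch (hypothetical main; the feature-gate API itself is real and the same check appears in the eviction manager changes below) of how code consults the new gate at runtime:

package main

import (
	"fmt"

	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/kubernetes/pkg/features"
)

func main() {
	// Defaults to false while KubeletSeparateDiskGC is alpha (v1.29); flip it with
	// --feature-gates=KubeletSeparateDiskGC=true on the kubelet.
	enabled := utilfeature.DefaultFeatureGate.Enabled(features.KubeletSeparateDiskGC)
	fmt.Println("KubeletSeparateDiskGC enabled:", enabled)
}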


@ -186,6 +186,14 @@ func (cc *cadvisorClient) getFsInfo(label string) (cadvisorapiv2.FsInfo, error)
return res[0], nil
}
func (cc *cadvisorClient) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
label, err := cc.imageFsInfoProvider.ContainerFsInfoLabel()
if err != nil {
return cadvisorapiv2.FsInfo{}, err
}
return cc.getFsInfo(label)
}
func (cc *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return cc.WatchForEvents(request)
}
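As a usage sketch (hypothetical helper and caller; the Fake client used in main is the one added by this PR), retrieving writeable-layer filesystem stats through the cadvisor Interface:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
)

// fetchContainerFsUsage is a hypothetical helper: it reports the device and
// usage of the filesystem backing container writeable layers.
func fetchContainerFsUsage(client cadvisor.Interface) error {
	fsInfo, err := client.ContainerFsInfo()
	if err != nil {
		return fmt.Errorf("containerfs info: %w", err)
	}
	fmt.Printf("containerfs device=%s capacity=%d usage=%d\n", fsInfo.Device, fsInfo.Capacity, fsInfo.Usage)
	return nil
}

func main() {
	// The Fake client returns a zero-valued FsInfo; enough to exercise the call path.
	_ = fetchContainerFsUsage(&cadvisortest.Fake{})
}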


@ -57,3 +57,32 @@ func TestImageFsInfoLabel(t *testing.T) {
})
}
}
func TestContainerFsInfoLabel(t *testing.T) {
testcases := []struct {
description string
runtime string
runtimeEndpoint string
expectedLabel string
expectedError error
}{{
description: "LabelCrioContainers should be returned",
runtimeEndpoint: crio.CrioSocket,
expectedLabel: LabelCrioContainers,
expectedError: nil,
}, {
description: "Cannot find valid imagefs label",
runtimeEndpoint: "",
expectedLabel: "",
expectedError: fmt.Errorf("no containerfs label for configured runtime"),
}}
for _, tc := range testcases {
t.Run(tc.description, func(t *testing.T) {
infoProvider := NewImageFsInfoProvider(tc.runtimeEndpoint)
label, err := infoProvider.ContainerFsInfoLabel()
assert.Equal(t, tc.expectedLabel, label)
assert.Equal(t, tc.expectedError, err)
})
}
}


@ -79,6 +79,10 @@ func (cu *cadvisorUnsupported) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, errUnsupported
}
func (cu *cadvisorUnsupported) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, errUnsupported
}
func (cu *cadvisorUnsupported) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return nil, errUnsupported
}


@ -79,6 +79,10 @@ func (cu *cadvisorClient) ImagesFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
func (cu *cadvisorClient) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cu.GetDirFsInfo(cu.rootPath)
}


@ -26,6 +26,11 @@ import (
cadvisorfs "github.com/google/cadvisor/fs"
)
// LabelCrioContainers is a label to allow cadvisor to track writeable layers
// separately from read-only layers.
// Once cAdvisor upstream changes are merged, we should remove this constant.
const LabelCrioContainers string = "crio-containers"
// imageFsInfoProvider knows how to translate the configured runtime
// to its file system label for images.
type imageFsInfoProvider struct {
@ -35,15 +40,28 @@ type imageFsInfoProvider struct {
// ImageFsInfoLabel returns the image fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ImageFsInfoLabel() (string, error) {
if detectCrioWorkaround(i) {
return cadvisorfs.LabelCrioImages, nil
}
return "", fmt.Errorf("no imagefs label for configured runtime")
}
// ContainerFsInfoLabel returns the container fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
if detectCrioWorkaround(i) {
return LabelCrioContainers, nil
}
return "", fmt.Errorf("no containerfs label for configured runtime")
}
// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.
// Related to https://github.com/kubernetes/kubernetes/issues/51798
func detectCrioWorkaround(i *imageFsInfoProvider) bool {
return strings.HasSuffix(i.runtimeEndpoint, CrioSocketSuffix)
}
// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
return &imageFsInfoProvider{runtimeEndpoint: runtimeEndpoint}
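A sketch of label resolution (hypothetical endpoints; the CRI-O suffix check and label constants are from this diff):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
)

func main() {
	// An endpoint ending in the CRI-O socket suffix takes the workaround path.
	provider := cadvisor.NewImageFsInfoProvider("unix:///var/run/crio/crio.sock")
	imageLabel, _ := provider.ImageFsInfoLabel()         // "crio-images"
	containerLabel, _ := provider.ContainerFsInfoLabel() // "crio-containers"
	fmt.Println(imageLabel, containerLabel)

	// Any other endpoint yields an error from both methods.
	_, err := cadvisor.NewImageFsInfoProvider("unix:///run/containerd/containerd.sock").ContainerFsInfoLabel()
	fmt.Println(err) // "no containerfs label for configured runtime"
}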


@ -29,6 +29,10 @@ func (i *unsupportedImageFsInfoProvider) ImageFsInfoLabel() (string, error) {
return "", errors.New("unsupported")
}
func (i *unsupportedImageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
return "", errors.New("unsupported")
}
// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
return &unsupportedImageFsInfoProvider{}


@ -101,6 +101,11 @@ func (c *Fake) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
// ContainerFsInfo is a fake implementation of Interface.ContainerFsInfo.
func (c *Fake) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
// WatchEvents is a fake implementation of Interface.WatchEvents.
func (c *Fake) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return new(events.EventChannel), nil


@ -52,6 +52,21 @@ func (m *MockInterface) EXPECT() *MockInterfaceMockRecorder {
return m.recorder
}
// ContainerFsInfo mocks base method.
func (m *MockInterface) ContainerFsInfo() (v2.FsInfo, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ContainerFsInfo")
ret0, _ := ret[0].(v2.FsInfo)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ContainerFsInfo indicates an expected call of ContainerFsInfo.
func (mr *MockInterfaceMockRecorder) ContainerFsInfo() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ContainerFsInfo", reflect.TypeOf((*MockInterface)(nil).ContainerFsInfo))
}
// ContainerInfo mocks base method.
func (m *MockInterface) ContainerInfo(name string, req *v1.ContainerInfoRequest) (*v1.ContainerInfo, error) {
m.ctrl.T.Helper()
@ -254,6 +269,21 @@ func (m *MockImageFsInfoProvider) EXPECT() *MockImageFsInfoProviderMockRecorder
return m.recorder
}
// ContainerFsInfoLabel mocks base method.
func (m *MockImageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ContainerFsInfoLabel")
ret0, _ := ret[0].(string)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ContainerFsInfoLabel indicates an expected call of ContainerFsInfoLabel.
func (mr *MockImageFsInfoProviderMockRecorder) ContainerFsInfoLabel() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ContainerFsInfoLabel", reflect.TypeOf((*MockImageFsInfoProvider)(nil).ContainerFsInfoLabel))
}
// ImageFsInfoLabel mocks base method.
func (m *MockImageFsInfoProvider) ImageFsInfoLabel() (string, error) {
m.ctrl.T.Helper()


@ -41,6 +41,10 @@ type Interface interface {
// Returns usage information about the root filesystem.
RootFsInfo() (cadvisorapiv2.FsInfo, error)
// Returns usage information about the writeable layer.
// KEP 4191 allows the image filesystem to be separate from the writeable layer.
ContainerFsInfo() (cadvisorapiv2.FsInfo, error)
// Get events streamed through passedChannel that fit the request.
WatchEvents(request *events.Request) (*events.EventChannel, error)
@ -52,4 +56,6 @@ type Interface interface {
type ImageFsInfoProvider interface {
// ImageFsInfoLabel returns the label cAdvisor should use to find the filesystem holding container images.
ImageFsInfoLabel() (string, error)
// ContainerFsInfoLabel returns the label cAdvisor should use to find the filesystem holding the writeable layer.
// In a split image filesystem this will differ from ImageFsInfoLabel.
ContainerFsInfoLabel() (string, error)
}


@ -45,6 +45,8 @@ type GC interface {
GarbageCollect(ctx context.Context) error
// Deletes all unused containers, including containers belonging to pods that are terminated but not deleted
DeleteAllUnusedContainers(ctx context.Context) error
// IsContainerFsSeparateFromImageFs reports whether the writeable layer and read-only layer are on separate filesystems.
IsContainerFsSeparateFromImageFs(ctx context.Context) bool
}
// SourcesReadyProvider knows how to determine if configuration sources are ready
@ -86,3 +88,22 @@ func (cgc *realContainerGC) DeleteAllUnusedContainers(ctx context.Context) error
klog.InfoS("Attempting to delete unused containers")
return cgc.runtime.GarbageCollect(ctx, cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}
func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
resp, err := cgc.runtime.ImageFsInfo(ctx)
if err != nil {
return false
}
// These fields can be empty if the CRI implementation didn't populate them.
if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
return false
}
// KEP 4191 explains that multiple filesystems for images and containers is not
// supported at the moment.
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4191-split-image-filesystem#comment-on-future-extensions
// for work needed to support multiple filesystems.
if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
}
return false
}
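To make the contract concrete, a minimal sketch (hypothetical mountpoints) of the CRI response shape that IsContainerFsSeparateFromImageFs treats as a split filesystem; only the first entry of each list is compared:

package main

import (
	"fmt"

	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

func main() {
	// Hypothetical mountpoints: the first entries' FsId mountpoints differ, so this counts as split.
	resp := &runtimeapi.ImageFsInfoResponse{
		ImageFilesystems:     []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "/var/lib/containers/storage"}}},
		ContainerFilesystems: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "/var/lib/containers/rw"}}},
	}
	split := resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
	fmt.Println("split filesystem:", split) // true
}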


@ -0,0 +1,96 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package container_test
import (
"context"
"reflect"
"testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
. "k8s.io/kubernetes/pkg/kubelet/container"
ctest "k8s.io/kubernetes/pkg/kubelet/container/testing"
)
func TestIsContainerFsSeparateFromImageFs(t *testing.T) {
runtime := &ctest.FakeRuntime{}
fakeSources := ctest.NewFakeReadyProvider()
gcContainer, err := NewContainerGC(runtime, GCPolicy{}, fakeSources)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
cases := []struct {
name string
containerFs []*runtimeapi.FilesystemUsage
imageFs []*runtimeapi.FilesystemUsage
writeableSeparateFromReadOnly bool
}{
{
name: "Only images",
imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
writeableSeparateFromReadOnly: false,
},
{
name: "images and containers",
imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "container"}}},
writeableSeparateFromReadOnly: true,
},
{
name: "same filesystem",
imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
writeableSeparateFromReadOnly: false,
},
{
name: "Only containers",
containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
writeableSeparateFromReadOnly: false,
},
{
name: "neither are specified",
writeableSeparateFromReadOnly: false,
},
{
name: "both are empty arrays",
writeableSeparateFromReadOnly: false,
containerFs: []*runtimeapi.FilesystemUsage{},
imageFs: []*runtimeapi.FilesystemUsage{},
},
{
name: "FsId does not exist",
writeableSeparateFromReadOnly: false,
containerFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}},
imageFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}},
},
}
for _, tc := range cases {
runtime.SetContainerFsStats(tc.containerFs)
runtime.SetImageFsStats(tc.imageFs)
actualCommand := gcContainer.IsContainerFsSeparateFromImageFs(context.TODO())
if e, a := tc.writeableSeparateFromReadOnly, actualCommand; !reflect.DeepEqual(e, a) {
t.Errorf("%v: unexpected value; expected %v, got %v", tc.name, e, a)
}
runtime.SetContainerFsStats(nil)
runtime.SetImageFsStats(nil)
}
}


@ -160,6 +160,8 @@ type ImageService interface {
RemoveImage(ctx context.Context, image ImageSpec) error
// ImageStats returns Image statistics.
ImageStats(ctx context.Context) (*ImageStats, error)
// ImageFsInfo returns a list of file systems for containers/images
ImageFsInfo(ctx context.Context) (*runtimeapi.ImageFsInfoResponse, error)
}
// Attacher interface allows to attach a container.

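A sketch of a consumer of the new method (hypothetical helper; the FakeRuntime used in main is the one added by this PR, and the Get* accessors are the generated CRI getters):

package main

import (
	"context"
	"fmt"

	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	ctest "k8s.io/kubernetes/pkg/kubelet/container/testing"
)

// printRuntimeFilesystems is a hypothetical helper listing the filesystems the runtime reports.
func printRuntimeFilesystems(ctx context.Context, runtime kubecontainer.ImageService) error {
	resp, err := runtime.ImageFsInfo(ctx)
	if err != nil {
		return err
	}
	for _, fs := range resp.ImageFilesystems {
		fmt.Println("imagefs mountpoint:", fs.GetFsId().GetMountpoint())
	}
	for _, fs := range resp.ContainerFilesystems {
		fmt.Println("containerfs mountpoint:", fs.GetFsId().GetMountpoint())
	}
	return nil
}

func main() {
	_ = printRuntimeFilesystems(context.Background(), &ctest.FakeRuntime{})
}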

@ -0,0 +1,36 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testing
import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// FakeReadyProvider implements a fake ready provider
type FakeReadyProvider struct {
kubecontainer.SourcesReadyProvider
}
// AllReady notifies caller that the Fake Provider is ready.
func (frp *FakeReadyProvider) AllReady() bool {
return true
}
// NewFakeReadyProvider creates a FakeReadyProvider object
func NewFakeReadyProvider() kubecontainer.SourcesReadyProvider {
return &FakeReadyProvider{}
}


@ -45,6 +45,8 @@ type FakeRuntime struct {
PodList []*FakePod
AllPodList []*FakePod
ImageList []kubecontainer.Image
ImageFsStats []*runtimeapi.FilesystemUsage
ContainerFsStats []*runtimeapi.FilesystemUsage
APIPodStatus v1.PodStatus
PodStatus kubecontainer.PodStatus
StartedPods []string
@ -422,6 +424,16 @@ func (f *FakeRuntime) ListPodSandboxMetrics(_ context.Context) ([]*runtimeapi.Po
return nil, f.Err
}
// SetContainerFsStats sets the containerFsStats for dependency injection.
func (f *FakeRuntime) SetContainerFsStats(val []*runtimeapi.FilesystemUsage) {
f.ContainerFsStats = val
}
// SetImageFsStats sets the ImageFsStats for dependency injection.
func (f *FakeRuntime) SetImageFsStats(val []*runtimeapi.FilesystemUsage) {
f.ImageFsStats = val
}
func (f *FakeRuntime) ImageStats(_ context.Context) (*kubecontainer.ImageStats, error) {
f.Lock()
defer f.Unlock()
@ -430,6 +442,20 @@ func (f *FakeRuntime) ImageStats(_ context.Context) (*kubecontainer.ImageStats,
return nil, f.Err
}
// ImageFsInfo returns an ImageFsInfoResponse given the DI injected values of ImageFsStats
// and ContainerFsStats.
func (f *FakeRuntime) ImageFsInfo(_ context.Context) (*runtimeapi.ImageFsInfoResponse, error) {
f.Lock()
defer f.Unlock()
f.CalledFunctions = append(f.CalledFunctions, "ImageFsInfo")
resp := &runtimeapi.ImageFsInfoResponse{
ImageFilesystems: f.ImageFsStats,
ContainerFilesystems: f.ContainerFsStats,
}
return resp, f.Err
}
func (f *FakeStreamingRuntime) GetExec(_ context.Context, id kubecontainer.ContainerID, cmd []string, stdin, stdout, stderr, tty bool) (*url.URL, error) {
f.Lock()
defer f.Unlock()


@ -242,6 +242,21 @@ func (mr *MockRuntimeMockRecorder) GetPods(ctx, all interface{}) *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPods", reflect.TypeOf((*MockRuntime)(nil).GetPods), ctx, all)
}
// ImageFsInfo mocks base method.
func (m *MockRuntime) ImageFsInfo(ctx context.Context) (*v10.ImageFsInfoResponse, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ImageFsInfo", ctx)
ret0, _ := ret[0].(*v10.ImageFsInfoResponse)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ImageFsInfo indicates an expected call of ImageFsInfo.
func (mr *MockRuntimeMockRecorder) ImageFsInfo(ctx interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ImageFsInfo", reflect.TypeOf((*MockRuntime)(nil).ImageFsInfo), ctx)
}
// ImageStats mocks base method.
func (m *MockRuntime) ImageStats(ctx context.Context) (*container.ImageStats, error) {
m.ctrl.T.Helper()
@ -523,6 +538,21 @@ func (mr *MockImageServiceMockRecorder) GetImageRef(ctx, image interface{}) *gom
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetImageRef", reflect.TypeOf((*MockImageService)(nil).GetImageRef), ctx, image)
}
// ImageFsInfo mocks base method.
func (m *MockImageService) ImageFsInfo(ctx context.Context) (*v10.ImageFsInfoResponse, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ImageFsInfo", ctx)
ret0, _ := ret[0].(*v10.ImageFsInfoResponse)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ImageFsInfo indicates an expected call of ImageFsInfo.
func (mr *MockImageServiceMockRecorder) ImageFsInfo(ctx interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ImageFsInfo", reflect.TypeOf((*MockImageService)(nil).ImageFsInfo), ctx)
}
// ImageStats mocks base method.
func (m *MockImageService) ImageStats(ctx context.Context) (*container.ImageStats, error) {
m.ctrl.T.Helper()


@ -32,10 +32,25 @@ const (
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing image layers.
// If the container filesystem and image filesystem are not separate,
// then imagefs can store both image layers and writeable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing image layers.
// If the container filesystem and image filesystem are not separate,
// then imagefs can store both image layers and writeable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
// SignalContainerFsAvailable is amount of storage available on filesystem that container runtime uses for container writable layers.
// In case of a single filesystem, containerfs=nodefs.
// In case of an image filesystem, containerfs=imagefs.
// We will override user settings and set to either imagefs or nodefs depending on configuration.
SignalContainerFsAvailable Signal = "containerfs.available"
// SignalContainerFsInodesFree is amount of inodes available on filesystem that container runtime uses for container writable layers.
// In case of a single filesystem, containerfs=nodefs.
// In case of an image filesystem, containerfs=imagefs.
// We will override user settings and set to either imagefs or nodefs depending on configuration.
SignalContainerFsInodesFree Signal = "containerfs.inodesFree"
// SignalAllocatableMemoryAvailable is amount of memory available for pod allocation (i.e. allocatable - workingSet (of pods)), in bytes.
SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
// SignalPIDAvailable is amount of PID available for pod allocation
@ -63,6 +78,8 @@ var OpForSignal = map[Signal]ThresholdOperator{
SignalNodeFsInodesFree: OpLessThan,
SignalImageFsAvailable: OpLessThan,
SignalImageFsInodesFree: OpLessThan,
SignalContainerFsAvailable: OpLessThan,
SignalContainerFsInodesFree: OpLessThan,
SignalAllocatableMemoryAvailable: OpLessThan,
SignalPIDAvailable: OpLessThan,
}


@ -94,6 +94,8 @@ type managerImpl struct {
lastObservations signalObservations
// dedicatedImageFs indicates if imagefs is on a separate device from the rootfs
dedicatedImageFs *bool
// splitContainerImageFs indicates if containerfs is on a separate device from imagefs
splitContainerImageFs *bool
// thresholdNotifiers is a list of memory threshold notifiers which each notify for a memory eviction threshold
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
@ -129,6 +131,7 @@ func NewManager(
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
dedicatedImageFs: nil,
splitContainerImageFs: nil,
thresholdNotifiers: []ThresholdNotifier{},
localStorageCapacityIsolation: localStorageCapacityIsolation,
}
@ -197,10 +200,14 @@ func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePod
// start the eviction manager monitoring
go func() {
for {
evictedPods, err := m.synchronize(diskInfoProvider, podFunc)
if evictedPods != nil && err == nil {
klog.InfoS("Eviction manager: pods evicted, waiting for pod to be cleaned up", "pods", klog.KObjSlice(evictedPods))
m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
} else {
if err != nil {
klog.ErrorS(err, "Eviction manager: failed to synchronize")
}
time.Sleep(monitoringInterval)
}
}
@ -230,33 +237,50 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pods that were killed, or nil if no pod was killed, along with any error.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) ([]*v1.Pod, error) {
ctx := context.Background()
// if we have nothing to do, just return
thresholds := m.config.Thresholds
if len(thresholds) == 0 && !m.localStorageCapacityIsolation {
return nil, nil
}
klog.V(3).InfoS("Eviction manager: synchronize housekeeping")
// build the ranking functions (if not yet known)
// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
if m.dedicatedImageFs == nil {
hasImageFs, splitDiskError := diskInfoProvider.HasDedicatedImageFs(ctx)
if splitDiskError != nil {
klog.ErrorS(splitDiskError, "Eviction manager: failed to get HasDedicatedImageFs")
return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %v", splitDiskError)
}
m.dedicatedImageFs = &hasImageFs
splitContainerImageFs := m.containerGC.IsContainerFsSeparateFromImageFs(ctx)
// If we have a split filesystem but the feature is turned off
// we should return an error. This is a bad state.
if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletSeparateDiskGC) && splitContainerImageFs {
splitDiskError := fmt.Errorf("KubeletSeparateDiskGC is turned off but we still have a split filesystem")
return nil, splitDiskError
}
thresholds, err := UpdateContainerFsThresholds(m.config.Thresholds, hasImageFs, splitContainerImageFs)
m.config.Thresholds = thresholds
if err != nil {
klog.ErrorS(err, "eviction manager: found conflicting containerfs eviction. Ignoring.")
}
m.splitContainerImageFs = &splitContainerImageFs
m.signalToRankFunc = buildSignalToRankFunc(hasImageFs, splitContainerImageFs)
m.signalToNodeReclaimFuncs = buildSignalToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs, splitContainerImageFs)
}
klog.V(3).InfoS("FileSystem detection", "DedicatedImageFs", m.dedicatedImageFs, "SplitImageFs", m.splitContainerImageFs)
activePods := podFunc()
updateStats := true
summary, err := m.summaryProvider.Get(ctx, updateStats)
if err != nil {
klog.ErrorS(err, "Eviction manager: failed to get summary stats")
return nil, nil
}
if m.clock.Since(m.thresholdsLastUpdated) > notifierRefreshInterval {
@ -324,20 +348,20 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// If eviction happens in localStorageEviction function, skip the rest of eviction action
if m.localStorageCapacityIsolation {
if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 {
return evictedPods, nil
}
}
if len(thresholds) == 0 {
klog.V(3).InfoS("Eviction manager: no resources are starved")
return nil, nil
}
// rank the thresholds by eviction priority
sort.Sort(byEvictionPriority(thresholds))
thresholdToReclaim, resourceToReclaim, foundAny := getReclaimableThreshold(thresholds)
if !foundAny {
return nil, nil
}
klog.InfoS("Eviction manager: attempting to reclaim", "resourceName", resourceToReclaim)
@ -347,7 +371,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(ctx, thresholdToReclaim.Signal, resourceToReclaim) {
klog.InfoS("Eviction manager: able to reduce resource pressure without evicting pods.", "resourceName", resourceToReclaim)
return nil, nil
}
klog.InfoS("Eviction manager: must evict pod(s) to reclaim", "resourceName", resourceToReclaim)
@ -356,13 +380,13 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal]
if !ok {
klog.ErrorS(nil, "Eviction manager: no ranking function for signal", "threshold", thresholdToReclaim.Signal)
return nil, nil
}
// the only candidates viable for eviction are those pods that had anything running.
if len(activePods) == 0 {
klog.ErrorS(nil, "Eviction manager: eviction thresholds have been met, but no pods are active to evict")
return nil, nil
}
// rank the running pods for eviction for the specified resource
@ -397,11 +421,11 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
return []*v1.Pod{pod}, nil
}
}
klog.InfoS("Eviction manager: unable to evict any pods from the node")
return nil, nil
}
func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods []*v1.Pod) {

File diff suppressed because it is too large.


@ -17,6 +17,7 @@ limitations under the License.
package eviction
import (
"errors"
"fmt"
"sort"
"strconv"
@ -80,9 +81,11 @@ func init() {
signalToNodeCondition[evictionapi.SignalMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[evictionapi.SignalAllocatableMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[evictionapi.SignalImageFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalContainerFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalContainerFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalPIDAvailable] = v1.NodePIDPressure
// map signals to resources (and vice-versa)
@ -91,6 +94,8 @@ func init() {
signalToResource[evictionapi.SignalAllocatableMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalImageFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalContainerFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalContainerFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalPIDAvailable] = resourcePids
@ -172,6 +177,181 @@ func addAllocatableThresholds(thresholds []evictionapi.Threshold) []evictionapi.
return append(append([]evictionapi.Threshold{}, thresholds...), additionalThresholds...)
}
// UpdateContainerFsThresholds will add containerfs eviction hard/soft
// settings based on container runtime settings.
// Thresholds are parsed from the evictionHard and evictionSoft limits, so any user-supplied containerfs settings will be overridden.
// If there is a single filesystem, then containerfs settings are the same as nodefs.
// If there is a separate image filesystem for both containers and images, then containerfs settings are the same as imagefs.
func UpdateContainerFsThresholds(thresholds []evictionapi.Threshold, imageFs, separateContainerImageFs bool) ([]evictionapi.Threshold, error) {
hardNodeFsDisk := evictionapi.Threshold{}
softNodeFsDisk := evictionapi.Threshold{}
hardNodeINodeDisk := evictionapi.Threshold{}
softNodeINodeDisk := evictionapi.Threshold{}
hardImageFsDisk := evictionapi.Threshold{}
softImageFsDisk := evictionapi.Threshold{}
hardImageINodeDisk := evictionapi.Threshold{}
softImageINodeDisk := evictionapi.Threshold{}
hardContainerFsDisk := -1
softContainerFsDisk := -1
hardContainerFsINodes := -1
softContainerFsINodes := -1
// Find the imagefs and nodefs thresholds
var err error
for idx, threshold := range thresholds {
if threshold.Signal == evictionapi.SignalImageFsAvailable && isHardEvictionThreshold(threshold) {
hardImageFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalImageFsAvailable && !isHardEvictionThreshold(threshold) {
softImageFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalImageFsInodesFree && isHardEvictionThreshold(threshold) {
hardImageINodeDisk = threshold
}
if threshold.Signal == evictionapi.SignalImageFsInodesFree && !isHardEvictionThreshold(threshold) {
softImageINodeDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsAvailable && isHardEvictionThreshold(threshold) {
hardNodeFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsAvailable && !isHardEvictionThreshold(threshold) {
softNodeFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsInodesFree && isHardEvictionThreshold(threshold) {
hardNodeINodeDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsInodesFree && !isHardEvictionThreshold(threshold) {
softNodeINodeDisk = threshold
}
// We log a warning and override the settings.
// This is safe because we do not support a separate container filesystem here,
// so the limits must match either nodefs or imagefs.
if threshold.Signal == evictionapi.SignalContainerFsAvailable && isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.available for hard eviction. ignoring"))
hardContainerFsDisk = idx
}
if threshold.Signal == evictionapi.SignalContainerFsAvailable && !isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.available for soft eviction. ignoring"))
softContainerFsDisk = idx
}
if threshold.Signal == evictionapi.SignalContainerFsInodesFree && isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.inodesFree for hard eviction. ignoring"))
hardContainerFsINodes = idx
}
if threshold.Signal == evictionapi.SignalContainerFsInodesFree && !isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.inodesFree for soft eviction. ignoring"))
softContainerFsINodes = idx
}
}
// Either split disk case (containerfs=nodefs) or single filesystem
if (imageFs && separateContainerImageFs) || (!imageFs && !separateContainerImageFs) {
if hardContainerFsDisk != -1 {
thresholds[hardContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, Operator: hardNodeFsDisk.Operator, Value: hardNodeFsDisk.Value, MinReclaim: hardNodeFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: hardNodeFsDisk.Operator,
Value: hardNodeFsDisk.Value,
MinReclaim: hardNodeFsDisk.MinReclaim,
})
}
if softContainerFsDisk != -1 {
thresholds[softContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, GracePeriod: softNodeFsDisk.GracePeriod, Operator: softNodeFsDisk.Operator, Value: softNodeFsDisk.Value, MinReclaim: softNodeFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: softNodeFsDisk.Operator,
Value: softNodeFsDisk.Value,
MinReclaim: softNodeFsDisk.MinReclaim,
GracePeriod: softNodeFsDisk.GracePeriod,
})
}
if hardContainerFsINodes != -1 {
thresholds[hardContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, Operator: hardNodeINodeDisk.Operator, Value: hardNodeINodeDisk.Value, MinReclaim: hardNodeINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: hardNodeINodeDisk.Operator,
Value: hardNodeINodeDisk.Value,
MinReclaim: hardNodeINodeDisk.MinReclaim,
})
}
if softContainerFsINodes != -1 {
thresholds[softContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, GracePeriod: softNodeINodeDisk.GracePeriod, Operator: softNodeINodeDisk.Operator, Value: softNodeINodeDisk.Value, MinReclaim: softNodeINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: softNodeINodeDisk.Operator,
Value: softNodeINodeDisk.Value,
MinReclaim: softNodeINodeDisk.MinReclaim,
GracePeriod: softNodeINodeDisk.GracePeriod,
})
}
}
// Separate image filesystem case (containerfs settings mirror imagefs)
if imageFs && !separateContainerImageFs {
if hardContainerFsDisk != -1 {
thresholds[hardContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, Operator: hardImageFsDisk.Operator, Value: hardImageFsDisk.Value, MinReclaim: hardImageFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: hardImageFsDisk.Operator,
Value: hardImageFsDisk.Value,
MinReclaim: hardImageFsDisk.MinReclaim,
})
}
if softContainerFsDisk != -1 {
thresholds[softContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, GracePeriod: softImageFsDisk.GracePeriod, Operator: softImageFsDisk.Operator, Value: softImageFsDisk.Value, MinReclaim: softImageFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: softImageFsDisk.Operator,
Value: softImageFsDisk.Value,
MinReclaim: softImageFsDisk.MinReclaim,
GracePeriod: softImageFsDisk.GracePeriod,
})
}
if hardContainerFsINodes != -1 {
thresholds[hardContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, Operator: hardImageINodeDisk.Operator, Value: hardImageINodeDisk.Value, MinReclaim: hardImageINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: hardImageINodeDisk.Operator,
Value: hardImageINodeDisk.Value,
MinReclaim: hardImageINodeDisk.MinReclaim,
})
}
if softContainerFsINodes != -1 {
thresholds[softContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, GracePeriod: softImageINodeDisk.GracePeriod, Operator: softImageINodeDisk.Operator, Value: softImageINodeDisk.Value, MinReclaim: softImageINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: softImageINodeDisk.Operator,
Value: softImageINodeDisk.Value,
MinReclaim: softImageINodeDisk.MinReclaim,
GracePeriod: softImageINodeDisk.GracePeriod,
})
}
}
return thresholds, err
}
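A usage sketch of the override behavior in the single-filesystem case (imageFs=false, separateContainerImageFs=false); the 1Gi value is hypothetical, and only the exported function above is exercised:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
	oneGi := resource.MustParse("1Gi")
	// Start with a hard nodefs.available threshold only.
	thresholds := []evictionapi.Threshold{{
		Signal:   evictionapi.SignalNodeFsAvailable,
		Operator: evictionapi.OpLessThan,
		Value:    evictionapi.ThresholdValue{Quantity: &oneGi},
	}}
	// With a single filesystem, containerfs thresholds are derived from nodefs and appended.
	updated, err := eviction.UpdateContainerFsThresholds(thresholds, false, false)
	for _, t := range updated {
		fmt.Println(t.Signal, t.Value.Quantity)
	}
	fmt.Println(err) // nil: no user-supplied containerfs settings were overridden
}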
// parseThresholdStatements parses the input statements into a list of Threshold objects.
func parseThresholdStatements(statements map[string]string) ([]evictionapi.Threshold, error) {
if len(statements) == 0 {
@ -708,6 +888,7 @@ func makeSignalObservations(summary *statsapi.Summary) (signalObservations, stat
capacity: resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
time: imageFs.Time,
}
}
if imageFs.InodesFree != nil && imageFs.Inodes != nil {
result[evictionapi.SignalImageFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.DecimalSI),
@ -716,6 +897,21 @@ }
}
}
}
if containerFs := summary.Node.Runtime.ContainerFs; containerFs != nil {
if containerFs.AvailableBytes != nil && containerFs.CapacityBytes != nil {
result[evictionapi.SignalContainerFsAvailable] = signalObservation{
available: resource.NewQuantity(int64(*containerFs.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*containerFs.CapacityBytes), resource.BinarySI),
time: containerFs.Time,
}
}
if containerFs.InodesFree != nil && containerFs.Inodes != nil {
result[evictionapi.SignalContainerFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*containerFs.InodesFree), resource.DecimalSI),
capacity: resource.NewQuantity(int64(*containerFs.Inodes), resource.DecimalSI),
time: containerFs.Time,
}
}
}
}
if rlimit := summary.Node.Rlimit; rlimit != nil {
@ -951,27 +1147,47 @@ func isAllocatableEvictionThreshold(threshold evictionapi.Threshold) bool {
}
// buildSignalToRankFunc returns ranking functions associated with resources
func buildSignalToRankFunc(withImageFs bool, imageContainerSplitFs bool) map[evictionapi.Signal]rankFunc {
signalToRankFunc := map[evictionapi.Signal]rankFunc{
evictionapi.SignalMemoryAvailable: rankMemoryPressure,
evictionapi.SignalAllocatableMemoryAvailable: rankMemoryPressure,
evictionapi.SignalPIDAvailable: rankPIDPressure,
}
// usage of an imagefs is optional
// We have a dedicated image filesystem (images and containers are on the same disk),
// so we assume it is just a separate imagefs.
if withImageFs && !imageContainerSplitFs {
// with an imagefs, nodefs pod rank func for eviction only includes logs and local volumes
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
// with an imagefs, imagefs pod rank func for eviction includes rootfs and images
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsImages}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsImages}, resourceInodes)
signalToRankFunc[evictionapi.SignalContainerFsAvailable] = signalToRankFunc[evictionapi.SignalImageFsAvailable]
signalToRankFunc[evictionapi.SignalContainerFsInodesFree] = signalToRankFunc[evictionapi.SignalImageFsInodesFree]
// If both imagefs and container fs are on separate disks
// we want to track the writeable layer in containerfs signals.
} else if withImageFs && imageContainerSplitFs {
// with split imagefs/containerfs, nodefs pod rank func for eviction includes logs, local volumes and rootfs
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot}, resourceInodes)
signalToRankFunc[evictionapi.SignalContainerFsAvailable] = signalToRankFunc[evictionapi.SignalNodeFsAvailable]
signalToRankFunc[evictionapi.SignalContainerFsInodesFree] = signalToRankFunc[evictionapi.SignalNodeFsInodesFree]
// with split imagefs/containerfs, imagefs pod rank func for eviction only includes images
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsImages}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsImages}, resourceInodes)
// If the image fs is not on a separate disk from root, everything shares nodefs
} else {
// without an imagefs, nodefs pod rank func for eviction looks at all fs stats.
// since imagefs and nodefs share a common device, they share common ranking functions.
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalContainerFsAvailable] = signalToRankFunc[evictionapi.SignalNodeFsAvailable]
signalToRankFunc[evictionapi.SignalContainerFsInodesFree] = signalToRankFunc[evictionapi.SignalNodeFsInodesFree]
}
return signalToRankFunc
}
@ -982,19 +1198,29 @@ func PodIsEvicted(podStatus v1.PodStatus) bool {
}
// buildSignalToNodeReclaimFuncs returns reclaim functions associated with resources.
func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool, splitContainerImageFs bool) map[evictionapi.Signal]nodeReclaimFuncs {
signalToReclaimFunc := map[evictionapi.Signal]nodeReclaimFuncs{}
// usage of an imagefs is optional
if withImageFs && !splitContainerImageFs {
// with an imagefs, nodefs pressure should just delete logs
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
// usage of imagefs and container fs on separate disks
// containers gc on containerfs pressure
// image gc on imagefs pressure
} else if withImageFs && splitContainerImageFs {
// with an imagefs, imagefs pressure should delete unused images
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{imageGC.DeleteUnusedImages}
// with a split fs and imagefs, containerfs pressure should delete unused containers
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers}
} else {
// without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs, containerfs and nodefs share a common device, they share common reclaim functions
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
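The three branches above correspond to three node layouts: a single shared disk, a dedicated image disk, and separate image and writable-layer disks. A hedged sketch of how the split flag could be derived from the runtime and fed to both helpers (the call site and buildSignalToRankFunc's exact parameters are assumptions, not shown in this diff):

// Hypothetical wiring inside the eviction manager.
splitContainerImageFs := containerGC.IsContainerFsSeparateFromImageFs(ctx)
signalToRankFunc := buildSignalToRankFunc(withImageFs, splitContainerImageFs)
signalToReclaimFunc := buildSignalToNodeReclaimFuncs(imageGC, containerGC, withImageFs, splitContainerImageFs)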

File diff suppressed because it is too large


@@ -218,6 +218,20 @@ func (mr *MockContainerGCMockRecorder) DeleteAllUnusedContainers(ctx interface{}
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteAllUnusedContainers", reflect.TypeOf((*MockContainerGC)(nil).DeleteAllUnusedContainers), ctx)
}
// IsContainerFsSeparateFromImageFs mocks base method.
func (m *MockContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "IsContainerFsSeparateFromImageFs", ctx)
ret0, _ := ret[0].(bool)
return ret0
}
// IsContainerFsSeparateFromImageFs indicates an expected call of IsContainerFsSeparateFromImageFs.
func (mr *MockContainerGCMockRecorder) IsContainerFsSeparateFromImageFs(ctx interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsContainerFsSeparateFromImageFs", reflect.TypeOf((*MockContainerGC)(nil).IsContainerFsSeparateFromImageFs), ctx)
}
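Since these are ordinary gomock stubs, a test can program the new method like any other expectation; a minimal sketch (hypothetical test body, assuming the NewMockContainerGC constructor that mockgen emits):

mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
containerGC := NewMockContainerGC(mockCtrl)
// Pretend the runtime keeps writable layers on their own filesystem.
containerGC.EXPECT().IsContainerFsSeparateFromImageFs(gomock.Any()).Return(true)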
// MockCgroupNotifier is a mock of CgroupNotifier interface.
type MockCgroupNotifier struct {
ctrl *gomock.Controller


@@ -38,6 +38,8 @@ const (
fsStatsLogs fsStatsType = "logs"
// fsStatsRoot identifies stats for pod container writable layers.
fsStatsRoot fsStatsType = "root"
// fsStatsImages identifies stats for pod container read-only (image) layers
fsStatsImages fsStatsType = "images"
)
// Config holds information about how eviction is configured.
@@ -85,6 +87,8 @@ type ImageGC interface {
type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
DeleteAllUnusedContainers(ctx context.Context) error
// IsContainerFsSeparateFromImageFs checks if container filesystem is split from image filesystem.
IsContainerFsSeparateFromImageFs(ctx context.Context) bool
}
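A plausible implementation of this method (a sketch under assumed names; the real implementation is not part of this hunk) compares the mountpoints the CRI runtime reports for its image and container filesystems:

// Sketch only: cgc.runtime and its ImageFsInfo method are assumed names.
func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
    resp, err := cgc.runtime.ImageFsInfo(ctx)
    if err != nil || len(resp.ImageFilesystems) == 0 || len(resp.ContainerFilesystems) == 0 {
        return false
    }
    return resp.ImageFilesystems[0].GetFsId().GetMountpoint() !=
        resp.ContainerFilesystems[0].GetFsId().GetMountpoint()
}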
// KillPodFunc kills a pod.


@@ -54,7 +54,7 @@ const imageIndexTupleFormat = "%s,%s"
// collection.
type StatsProvider interface {
// ImageFsStats returns the stats of the image filesystem.
ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error)
}
// ImageGCManager is an interface for managing lifecycle of all images.
@@ -328,7 +328,7 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
}
// Get disk usage on disk holding images.
fsStats, _, err := im.statsProvider.ImageFsStats(ctx)
if err != nil {
return err
}
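Image GC keys off the first return value and discards the container filesystem stats. For reference, a sketch of how the manager can turn these stats into the usage percentage it thresholds on (field names from statsapi.FsStats; the policy fields are assumed from ImageGCPolicy; nil checks elided):

capacity := *fsStats.CapacityBytes
available := *fsStats.AvailableBytes
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
    // free images until usage drops to LowThresholdPercent
}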


@@ -618,10 +618,11 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
manager, _ := newRealImageGCManager(policy, mockStatsProvider)
// Expect 40% usage.
imageStats := &statsapi.FsStats{
AvailableBytes: uint64Ptr(600),
CapacityBytes: uint64Ptr(1000),
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageStats, imageStats, nil)
assert.NoError(t, manager.GarbageCollect(ctx))
}
@@ -637,7 +638,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
mockStatsProvider := statstest.NewMockProvider(mockCtrl)
manager, _ := newRealImageGCManager(policy, mockStatsProvider)
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, &statsapi.FsStats{}, fmt.Errorf("error"))
assert.NotNil(t, manager.GarbageCollect(ctx))
}
@@ -654,10 +655,11 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
manager, fakeRuntime := newRealImageGCManager(policy, mockStatsProvider)
// Expect 95% usage and most of it gets freed.
imageFs := &statsapi.FsStats{
AvailableBytes: uint64Ptr(50),
CapacityBytes: uint64Ptr(1000),
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageFs, imageFs, nil)
fakeRuntime.ImageList = []container.Image{
makeImage(0, 450),
}
@@ -677,10 +679,11 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
manager, fakeRuntime := newRealImageGCManager(policy, mockStatsProvider)
// Expect 95% usage and little of it gets freed.
imageFs := &statsapi.FsStats{
AvailableBytes: uint64Ptr(50),
CapacityBytes: uint64Ptr(1000),
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageFs, imageFs, nil)
fakeRuntime.ImageList = []container.Image{
makeImage(0, 50),
}


@@ -1326,7 +1326,7 @@ func (kl *Kubelet) ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([
}
// ImageFsStats is delegated to StatsProvider, which implements stats.Provider interface
func (kl *Kubelet) ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) {
return kl.StatsProvider.ImageFsStats(ctx)
}


@@ -158,3 +158,12 @@ func (m *kubeGenericRuntimeManager) ImageStats(ctx context.Context) (*kubecontai
}
return stats, nil
}
func (m *kubeGenericRuntimeManager) ImageFsInfo(ctx context.Context) (*runtimeapi.ImageFsInfoResponse, error) {
allImages, err := m.imageService.ImageFsInfo(ctx)
if err != nil {
klog.ErrorS(err, "Failed to get image filesystem")
return nil, err
}
return allImages, nil
}
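The new method surfaces the raw CRI response, so a caller can compare the reported mountpoints directly; a hedged sketch of such a caller (everything outside this diff is an assumption):

resp, err := m.ImageFsInfo(ctx)
if err == nil && len(resp.ImageFilesystems) > 0 && len(resp.ContainerFilesystems) > 0 {
    imageMount := resp.ImageFilesystems[0].GetFsId().GetMountpoint()
    containerMount := resp.ContainerFilesystems[0].GetFsId().GetMountpoint()
    if imageMount != containerMount {
        // writable container layers live on their own filesystem
    }
}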


@@ -289,7 +289,9 @@ func (*fakeKubelet) ListPodStatsAndUpdateCPUNanoCoreUsage(_ context.Context) ([]
func (*fakeKubelet) ListPodCPUAndMemoryStats(_ context.Context) ([]statsapi.PodStats, error) {
return nil, nil
}
func (*fakeKubelet) ImageFsStats(_ context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) {
return nil, nil, nil
}
func (*fakeKubelet) RlimitStats() (*statsapi.RlimitStats, error) { return nil, nil }
func (*fakeKubelet) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) {
return nil, nil, nil


@@ -51,8 +51,13 @@ type Provider interface {
// for more details.
ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([]statsapi.PodStats, error)
// ImageFsStats returns the stats of the image filesystem.
// Kubelet allows three options for container filesystems:
// everything on the node fs (no dedicated image filesystem);
// container storage on a dedicated disk (imageFs separate from root);
// or the container filesystem on root while images are stored on a separate imageFs.
// The first return parameter is the image filesystem and the
// second parameter is the container filesystem.
ImageFsStats(ctx context.Context) (imageFs *statsapi.FsStats, containerFs *statsapi.FsStats, callErr error)
// The following stats are provided by cAdvisor.
//
// GetCgroupStats returns the stats and the networking usage of the cgroup
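A hedged sketch of a consumer of this two-value contract: the providers in this diff return the identical stats value for both results when there is no split, so comparing the two is a cheap, if informal, way to tell the layouts apart (the call site is assumed):

imageFs, containerFs, err := provider.ImageFsStats(ctx)
if err != nil {
    return err
}
if containerFs != imageFs {
    // split layout: account for the writable-layer filesystem separately
}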


@@ -82,7 +82,7 @@ func (sp *summaryProviderImpl) Get(ctx context.Context, updateStats bool) (*stat
if err != nil {
return nil, fmt.Errorf("failed to get rootFs stats: %v", err)
}
imageFsStats, containerFsStats, err := sp.provider.ImageFsStats(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get imageFs stats: %v", err)
}
@@ -109,7 +109,7 @@ func (sp *summaryProviderImpl) Get(ctx context.Context, updateStats bool) (*stat
Network: networkStats,
StartTime: sp.systemBootTime,
Fs: rootFsStats,
Runtime: &statsapi.RuntimeStats{ContainerFs: containerFsStats, ImageFs: imageFsStats},
Rlimit: rlimit,
SystemContainers: sp.GetSystemContainersStats(nodeConfig, podStats, updateStats),
}
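With ContainerFs now populated in RuntimeStats, clients of the summary API can read it alongside ImageFs; a minimal client-side sketch, assuming a fetched *statsapi.Summary:

if rt := summary.Node.Runtime; rt != nil && rt.ContainerFs != nil && rt.ContainerFs.AvailableBytes != nil {
    fmt.Printf("container fs available: %d bytes\n", *rt.ContainerFs.AvailableBytes)
}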


@@ -48,7 +48,7 @@ var (
rlimitStats = getRlimitStats()
)
func TestSummaryProviderGetStatsNoSplitFileSystem(t *testing.T) {
ctx := context.Background()
assert := assert.New(t)
@@ -81,7 +81,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
mockStatsProvider.EXPECT().GetPodCgroupRoot().Return(cgroupRoot)
mockStatsProvider.EXPECT().ListPodStats(ctx).Return(podStats, nil).AnyTimes()
mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage(ctx).Return(podStats, nil)
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(imageFsStats, imageFsStats, nil)
mockStatsProvider.EXPECT().RootFsStats().Return(rootFsStats, nil)
mockStatsProvider.EXPECT().RlimitStats().Return(rlimitStats, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/", true).Return(cgroupStatsMap["/"].cs, cgroupStatsMap["/"].ns, nil)
@@ -103,7 +103,106 @@ func TestSummaryProviderGetStats(t *testing.T) {
assert.Equal(summary.Node.Swap, cgroupStatsMap["/"].cs.Swap)
assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns)
assert.Equal(summary.Node.Fs, rootFsStats)
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: imageFsStats, ImageFs: imageFsStats})
assert.Equal(len(summary.Node.SystemContainers), 4)
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "kubelet",
StartTime: kubeletCreationTime,
CPU: cgroupStatsMap["/kubelet"].cs.CPU,
Memory: cgroupStatsMap["/kubelet"].cs.Memory,
Accelerators: cgroupStatsMap["/kubelet"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/kubelet"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/kubelet"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "misc",
StartTime: cgroupStatsMap["/misc"].cs.StartTime,
CPU: cgroupStatsMap["/misc"].cs.CPU,
Memory: cgroupStatsMap["/misc"].cs.Memory,
Accelerators: cgroupStatsMap["/misc"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/misc"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/misc"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "runtime",
StartTime: cgroupStatsMap["/runtime"].cs.StartTime,
CPU: cgroupStatsMap["/runtime"].cs.CPU,
Memory: cgroupStatsMap["/runtime"].cs.Memory,
Accelerators: cgroupStatsMap["/runtime"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/runtime"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/runtime"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "pods",
StartTime: cgroupStatsMap["/pods"].cs.StartTime,
CPU: cgroupStatsMap["/pods"].cs.CPU,
Memory: cgroupStatsMap["/pods"].cs.Memory,
Accelerators: cgroupStatsMap["/pods"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/pods"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/pods"].cs.Swap,
})
assert.Equal(summary.Pods, podStats)
}
func TestSummaryProviderGetStatsSplitImageFs(t *testing.T) {
ctx := context.Background()
assert := assert.New(t)
podStats := []statsapi.PodStats{
{
PodRef: statsapi.PodReference{Name: "test-pod", Namespace: "test-namespace", UID: "UID_test-pod"},
StartTime: metav1.NewTime(time.Now()),
Containers: []statsapi.ContainerStats{*getContainerStats()},
Network: getNetworkStats(),
VolumeStats: []statsapi.VolumeStats{*getVolumeStats()},
},
}
cgroupStatsMap := map[string]struct {
cs *statsapi.ContainerStats
ns *statsapi.NetworkStats
}{
"/": {cs: getContainerStats(), ns: getNetworkStats()},
"/runtime": {cs: getContainerStats(), ns: getNetworkStats()},
"/misc": {cs: getContainerStats(), ns: getNetworkStats()},
"/kubelet": {cs: getContainerStats(), ns: getNetworkStats()},
"/pods": {cs: getContainerStats(), ns: getNetworkStats()},
}
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
mockStatsProvider := statstest.NewMockProvider(mockCtrl)
mockStatsProvider.EXPECT().GetNode().Return(node, nil)
mockStatsProvider.EXPECT().GetNodeConfig().Return(nodeConfig)
mockStatsProvider.EXPECT().GetPodCgroupRoot().Return(cgroupRoot)
mockStatsProvider.EXPECT().ListPodStats(ctx).Return(podStats, nil).AnyTimes()
mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage(ctx).Return(podStats, nil)
mockStatsProvider.EXPECT().RootFsStats().Return(rootFsStats, nil)
mockStatsProvider.EXPECT().RlimitStats().Return(rlimitStats, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/", true).Return(cgroupStatsMap["/"].cs, cgroupStatsMap["/"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/runtime", false).Return(cgroupStatsMap["/runtime"].cs, cgroupStatsMap["/runtime"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/misc", false).Return(cgroupStatsMap["/misc"].cs, cgroupStatsMap["/misc"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/kubepods", true).Return(cgroupStatsMap["/pods"].cs, cgroupStatsMap["/pods"].ns, nil)
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(imageFsStats, rootFsStats, nil)
kubeletCreationTime := metav1.Now()
systemBootTime := metav1.Now()
provider := summaryProviderImpl{kubeletCreationTime: kubeletCreationTime, systemBootTime: systemBootTime, provider: mockStatsProvider}
summary, err := provider.Get(ctx, true)
assert.NoError(err)
assert.Equal(summary.Node.NodeName, "test-node")
assert.Equal(summary.Node.StartTime, systemBootTime)
assert.Equal(summary.Node.CPU, cgroupStatsMap["/"].cs.CPU)
assert.Equal(summary.Node.Memory, cgroupStatsMap["/"].cs.Memory)
assert.Equal(summary.Node.Swap, cgroupStatsMap["/"].cs.Swap)
assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns)
assert.Equal(summary.Node.Fs, rootFsStats)
// With a split filesystem, the container fs should report the root filesystem stats and ImageFs the image filesystem stats
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: rootFsStats, ImageFs: imageFsStats})
assert.Equal(len(summary.Node.SystemContainers), 4)
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{


@@ -221,12 +221,13 @@ func (mr *MockProviderMockRecorder) GetRequestedContainersInfo(containerName, op
}
// ImageFsStats mocks base method.
func (m *MockProvider) ImageFsStats(ctx context.Context) (*v1alpha1.FsStats, *v1alpha1.FsStats, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ImageFsStats", ctx)
ret0, _ := ret[0].(*v1alpha1.FsStats)
ret1, _ := ret[1].(*v1alpha1.FsStats)
ret2, _ := ret[2].(error)
return ret0, ret1, ret2
}
// ImageFsStats indicates an expected call of ImageFsStats.


@@ -28,8 +28,10 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
kubetypes "k8s.io/kubelet/pkg/types"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@@ -237,14 +239,16 @@ func (p *cadvisorStatsProvider) ListPodCPUAndMemoryStats(_ context.Context) ([]s
}
// ImageFsStats returns the stats of the filesystem for storing images.
func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *statsapi.FsStats, containerFsRet *statsapi.FsStats, errCall error) {
imageFsInfo, err := p.cadvisor.ImagesFsInfo()
if err != nil {
return nil, nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletSeparateDiskGC) {
imageStats, err := p.imageService.ImageStats(ctx)
if err != nil || imageStats == nil {
return nil, nil, fmt.Errorf("failed to get image stats: %v", err)
}
var imageFsInodesUsed *uint64
@@ -253,7 +257,7 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsS
imageFsInodesUsed = &imageFsIU
}
imageFs := &statsapi.FsStats{
Time: metav1.NewTime(imageFsInfo.Timestamp),
AvailableBytes: &imageFsInfo.Available,
CapacityBytes: &imageFsInfo.Capacity,
@@ -261,7 +265,60 @@ func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsS
InodesFree: imageFsInfo.InodesFree,
Inodes: imageFsInfo.Inodes,
InodesUsed: imageFsInodesUsed,
}
return imageFs, imageFs, nil
}
containerFsInfo, err := p.cadvisor.ContainerFsInfo()
if err != nil {
return nil, nil, fmt.Errorf("failed to get container fs info: %v", err)
}
imageStats, err := p.imageService.ImageFsInfo(ctx)
if err != nil || imageStats == nil {
return nil, nil, fmt.Errorf("failed to get image stats: %v", err)
}
splitFileSystem := false
if imageStats.ImageFilesystems[0].FsId.Mountpoint != imageStats.ContainerFilesystems[0].FsId.Mountpoint {
klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageStats.ImageFilesystems[0], "ContainerFilesystems", imageStats.ContainerFilesystems[0])
splitFileSystem = true
}
imageFs := imageStats.ImageFilesystems[0]
var imageFsInodesUsed *uint64
if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
imageFsInodesUsed = &imageFsIU
}
fsStats := &statsapi.FsStats{
Time: metav1.NewTime(imageFsInfo.Timestamp),
AvailableBytes: &imageFsInfo.Available,
CapacityBytes: &imageFsInfo.Capacity,
UsedBytes: &imageFs.UsedBytes.Value,
InodesFree: imageFsInfo.InodesFree,
Inodes: imageFsInfo.Inodes,
InodesUsed: imageFsInodesUsed,
}
if !splitFileSystem {
return fsStats, fsStats, nil
}
containerFs := imageStats.ContainerFilesystems[0]
var containerFsInodesUsed *uint64
if containerFsInfo.Inodes != nil && containerFsInfo.InodesFree != nil {
containerFsIU := *containerFsInfo.Inodes - *containerFsInfo.InodesFree
containerFsInodesUsed = &containerFsIU
}
fsContainerStats := &statsapi.FsStats{
Time: metav1.NewTime(containerFsInfo.Timestamp),
AvailableBytes: &containerFsInfo.Available,
CapacityBytes: &containerFsInfo.Capacity,
UsedBytes: &containerFs.UsedBytes.Value,
InodesFree: containerFsInfo.InodesFree,
Inodes: containerFsInfo.Inodes,
InodesUsed: containerFsInodesUsed,
}
return fsStats, fsContainerStats, nil
}
// ImageFsDevice returns name of the device where the image filesystem locates,


@@ -28,7 +28,11 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/features"
cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
@@ -250,7 +254,6 @@ func TestCadvisorListPodStats(t *testing.T) {
mockCadvisor.EXPECT().ImagesFsInfo().Return(imagefs, nil)
mockRuntime := containertest.NewMockRuntime(mockCtrl)
mockRuntime.EXPECT().ImageStats(ctx).Return(&kubecontainer.ImageStats{TotalStorageBytes: 123}, nil).AnyTimes()
ephemeralVolumes := []statsapi.VolumeStats{getPodVolumeStats(seedEphemeralVolume1, "ephemeralVolume1"),
getPodVolumeStats(seedEphemeralVolume2, "ephemeralVolume2")}
@@ -520,7 +523,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
assert.Nil(t, ps.Network)
}
func TestCadvisorImagesFsStatsKubeletSeparateDiskOff(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
@@ -534,11 +537,13 @@ func TestCadvisorImagesFsStats(t *testing.T) {
imageStats = &kubecontainer.ImageStats{TotalStorageBytes: 100}
)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, false)()
mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
mockRuntime.EXPECT().ImageStats(ctx).Return(imageStats, nil)
provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
stats, _, err := provider.ImageFsStats(ctx)
assert.NoError(err)
assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
@@ -550,6 +555,110 @@ func TestCadvisorImagesFsStats(t *testing.T) {
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
}
func TestCadvisorImagesFsStats(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
var (
assert = assert.New(t)
mockCadvisor = cadvisortest.NewMockInterface(mockCtrl)
mockRuntime = containertest.NewMockRuntime(mockCtrl)
seed = 1000
imageFsInfo = getTestFsInfo(seed)
)
imageFsInfoCRI := &runtimeapi.FilesystemUsage{
Timestamp: imageFsInfo.Timestamp.Unix(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "images"},
UsedBytes: &runtimeapi.UInt64Value{Value: imageFsInfo.Usage},
InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes},
}
imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{
ImageFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
ContainerFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true)()
mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
mockCadvisor.EXPECT().ContainerFsInfo().Return(imageFsInfo, nil)
mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil)
provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
stats, containerfs, err := provider.ImageFsStats(ctx)
assert.NoError(err)
assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
assert.Equal(imageFsInfo.Available, *stats.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, stats.InodesFree)
assert.Equal(imageFsInfo.Inodes, stats.Inodes)
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
assert.Equal(imageFsInfo.Timestamp, containerfs.Time.Time)
assert.Equal(imageFsInfo.Available, *containerfs.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *containerfs.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, containerfs.InodesFree)
assert.Equal(imageFsInfo.Inodes, containerfs.Inodes)
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *containerfs.InodesUsed)
}
func TestCadvisorSplitImagesFsStats(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
var (
assert = assert.New(t)
mockCadvisor = cadvisortest.NewMockInterface(mockCtrl)
mockRuntime = containertest.NewMockRuntime(mockCtrl)
seed = 1000
imageFsInfo = getTestFsInfo(seed)
containerSeed = 1001
containerFsInfo = getTestFsInfo(containerSeed)
)
imageFsInfoCRI := &runtimeapi.FilesystemUsage{
Timestamp: imageFsInfo.Timestamp.Unix(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "images"},
UsedBytes: &runtimeapi.UInt64Value{Value: imageFsInfo.Usage},
InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes},
}
containerFsInfoCRI := &runtimeapi.FilesystemUsage{
Timestamp: containerFsInfo.Timestamp.Unix(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "containers"},
UsedBytes: &runtimeapi.UInt64Value{Value: containerFsInfo.Usage},
InodesUsed: &runtimeapi.UInt64Value{Value: *containerFsInfo.Inodes},
}
imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{
ImageFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
ContainerFilesystems: []*runtimeapi.FilesystemUsage{containerFsInfoCRI},
}
mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
mockCadvisor.EXPECT().ContainerFsInfo().Return(containerFsInfo, nil)
mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true)()
provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
stats, containerfs, err := provider.ImageFsStats(ctx)
assert.NoError(err)
assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
assert.Equal(imageFsInfo.Available, *stats.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, stats.InodesFree)
assert.Equal(imageFsInfo.Inodes, stats.Inodes)
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
assert.Equal(containerFsInfo.Timestamp, containerfs.Time.Time)
assert.Equal(containerFsInfo.Available, *containerfs.AvailableBytes)
assert.Equal(containerFsInfo.Capacity, *containerfs.CapacityBytes)
assert.Equal(containerFsInfo.InodesFree, containerfs.InodesFree)
assert.Equal(containerFsInfo.Inodes, containerfs.Inodes)
assert.Equal(*containerFsInfo.Inodes-*containerFsInfo.InodesFree, *containerfs.InodesUsed)
}
func TestCadvisorListPodStatsWhenContainerLogFound(t *testing.T) {
ctx := context.Background()
const (


@@ -385,10 +385,10 @@ func (p *criStatsProvider) getPodAndContainerMaps(ctx context.Context) (map[stri
}
// ImageFsStats returns the stats of the image filesystem.
func (p *criStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *statsapi.FsStats, containerFsRet *statsapi.FsStats, errRet error) {
resp, err := p.imageService.ImageFsInfo(ctx)
if err != nil {
return nil, nil, err
}
// CRI may return the stats of multiple image filesystems but we only
@@ -396,31 +396,32 @@
// report the first one.
//
// TODO(yguo0905): Support returning stats of multiple image filesystems.
if len(resp.GetImageFilesystems()) == 0 {
return nil, nil, fmt.Errorf("imageFs information is unavailable")
}
fs := resp.GetImageFilesystems()[0]
imageFsRet = &statsapi.FsStats{
Time: metav1.NewTime(time.Unix(0, fs.Timestamp)),
UsedBytes: &fs.UsedBytes.Value,
}
if fs.InodesUsed != nil {
imageFsRet.InodesUsed = &fs.InodesUsed.Value
}
imageFsInfo, err := p.getFsInfo(fs.GetFsId())
if err != nil {
return nil, nil, fmt.Errorf("get filesystem info: %w", err)
}
if imageFsInfo != nil {
// The image filesystem id is unknown to the local node or there's
// an error on retrieving the stats. In these cases, we omit those
// stats and return the best-effort partial result. See
// https://github.com/kubernetes/heapster/issues/1793.
imageFsRet.AvailableBytes = &imageFsInfo.Available
imageFsRet.CapacityBytes = &imageFsInfo.Capacity
imageFsRet.InodesFree = imageFsInfo.InodesFree
imageFsRet.Inodes = imageFsInfo.Inodes
}
// TODO: For CRI Stats Provider we don't support separate disks yet.
return imageFsRet, imageFsRet, nil
}
// ImageFsDevice returns name of the device where the image filesystem locates,


@@ -773,7 +773,7 @@ func TestCRIImagesFsStats(t *testing.T) {
false,
)
stats, containerStats, err := provider.ImageFsStats(ctx)
assert := assert.New(t)
assert.NoError(err)
@@ -784,6 +784,15 @@
assert.Equal(imageFsInfo.Inodes, stats.Inodes)
assert.Equal(imageFsUsage.UsedBytes.Value, *stats.UsedBytes)
assert.Equal(imageFsUsage.InodesUsed.Value, *stats.InodesUsed)
assert.Equal(imageFsUsage.Timestamp, containerStats.Time.UnixNano())
assert.Equal(imageFsInfo.Available, *containerStats.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *containerStats.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, containerStats.InodesFree)
assert.Equal(imageFsInfo.Inodes, containerStats.Inodes)
assert.Equal(imageFsUsage.UsedBytes.Value, *containerStats.UsedBytes)
assert.Equal(imageFsUsage.InodesUsed.Value, *containerStats.InodesUsed)
}
func makeFakePodSandbox(name, uid, namespace string, terminated bool) *critest.FakePodSandbox {


@@ -31,6 +31,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/utils/ptr"
)
// PodManager is the subset of methods the manager needs to observe the actual state of the kubelet.
@@ -99,7 +100,7 @@ type containerStatsProvider interface {
ListPodStats(ctx context.Context) ([]statsapi.PodStats, error)
ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([]statsapi.PodStats, error)
ListPodCPUAndMemoryStats(ctx context.Context) ([]statsapi.PodStats, error)
ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error)
ImageFsDevice(ctx context.Context) (string, error)
}
@@ -203,6 +204,7 @@ func (p *Provider) GetRawContainerInfo(containerName string, req *cadvisorapiv1.
}
// HasDedicatedImageFs returns true if a dedicated image filesystem exists for storing images.
// KEP 4191: enhanced to allow the container filesystem to be separate from the image filesystem.
func (p *Provider) HasDedicatedImageFs(ctx context.Context) (bool, error) {
device, err := p.containerStatsProvider.ImageFsDevice(ctx)
if err != nil {
@@ -212,5 +214,38 @@
if err != nil {
return false, err
}
// KEP enhancement: a dedicated image fs can mean that either the container or the image fs is separate from the root fs
// cAdvisor reports this differently than container runtimes do
if device == rootFsInfo.Device {
imageFs, containerFs, err := p.ImageFsStats(ctx)
if err != nil {
return false, err
}
if !equalFileSystems(imageFs, containerFs) {
return true, nil
}
}
return device != rootFsInfo.Device, nil
}
func equalFileSystems(a, b *statsapi.FsStats) bool {
if a == nil || b == nil {
return false
}
if !ptr.Equal(a.AvailableBytes, b.AvailableBytes) {
return false
}
if !ptr.Equal(a.CapacityBytes, b.CapacityBytes) {
return false
}
if !ptr.Equal(a.InodesUsed, b.InodesUsed) {
return false
}
if !ptr.Equal(a.InodesFree, b.InodesFree) {
return false
}
if !ptr.Equal(a.Inodes, b.Inodes) {
return false
}
return true
}
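Taken together, HasDedicatedImageFs now returns true in two situations, which the test below exercises; a short usage sketch (caller assumed):

// image device != root device                    -> true (dedicated imagefs)
// same device, image/container stats identical   -> false (everything shared)
// same device, image/container stats differ      -> true (split container filesystem)
dedicated, err := provider.HasDedicatedImageFs(ctx)
if err == nil && dedicated {
    // eviction and GC treat image (and possibly container) storage as off-root
}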


@@ -417,21 +417,34 @@ func TestHasDedicatedImageFs(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
imageStatsExpected := &statsapi.FsStats{AvailableBytes: uint64Ptr(1)}
for desc, test := range map[string]struct {
rootfsDevice string
imagefsDevice string
dedicated bool
imageFsStats *statsapi.FsStats
containerFsStats *statsapi.FsStats
}{
"dedicated device for image filesystem": {
rootfsDevice: "root/device",
imagefsDevice: "image/device",
dedicated: true,
imageFsStats: imageStatsExpected,
},
"shared device for image filesystem": {
rootfsDevice: "share/device",
imagefsDevice: "share/device",
dedicated: false,
imageFsStats: imageStatsExpected,
containerFsStats: imageStatsExpected,
},
"split filesystem for images": {
rootfsDevice: "root/device",
imagefsDevice: "root/device",
dedicated: true,
imageFsStats: &statsapi.FsStats{AvailableBytes: uint64Ptr(1)},
containerFsStats: &statsapi.FsStats{AvailableBytes: uint64Ptr(2)},
},
} {
t.Logf("TestCase %q", desc)
@@ -441,10 +454,12 @@
mockRuntimeCache = new(kubecontainertest.MockRuntimeCache)
)
mockCadvisor.EXPECT().RootFsInfo().Return(cadvisorapiv2.FsInfo{Device: test.rootfsDevice}, nil)
provider := newStatsProvider(mockCadvisor, mockPodManager, mockRuntimeCache, fakeContainerStatsProvider{
device: test.imagefsDevice,
imageFs: test.imageFsStats,
containerFs: test.containerFsStats,
})
dedicated, err := provider.HasDedicatedImageFs(ctx)
assert.NoError(t, err)
assert.Equal(t, test.dedicated, dedicated)
@@ -761,6 +776,8 @@
type fakeContainerStatsProvider struct {
device string
imageFs *statsapi.FsStats
containerFs *statsapi.FsStats
}
func (p fakeContainerStatsProvider) ListPodStats(context.Context) ([]statsapi.PodStats, error) {
@@ -775,8 +792,8 @@
return nil, fmt.Errorf("not implemented")
}
func (p fakeContainerStatsProvider) ImageFsStats(context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) {
return p.imageFs, p.containerFs, nil
}
func (p fakeContainerStatsProvider) ImageFsDevice(context.Context) (string, error) {


@@ -317,7 +317,16 @@ var _ = SIGDescribe("Summary API [NodeConformance]", func() {
"Inodes": bounded(1e4, 1e8),
"InodesUsed": bounded(0, 1e8),
}),
"ContainerFs": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
// we assume we are not running tests on machines with more than 10TB of disk
"UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Tb),
"InodesFree": bounded(1e4, 1e8),
"Inodes": bounded(1e4, 1e8),
"InodesUsed": bounded(0, 1e8),
}),
}),
"Rlimit": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),