Merge pull request #120616 from kannon92/kubelet-disk-api-changes

Kubelet disk api changes
Kubernetes Prow Robot 2023-11-02 16:11:20 +01:00 committed by GitHub
commit 6c64593ba1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 3455 additions and 686 deletions

View File

@ -454,6 +454,12 @@ const (
// Enable POD resources API to return allocatable resources
KubeletPodResourcesGetAllocatable featuregate.Feature = "KubeletPodResourcesGetAllocatable"
// KubeletSeparateDiskGC enables the Kubelet to garbage collect images and containers on different filesystems
// owner: @kannon92
// kep: https://kep.k8s.io/4191
// alpha: v1.29
KubeletSeparateDiskGC featuregate.Feature = "KubeletSeparateDiskGC"
// owner: @sallyom
// kep: https://kep.k8s.io/2832
// alpha: v1.25
@ -1088,6 +1094,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
KubeletPodResourcesGetAllocatable: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // GA in 1.28, remove in 1.30
KubeletSeparateDiskGC: {Default: false, PreRelease: featuregate.Alpha},
KubeletTracing: {Default: true, PreRelease: featuregate.Beta},
KubeProxyDrainingTerminatingNodes: {Default: false, PreRelease: featuregate.Alpha},
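Callers consult the gate at runtime through the shared feature-gate helper. A minimal sketch of such a check (the helper name here is hypothetical; the Enabled call mirrors the one made later in the eviction manager):

import (
    utilfeature "k8s.io/apiserver/pkg/util/feature"
    "k8s.io/kubernetes/pkg/features"
)

// splitDiskGCEnabled is a hypothetical helper; it reports whether the alpha
// KubeletSeparateDiskGC gate is enabled on this kubelet.
func splitDiskGCEnabled() bool {
    return utilfeature.DefaultFeatureGate.Enabled(features.KubeletSeparateDiskGC)
}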

View File

@ -186,6 +186,14 @@ func (cc *cadvisorClient) getFsInfo(label string) (cadvisorapiv2.FsInfo, error)
return res[0], nil
}
func (cc *cadvisorClient) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
label, err := cc.imageFsInfoProvider.ContainerFsInfoLabel()
if err != nil {
return cadvisorapiv2.FsInfo{}, err
}
return cc.getFsInfo(label)
}
func (cc *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return cc.WatchForEvents(request)
}

View File

@ -57,3 +57,32 @@ func TestImageFsInfoLabel(t *testing.T) {
})
}
}
func TestContainerFsInfoLabel(t *testing.T) {
testcases := []struct {
description string
runtime string
runtimeEndpoint string
expectedLabel string
expectedError error
}{{
description: "LabelCrioContainers should be returned",
runtimeEndpoint: crio.CrioSocket,
expectedLabel: LabelCrioContainers,
expectedError: nil,
}, {
description: "Cannot find valid imagefs label",
runtimeEndpoint: "",
expectedLabel: "",
expectedError: fmt.Errorf("no containerfs label for configured runtime"),
}}
for _, tc := range testcases {
t.Run(tc.description, func(t *testing.T) {
infoProvider := NewImageFsInfoProvider(tc.runtimeEndpoint)
label, err := infoProvider.ContainerFsInfoLabel()
assert.Equal(t, tc.expectedLabel, label)
assert.Equal(t, tc.expectedError, err)
})
}
}

View File

@ -79,6 +79,10 @@ func (cu *cadvisorUnsupported) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, errUnsupported
}
func (cu *cadvisorUnsupported) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, errUnsupported
}
func (cu *cadvisorUnsupported) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return nil, errUnsupported
}

View File

@ -79,6 +79,10 @@ func (cu *cadvisorClient) ImagesFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
func (cu *cadvisorClient) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cu.GetDirFsInfo(cu.rootPath)
}

View File

@ -26,6 +26,11 @@ import (
cadvisorfs "github.com/google/cadvisor/fs"
)
// LabelCrioContainers is a label that allows cAdvisor to track writeable layers
// separately from read-only layers.
// Once the cAdvisor upstream changes are merged, we should remove this constant.
const LabelCrioContainers string = "crio-containers"
// imageFsInfoProvider knows how to translate the configured runtime
// to its file system label for images.
type imageFsInfoProvider struct {
@ -35,15 +40,28 @@ type imageFsInfoProvider struct {
// ImageFsInfoLabel returns the image fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ImageFsInfoLabel() (string, error) {
// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.
// Related to https://github.com/kubernetes/kubernetes/issues/51798
if strings.HasSuffix(i.runtimeEndpoint, CrioSocketSuffix) {
if detectCrioWorkaround(i) {
return cadvisorfs.LabelCrioImages, nil
}
return "", fmt.Errorf("no imagefs label for configured runtime")
}
// ContainerFsInfoLabel returns the container fs label for the configured runtime.
// For remote runtimes, it handles additional runtimes natively understood by cAdvisor.
func (i *imageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
if detectCrioWorkaround(i) {
return LabelCrioContainers, nil
}
return "", fmt.Errorf("no containerfs label for configured runtime")
}
// This is a temporary workaround to get stats for cri-o from cadvisor
// and should be removed.
// Related to https://github.com/kubernetes/kubernetes/issues/51798
func detectCrioWorkaround(i *imageFsInfoProvider) bool {
return strings.HasSuffix(i.runtimeEndpoint, CrioSocketSuffix)
}
// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
return &imageFsInfoProvider{runtimeEndpoint: runtimeEndpoint}
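As an illustration, a hypothetical caller in this package could resolve both labels for a CRI-O endpoint; the endpoint string below is only an example, since detection keys off the crio.sock suffix, and the helper itself is not part of this change:

// resolveCrioFsLabels is a hypothetical helper showing how the provider maps a
// CRI-O runtime endpoint to the two cAdvisor filesystem labels.
func resolveCrioFsLabels() (imageLabel, containerLabel string, err error) {
    provider := NewImageFsInfoProvider("unix:///var/run/crio/crio.sock")
    imageLabel, err = provider.ImageFsInfoLabel() // expected: cadvisorfs.LabelCrioImages
    if err != nil {
        return "", "", err
    }
    containerLabel, err = provider.ContainerFsInfoLabel() // expected: LabelCrioContainers ("crio-containers")
    if err != nil {
        return "", "", err
    }
    return imageLabel, containerLabel, nil
}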

View File

@ -29,6 +29,10 @@ func (i *unsupportedImageFsInfoProvider) ImageFsInfoLabel() (string, error) {
return "", errors.New("unsupported")
}
func (i *unsupportedImageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
return "", errors.New("unsupported")
}
// NewImageFsInfoProvider returns a provider for the specified runtime configuration.
func NewImageFsInfoProvider(runtimeEndpoint string) ImageFsInfoProvider {
return &unsupportedImageFsInfoProvider{}

View File

@ -101,6 +101,11 @@ func (c *Fake) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
// ContainerFsInfo is a fake implementation of Interface.ContainerFsInfo.
func (c *Fake) ContainerFsInfo() (cadvisorapiv2.FsInfo, error) {
return cadvisorapiv2.FsInfo{}, nil
}
// WatchEvents is a fake implementation of Interface.WatchEvents.
func (c *Fake) WatchEvents(request *events.Request) (*events.EventChannel, error) {
return new(events.EventChannel), nil

View File

@ -52,6 +52,21 @@ func (m *MockInterface) EXPECT() *MockInterfaceMockRecorder {
return m.recorder
}
// ContainerFsInfo mocks base method.
func (m *MockInterface) ContainerFsInfo() (v2.FsInfo, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ContainerFsInfo")
ret0, _ := ret[0].(v2.FsInfo)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ContainerFsInfo indicates an expected call of ContainerFsInfo.
func (mr *MockInterfaceMockRecorder) ContainerFsInfo() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ContainerFsInfo", reflect.TypeOf((*MockInterface)(nil).ContainerFsInfo))
}
// ContainerInfo mocks base method.
func (m *MockInterface) ContainerInfo(name string, req *v1.ContainerInfoRequest) (*v1.ContainerInfo, error) {
m.ctrl.T.Helper()
@ -254,6 +269,21 @@ func (m *MockImageFsInfoProvider) EXPECT() *MockImageFsInfoProviderMockRecorder
return m.recorder
}
// ContainerFsInfoLabel mocks base method.
func (m *MockImageFsInfoProvider) ContainerFsInfoLabel() (string, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ContainerFsInfoLabel")
ret0, _ := ret[0].(string)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ContainerFsInfoLabel indicates an expected call of ContainerFsInfoLabel.
func (mr *MockImageFsInfoProviderMockRecorder) ContainerFsInfoLabel() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ContainerFsInfoLabel", reflect.TypeOf((*MockImageFsInfoProvider)(nil).ContainerFsInfoLabel))
}
// ImageFsInfoLabel mocks base method.
func (m *MockImageFsInfoProvider) ImageFsInfoLabel() (string, error) {
m.ctrl.T.Helper()

View File

@ -41,6 +41,10 @@ type Interface interface {
// Returns usage information about the root filesystem.
RootFsInfo() (cadvisorapiv2.FsInfo, error)
// Returns usage information about the writeable layer.
// Under KEP 4191 this can be on a separate filesystem from the image filesystem.
ContainerFsInfo() (cadvisorapiv2.FsInfo, error)
// Get events streamed through passedChannel that fit the request.
WatchEvents(request *events.Request) (*events.EventChannel, error)
@ -52,4 +56,6 @@ type Interface interface {
type ImageFsInfoProvider interface {
// ImageFsInfoLabel returns the label cAdvisor should use to find the filesystem holding container images.
ImageFsInfoLabel() (string, error)
// ContainerFsInfoLabel returns the label cAdvisor should use to find the filesystem holding the container writeable layers.
// With a split image filesystem this will be different from ImageFsInfoLabel.
ContainerFsInfoLabel() (string, error)
}
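For orientation, here is a minimal sketch of how a consumer of this interface could use the new method; the fallback behaviour shown is an assumption for illustration, not the kubelet's actual stats-provider logic:

// writeableLayerFsInfo is a hypothetical helper (not part of this change): it
// prefers the dedicated container (writeable-layer) filesystem and falls back
// to the image filesystem when the runtime has no separate containerfs label.
func writeableLayerFsInfo(c Interface) (cadvisorapiv2.FsInfo, error) {
    if info, err := c.ContainerFsInfo(); err == nil {
        return info, nil
    }
    return c.ImagesFsInfo()
}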

View File

@ -45,6 +45,8 @@ type GC interface {
GarbageCollect(ctx context.Context) error
// Deletes all unused containers, including containers belonging to pods that are terminated but not deleted
DeleteAllUnusedContainers(ctx context.Context) error
// IsContainerFsSeparateFromImageFs tells whether the writeable layer and the read-only (image) layer are on separate filesystems.
IsContainerFsSeparateFromImageFs(ctx context.Context) bool
}
// SourcesReadyProvider knows how to determine if configuration sources are ready
@ -86,3 +88,22 @@ func (cgc *realContainerGC) DeleteAllUnusedContainers(ctx context.Context) error
klog.InfoS("Attempting to delete unused containers")
return cgc.runtime.GarbageCollect(ctx, cgc.policy, cgc.sourcesReadyProvider.AllReady(), true)
}
func (cgc *realContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
resp, err := cgc.runtime.ImageFsInfo(ctx)
if err != nil {
return false
}
// These fields can be empty if the CRI implementation didn't populate them.
if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
return false
}
// KEP 4191 explains that multiple filesystems for images and containers are not
// supported at the moment.
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4191-split-image-filesystem#comment-on-future-extensions
// for work needed to support multiple filesystems.
if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
}
return false
}

View File

@ -0,0 +1,96 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package container_test
import (
"context"
"reflect"
"testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
. "k8s.io/kubernetes/pkg/kubelet/container"
ctest "k8s.io/kubernetes/pkg/kubelet/container/testing"
)
func TestIsContainerFsSeparateFromImageFs(t *testing.T) {
runtime := &ctest.FakeRuntime{}
fakeSources := ctest.NewFakeReadyProvider()
gcContainer, err := NewContainerGC(runtime, GCPolicy{}, fakeSources)
if err != nil {
t.Errorf("unexpected error")
}
cases := []struct {
name string
containerFs []*runtimeapi.FilesystemUsage
imageFs []*runtimeapi.FilesystemUsage
writeableSeparateFromReadOnly bool
}{
{
name: "Only images",
imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
writeableSeparateFromReadOnly: false,
},
{
name: "images and containers",
imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "container"}}},
writeableSeparateFromReadOnly: true,
},
{
name: "same filesystem",
imageFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
writeableSeparateFromReadOnly: false,
},
{
name: "Only containers",
containerFs: []*runtimeapi.FilesystemUsage{{FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "image"}}},
writeableSeparateFromReadOnly: false,
},
{
name: "neither are specified",
writeableSeparateFromReadOnly: false,
},
{
name: "both are empty arrays",
writeableSeparateFromReadOnly: false,
containerFs: []*runtimeapi.FilesystemUsage{},
imageFs: []*runtimeapi.FilesystemUsage{},
},
{
name: "FsId does not exist",
writeableSeparateFromReadOnly: false,
containerFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}},
imageFs: []*runtimeapi.FilesystemUsage{{UsedBytes: &runtimeapi.UInt64Value{Value: 10}}},
},
}
for _, tc := range cases {
runtime.SetContainerFsStats(tc.containerFs)
runtime.SetImageFsStats(tc.imageFs)
actualCommand := gcContainer.IsContainerFsSeparateFromImageFs(context.TODO())
if e, a := tc.writeableSeparateFromReadOnly, actualCommand; !reflect.DeepEqual(e, a) {
t.Errorf("%v: unexpected value; expected %v, got %v", tc.name, e, a)
}
runtime.SetContainerFsStats(nil)
runtime.SetImageFsStats(nil)
}
}

View File

@ -160,6 +160,8 @@ type ImageService interface {
RemoveImage(ctx context.Context, image ImageSpec) error
// ImageStats returns Image statistics.
ImageStats(ctx context.Context) (*ImageStats, error)
// ImageFsInfo returns a list of file systems for containers/images
ImageFsInfo(ctx context.Context) (*runtimeapi.ImageFsInfoResponse, error)
}
// Attacher interface allows to attach a container.

View File

@ -0,0 +1,36 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testing
import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// FakeReadyProvider implements a fake ready provider
type FakeReadyProvider struct {
kubecontainer.SourcesReadyProvider
}
// AllReady notifies caller that the Fake Provider is ready.
func (frp *FakeReadyProvider) AllReady() bool {
return true
}
// NewFakeReadyProvider creates a FakeReadyProvider object
func NewFakeReadyProvider() kubecontainer.SourcesReadyProvider {
return &FakeReadyProvider{}
}

View File

@ -45,6 +45,8 @@ type FakeRuntime struct {
PodList []*FakePod
AllPodList []*FakePod
ImageList []kubecontainer.Image
ImageFsStats []*runtimeapi.FilesystemUsage
ContainerFsStats []*runtimeapi.FilesystemUsage
APIPodStatus v1.PodStatus
PodStatus kubecontainer.PodStatus
StartedPods []string
@ -422,6 +424,16 @@ func (f *FakeRuntime) ListPodSandboxMetrics(_ context.Context) ([]*runtimeapi.Po
return nil, f.Err
}
// SetContainerFsStats sets the containerFsStats for dependency injection.
func (f *FakeRuntime) SetContainerFsStats(val []*runtimeapi.FilesystemUsage) {
f.ContainerFsStats = val
}
// SetImageFsStats sets the ImageFsStats for dependency injection.
func (f *FakeRuntime) SetImageFsStats(val []*runtimeapi.FilesystemUsage) {
f.ImageFsStats = val
}
func (f *FakeRuntime) ImageStats(_ context.Context) (*kubecontainer.ImageStats, error) {
f.Lock()
defer f.Unlock()
@ -430,6 +442,20 @@ func (f *FakeRuntime) ImageStats(_ context.Context) (*kubecontainer.ImageStats,
return nil, f.Err
}
// ImageFsInfo returns an ImageFsInfoResponse built from the injected ImageFsStats
// and ContainerFsStats values.
func (f *FakeRuntime) ImageFsInfo(_ context.Context) (*runtimeapi.ImageFsInfoResponse, error) {
f.Lock()
defer f.Unlock()
f.CalledFunctions = append(f.CalledFunctions, "ImageFsInfo")
resp := &runtimeapi.ImageFsInfoResponse{
ImageFilesystems: f.ImageFsStats,
ContainerFilesystems: f.ContainerFsStats,
}
return resp, f.Err
}
func (f *FakeStreamingRuntime) GetExec(_ context.Context, id kubecontainer.ContainerID, cmd []string, stdin, stdout, stderr, tty bool) (*url.URL, error) {
f.Lock()
defer f.Unlock()

View File

@ -242,6 +242,21 @@ func (mr *MockRuntimeMockRecorder) GetPods(ctx, all interface{}) *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPods", reflect.TypeOf((*MockRuntime)(nil).GetPods), ctx, all)
}
// ImageFsInfo mocks base method.
func (m *MockRuntime) ImageFsInfo(ctx context.Context) (*v10.ImageFsInfoResponse, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ImageFsInfo", ctx)
ret0, _ := ret[0].(*v10.ImageFsInfoResponse)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ImageFsInfo indicates an expected call of ImageFsInfo.
func (mr *MockRuntimeMockRecorder) ImageFsInfo(ctx interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ImageFsInfo", reflect.TypeOf((*MockRuntime)(nil).ImageFsInfo), ctx)
}
// ImageStats mocks base method.
func (m *MockRuntime) ImageStats(ctx context.Context) (*container.ImageStats, error) {
m.ctrl.T.Helper()
@ -523,6 +538,21 @@ func (mr *MockImageServiceMockRecorder) GetImageRef(ctx, image interface{}) *gom
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetImageRef", reflect.TypeOf((*MockImageService)(nil).GetImageRef), ctx, image)
}
// ImageFsInfo mocks base method.
func (m *MockImageService) ImageFsInfo(ctx context.Context) (*v10.ImageFsInfoResponse, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ImageFsInfo", ctx)
ret0, _ := ret[0].(*v10.ImageFsInfoResponse)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// ImageFsInfo indicates an expected call of ImageFsInfo.
func (mr *MockImageServiceMockRecorder) ImageFsInfo(ctx interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ImageFsInfo", reflect.TypeOf((*MockImageService)(nil).ImageFsInfo), ctx)
}
// ImageStats mocks base method.
func (m *MockImageService) ImageStats(ctx context.Context) (*container.ImageStats, error) {
m.ctrl.T.Helper()

View File

@ -32,10 +32,25 @@ const (
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing image layers.
// If the container filesystem and image filesystem are not separate,
// then imagefs can store both image layers and writeable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writable layers.
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing image layers.
// If the container filesystem and image filesystem are not separate,
// then imagefs can store both image layers and writeable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
// SignalContainerFsAvailable is amount of storage available on filesystem that container runtime uses for container writable layers.
// In case of a single filesystem, containerfs=nodefs.
// In case of an image filesystem, containerfs=imagefs.
// We will override user settings and set to either imagefs or nodefs depending on configuration.
SignalContainerFsAvailable Signal = "containerfs.available"
// SignalContainerFsInodesFree is amount of inodes available on filesystem that container runtime uses for container writable layers.
// In case of a single filesystem, containerfs=nodefs.
// In case of an image filesystem, containerfs=imagefs.
// We will override user settings and set to either imagefs or nodefs depending on configuration.
SignalContainerFsInodesFree Signal = "containerfs.inodesFree"
// SignalAllocatableMemoryAvailable is amount of memory available for pod allocation (i.e. allocatable - workingSet (of pods), in bytes.
SignalAllocatableMemoryAvailable Signal = "allocatableMemory.available"
// SignalPIDAvailable is amount of PID available for pod allocation
@ -63,6 +78,8 @@ var OpForSignal = map[Signal]ThresholdOperator{
SignalNodeFsInodesFree: OpLessThan,
SignalImageFsAvailable: OpLessThan,
SignalImageFsInodesFree: OpLessThan,
SignalContainerFsAvailable: OpLessThan,
SignalContainerFsInodesFree: OpLessThan,
SignalAllocatableMemoryAvailable: OpLessThan,
SignalPIDAvailable: OpLessThan,
}

View File

@ -94,6 +94,8 @@ type managerImpl struct {
lastObservations signalObservations
// dedicatedImageFs indicates if imagefs is on a separate device from the rootfs
dedicatedImageFs *bool
// splitContainerImageFs indicates if containerfs is on a separate device from imagefs
splitContainerImageFs *bool
// thresholdNotifiers is a list of memory threshold notifiers which each notify for a memory eviction threshold
thresholdNotifiers []ThresholdNotifier
// thresholdsLastUpdated is the last time the thresholdNotifiers were updated.
@ -129,6 +131,7 @@ func NewManager(
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
dedicatedImageFs: nil,
splitContainerImageFs: nil,
thresholdNotifiers: []ThresholdNotifier{},
localStorageCapacityIsolation: localStorageCapacityIsolation,
}
@ -197,10 +200,14 @@ func (m *managerImpl) Start(diskInfoProvider DiskInfoProvider, podFunc ActivePod
// start the eviction manager monitoring
go func() {
for {
if evictedPods := m.synchronize(diskInfoProvider, podFunc); evictedPods != nil {
evictedPods, err := m.synchronize(diskInfoProvider, podFunc)
if evictedPods != nil && err == nil {
klog.InfoS("Eviction manager: pods evicted, waiting for pod to be cleaned up", "pods", klog.KObjSlice(evictedPods))
m.waitForPodsCleanup(podCleanedUpFunc, evictedPods)
} else {
if err != nil {
klog.ErrorS(err, "Eviction manager: failed to synchronize")
}
time.Sleep(monitoringInterval)
}
}
@ -230,33 +237,50 @@ func (m *managerImpl) IsUnderPIDPressure() bool {
// synchronize is the main control loop that enforces eviction thresholds.
// Returns the pods that were killed, or nil if no pod was killed, together with an error if synchronization could not proceed.
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod {
func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) ([]*v1.Pod, error) {
ctx := context.Background()
// if we have nothing to do, just return
thresholds := m.config.Thresholds
if len(thresholds) == 0 && !m.localStorageCapacityIsolation {
return nil
return nil, nil
}
klog.V(3).InfoS("Eviction manager: synchronize housekeeping")
// build the ranking functions (if not yet known)
// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
if m.dedicatedImageFs == nil {
hasImageFs, ok := diskInfoProvider.HasDedicatedImageFs(ctx)
if ok != nil {
return nil
hasImageFs, splitDiskError := diskInfoProvider.HasDedicatedImageFs(ctx)
if splitDiskError != nil {
klog.ErrorS(splitDiskError, "Eviction manager: failed to get HasDedicatedImageFs")
return nil, fmt.Errorf("eviction manager: failed to get HasDedicatedImageFs: %v", splitDiskError)
}
m.dedicatedImageFs = &hasImageFs
m.signalToRankFunc = buildSignalToRankFunc(hasImageFs)
m.signalToNodeReclaimFuncs = buildSignalToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs)
splitContainerImageFs := m.containerGC.IsContainerFsSeparateFromImageFs(ctx)
// If the container and image filesystems are split but the feature gate is turned off,
// return an error: this is an unsupported configuration.
if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletSeparateDiskGC) && splitContainerImageFs {
splitDiskError := fmt.Errorf("KubeletSeparateDiskGC is turned off but we still have a split filesystem")
return nil, splitDiskError
}
thresholds, err := UpdateContainerFsThresholds(m.config.Thresholds, hasImageFs, splitContainerImageFs)
m.config.Thresholds = thresholds
if err != nil {
klog.ErrorS(err, "eviction manager: found conflicting containerfs eviction. Ignoring.")
}
m.splitContainerImageFs = &splitContainerImageFs
m.signalToRankFunc = buildSignalToRankFunc(hasImageFs, splitContainerImageFs)
m.signalToNodeReclaimFuncs = buildSignalToNodeReclaimFuncs(m.imageGC, m.containerGC, hasImageFs, splitContainerImageFs)
}
klog.V(3).InfoS("FileSystem detection", "DedicatedImageFs", m.dedicatedImageFs, "SplitImageFs", m.splitContainerImageFs)
activePods := podFunc()
updateStats := true
summary, err := m.summaryProvider.Get(ctx, updateStats)
if err != nil {
klog.ErrorS(err, "Eviction manager: failed to get summary stats")
return nil
return nil, nil
}
if m.clock.Since(m.thresholdsLastUpdated) > notifierRefreshInterval {
@ -324,20 +348,20 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// If eviction happens in localStorageEviction function, skip the rest of eviction action
if m.localStorageCapacityIsolation {
if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 {
return evictedPods
return evictedPods, nil
}
}
if len(thresholds) == 0 {
klog.V(3).InfoS("Eviction manager: no resources are starved")
return nil
return nil, nil
}
// rank the thresholds by eviction priority
sort.Sort(byEvictionPriority(thresholds))
thresholdToReclaim, resourceToReclaim, foundAny := getReclaimableThreshold(thresholds)
if !foundAny {
return nil
return nil, nil
}
klog.InfoS("Eviction manager: attempting to reclaim", "resourceName", resourceToReclaim)
@ -347,7 +371,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// check if there are node-level resources we can reclaim to reduce pressure before evicting end-user pods.
if m.reclaimNodeLevelResources(ctx, thresholdToReclaim.Signal, resourceToReclaim) {
klog.InfoS("Eviction manager: able to reduce resource pressure without evicting pods.", "resourceName", resourceToReclaim)
return nil
return nil, nil
}
klog.InfoS("Eviction manager: must evict pod(s) to reclaim", "resourceName", resourceToReclaim)
@ -356,13 +380,13 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
rank, ok := m.signalToRankFunc[thresholdToReclaim.Signal]
if !ok {
klog.ErrorS(nil, "Eviction manager: no ranking function for signal", "threshold", thresholdToReclaim.Signal)
return nil
return nil, nil
}
// the only candidates viable for eviction are those pods that had anything running.
if len(activePods) == 0 {
klog.ErrorS(nil, "Eviction manager: eviction thresholds have been met, but no pods are active to evict")
return nil
return nil, nil
}
// rank the running pods for eviction for the specified resource
@ -397,11 +421,11 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
}
if m.evictPod(pod, gracePeriodOverride, message, annotations, condition) {
metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc()
return []*v1.Pod{pod}
return []*v1.Pod{pod}, nil
}
}
klog.InfoS("Eviction manager: unable to evict any pods from the node")
return nil
return nil, nil
}
func (m *managerImpl) waitForPodsCleanup(podCleanedUpFunc PodCleanedUpFunc, pods []*v1.Pod) {

File diff suppressed because it is too large.

View File

@ -17,6 +17,7 @@ limitations under the License.
package eviction
import (
"errors"
"fmt"
"sort"
"strconv"
@ -80,9 +81,11 @@ func init() {
signalToNodeCondition[evictionapi.SignalMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[evictionapi.SignalAllocatableMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[evictionapi.SignalImageFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalContainerFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalContainerFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalPIDAvailable] = v1.NodePIDPressure
// map signals to resources (and vice-versa)
@ -91,6 +94,8 @@ func init() {
signalToResource[evictionapi.SignalAllocatableMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalImageFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalContainerFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalContainerFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = v1.ResourceEphemeralStorage
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceInodes
signalToResource[evictionapi.SignalPIDAvailable] = resourcePids
@ -172,6 +177,181 @@ func addAllocatableThresholds(thresholds []evictionapi.Threshold) []evictionapi.
return append(append([]evictionapi.Threshold{}, thresholds...), additionalThresholds...)
}
// UpdateContainerFsThresholds adds containerfs eviction hard/soft
// settings based on the container runtime configuration.
// Thresholds are parsed from the evictionHard and evictionSoft limits, so any user-supplied containerfs values are overridden.
// If there is a single filesystem, then the containerfs settings are the same as nodefs.
// If there is a dedicated image filesystem holding both containers and images, then the containerfs settings are the same as imagefs.
func UpdateContainerFsThresholds(thresholds []evictionapi.Threshold, imageFs, separateContainerImageFs bool) ([]evictionapi.Threshold, error) {
hardNodeFsDisk := evictionapi.Threshold{}
softNodeFsDisk := evictionapi.Threshold{}
hardNodeINodeDisk := evictionapi.Threshold{}
softNodeINodeDisk := evictionapi.Threshold{}
hardImageFsDisk := evictionapi.Threshold{}
softImageFsDisk := evictionapi.Threshold{}
hardImageINodeDisk := evictionapi.Threshold{}
softImageINodeDisk := evictionapi.Threshold{}
hardContainerFsDisk := -1
softContainerFsDisk := -1
hardContainerFsINodes := -1
softContainerFsINodes := -1
// Find the imagefs and nodefs thresholds
var err error
for idx, threshold := range thresholds {
if threshold.Signal == evictionapi.SignalImageFsAvailable && isHardEvictionThreshold(threshold) {
hardImageFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalImageFsAvailable && !isHardEvictionThreshold(threshold) {
softImageFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalImageFsInodesFree && isHardEvictionThreshold(threshold) {
hardImageINodeDisk = threshold
}
if threshold.Signal == evictionapi.SignalImageFsInodesFree && !isHardEvictionThreshold(threshold) {
softImageINodeDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsAvailable && isHardEvictionThreshold(threshold) {
hardNodeFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsAvailable && !isHardEvictionThreshold(threshold) {
softNodeFsDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsInodesFree && isHardEvictionThreshold(threshold) {
hardNodeINodeDisk = threshold
}
if threshold.Signal == evictionapi.SignalNodeFsInodesFree && !isHardEvictionThreshold(threshold) {
softNodeINodeDisk = threshold
}
// We are logging a warning and will override these settings.
// In this case this is safe because we do not support a separate container filesystem,
// so the containerfs limits must match either nodefs or imagefs.
if threshold.Signal == evictionapi.SignalContainerFsAvailable && isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.available for hard eviction. ignoring"))
hardContainerFsDisk = idx
}
if threshold.Signal == evictionapi.SignalContainerFsAvailable && !isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.available for soft eviction. ignoring"))
softContainerFsDisk = idx
}
if threshold.Signal == evictionapi.SignalContainerFsInodesFree && isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.inodesFree for hard eviction. ignoring"))
hardContainerFsINodes = idx
}
if threshold.Signal == evictionapi.SignalContainerFsInodesFree && !isHardEvictionThreshold(threshold) {
err = errors.Join(err, fmt.Errorf("found containerfs.inodesFree for soft eviction. ignoring"))
softContainerFsINodes = idx
}
}
// Either split disk case (containerfs=nodefs) or single filesystem
if (imageFs && separateContainerImageFs) || (!imageFs && !separateContainerImageFs) {
if hardContainerFsDisk != -1 {
thresholds[hardContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, Operator: hardNodeFsDisk.Operator, Value: hardNodeFsDisk.Value, MinReclaim: hardNodeFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: hardNodeFsDisk.Operator,
Value: hardNodeFsDisk.Value,
MinReclaim: hardNodeFsDisk.MinReclaim,
})
}
if softContainerFsDisk != -1 {
thresholds[softContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, GracePeriod: softNodeFsDisk.GracePeriod, Operator: softNodeFsDisk.Operator, Value: softNodeFsDisk.Value, MinReclaim: softNodeFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: softNodeFsDisk.Operator,
Value: softNodeFsDisk.Value,
MinReclaim: softNodeFsDisk.MinReclaim,
GracePeriod: softNodeFsDisk.GracePeriod,
})
}
if hardContainerFsINodes != -1 {
thresholds[hardContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, Operator: hardNodeINodeDisk.Operator, Value: hardNodeINodeDisk.Value, MinReclaim: hardNodeINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: hardNodeINodeDisk.Operator,
Value: hardNodeINodeDisk.Value,
MinReclaim: hardNodeINodeDisk.MinReclaim,
})
}
if softContainerFsINodes != -1 {
thresholds[softContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, GracePeriod: softNodeINodeDisk.GracePeriod, Operator: softNodeINodeDisk.Operator, Value: softNodeINodeDisk.Value, MinReclaim: softNodeINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: softNodeINodeDisk.Operator,
Value: softNodeINodeDisk.Value,
MinReclaim: softNodeINodeDisk.MinReclaim,
GracePeriod: softNodeINodeDisk.GracePeriod,
})
}
}
// Separate image filesystem case
if imageFs && !separateContainerImageFs {
if hardContainerFsDisk != -1 {
thresholds[hardContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, Operator: hardImageFsDisk.Operator, Value: hardImageFsDisk.Value, MinReclaim: hardImageFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: hardImageFsDisk.Operator,
Value: hardImageFsDisk.Value,
MinReclaim: hardImageFsDisk.MinReclaim,
})
}
if softContainerFsDisk != -1 {
thresholds[softContainerFsDisk] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable, GracePeriod: softImageFsDisk.GracePeriod, Operator: softImageFsDisk.Operator, Value: softImageFsDisk.Value, MinReclaim: softImageFsDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsAvailable,
Operator: softImageFsDisk.Operator,
Value: softImageFsDisk.Value,
MinReclaim: softImageFsDisk.MinReclaim,
GracePeriod: softImageFsDisk.GracePeriod,
})
}
if hardContainerFsINodes != -1 {
thresholds[hardContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, GracePeriod: hardImageINodeDisk.GracePeriod, Operator: hardImageINodeDisk.Operator, Value: hardImageINodeDisk.Value, MinReclaim: hardImageINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: hardImageINodeDisk.Operator,
Value: hardImageINodeDisk.Value,
MinReclaim: hardImageINodeDisk.MinReclaim,
})
}
if softContainerFsINodes != -1 {
thresholds[softContainerFsINodes] = evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree, GracePeriod: softImageINodeDisk.GracePeriod, Operator: softImageINodeDisk.Operator, Value: softImageINodeDisk.Value, MinReclaim: softImageINodeDisk.MinReclaim,
}
} else {
thresholds = append(thresholds, evictionapi.Threshold{
Signal: evictionapi.SignalContainerFsInodesFree,
Operator: softImageINodeDisk.Operator,
Value: softImageINodeDisk.Value,
MinReclaim: softImageINodeDisk.MinReclaim,
GracePeriod: softImageINodeDisk.GracePeriod,
})
}
}
return thresholds, err
}
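As a worked illustration of the mapping described in the function comment above (a sketch only: the helper name and the 10Gi value are hypothetical, and the evictionapi and resource imports are assumed to be available as elsewhere in this file), feeding a single nodefs.available hard threshold through the single-filesystem case yields a containerfs.available threshold that mirrors it:

// exampleContainerFsDerivation is a hypothetical illustration only.
func exampleContainerFsDerivation() ([]evictionapi.Threshold, error) {
    nodeFsHard := evictionapi.Threshold{
        Signal:   evictionapi.SignalNodeFsAvailable,
        Operator: evictionapi.OpLessThan,
        // 10Gi of free space triggers hard eviction in this sketch.
        Value: evictionapi.ThresholdValue{Quantity: resource.NewQuantity(10*1024*1024*1024, resource.BinarySI)},
    }
    // imageFs=false, separateContainerImageFs=false: single filesystem, so the
    // result also gains containerfs.available/inodesFree entries mirroring nodefs.
    return UpdateContainerFsThresholds([]evictionapi.Threshold{nodeFsHard}, false, false)
}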
// parseThresholdStatements parses the input statements into a list of Threshold objects.
func parseThresholdStatements(statements map[string]string) ([]evictionapi.Threshold, error) {
if len(statements) == 0 {
@ -708,12 +888,28 @@ func makeSignalObservations(summary *statsapi.Summary) (signalObservations, stat
capacity: resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
time: imageFs.Time,
}
if imageFs.InodesFree != nil && imageFs.Inodes != nil {
result[evictionapi.SignalImageFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.DecimalSI),
capacity: resource.NewQuantity(int64(*imageFs.Inodes), resource.DecimalSI),
time: imageFs.Time,
}
}
}
if containerFs := summary.Node.Runtime.ContainerFs; containerFs != nil {
if containerFs.AvailableBytes != nil && containerFs.CapacityBytes != nil {
result[evictionapi.SignalContainerFsAvailable] = signalObservation{
available: resource.NewQuantity(int64(*containerFs.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*containerFs.CapacityBytes), resource.BinarySI),
time: containerFs.Time,
}
}
if containerFs.InodesFree != nil && containerFs.Inodes != nil {
result[evictionapi.SignalContainerFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*containerFs.InodesFree), resource.DecimalSI),
capacity: resource.NewQuantity(int64(*containerFs.Inodes), resource.DecimalSI),
time: containerFs.Time,
}
}
}
@ -951,27 +1147,47 @@ func isAllocatableEvictionThreshold(threshold evictionapi.Threshold) bool {
}
// buildSignalToRankFunc returns ranking functions associated with resources
func buildSignalToRankFunc(withImageFs bool) map[evictionapi.Signal]rankFunc {
func buildSignalToRankFunc(withImageFs bool, imageContainerSplitFs bool) map[evictionapi.Signal]rankFunc {
signalToRankFunc := map[evictionapi.Signal]rankFunc{
evictionapi.SignalMemoryAvailable: rankMemoryPressure,
evictionapi.SignalAllocatableMemoryAvailable: rankMemoryPressure,
evictionapi.SignalPIDAvailable: rankPIDPressure,
}
// usage of an imagefs is optional
if withImageFs {
// We have a dedicated image filesystem (images and containers are on the same disk),
// so we treat it as a plain dedicated imagefs.
if withImageFs && !imageContainerSplitFs {
// with an imagefs, nodefs pod rank func for eviction only includes logs and local volumes
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
// with an imagefs, imagefs pod rank func for eviction only includes rootfs
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot}, resourceInodes)
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsImages}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsImages}, resourceInodes)
signalToRankFunc[evictionapi.SignalContainerFsAvailable] = signalToRankFunc[evictionapi.SignalImageFsAvailable]
signalToRankFunc[evictionapi.SignalContainerFsInodesFree] = signalToRankFunc[evictionapi.SignalImageFsInodesFree]
// If both imagefs and container fs are on separate disks
// we want to track the writeable layer in containerfs signals.
} else if withImageFs && imageContainerSplitFs {
// with a split imagefs/containerfs, the nodefs rank func includes logs, local volumes and the writeable layer (rootfs)
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsLogs, fsStatsLocalVolumeSource, fsStatsRoot}, resourceInodes)
signalToRankFunc[evictionapi.SignalContainerFsAvailable] = signalToRankFunc[evictionapi.SignalNodeFsAvailable]
signalToRankFunc[evictionapi.SignalContainerFsInodesFree] = signalToRankFunc[evictionapi.SignalNodeFsInodesFree]
// with a split imagefs/containerfs, the imagefs rank func only includes image stats
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsImages}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsImages}, resourceInodes)
// If imagefs is not on a separate disk from root (even if containerfs is)
} else {
// without an imagefs, nodefs pod rank func for eviction looks at all fs stats.
// since imagefs and nodefs share a common device, they share common ranking functions.
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalNodeFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalNodeFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalImageFsAvailable] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)
signalToRankFunc[evictionapi.SignalImageFsInodesFree] = rankDiskPressureFunc([]fsStatsType{fsStatsImages, fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)
signalToRankFunc[evictionapi.SignalContainerFsAvailable] = signalToRankFunc[evictionapi.SignalNodeFsAvailable]
signalToRankFunc[evictionapi.SignalContainerFsInodesFree] = signalToRankFunc[evictionapi.SignalNodeFsInodesFree]
}
return signalToRankFunc
}
@ -982,19 +1198,29 @@ func PodIsEvicted(podStatus v1.PodStatus) bool {
}
// buildSignalToNodeReclaimFuncs returns reclaim functions associated with resources.
func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool) map[evictionapi.Signal]nodeReclaimFuncs {
func buildSignalToNodeReclaimFuncs(imageGC ImageGC, containerGC ContainerGC, withImageFs bool, splitContainerImageFs bool) map[evictionapi.Signal]nodeReclaimFuncs {
signalToReclaimFunc := map[evictionapi.Signal]nodeReclaimFuncs{}
// usage of an imagefs is optional
if withImageFs {
if withImageFs && !splitContainerImageFs {
// with an imagefs, nodefs pressure should just delete logs
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{}
// with an imagefs, imagefs pressure should delete unused images
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
// imagefs and containerfs are on separate disks:
// containerfs pressure triggers container GC,
// imagefs pressure triggers image GC
} else if withImageFs && splitContainerImageFs {
// with an imagefs, imagefs pressure should delete unused images
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsInodesFree] = nodeReclaimFuncs{imageGC.DeleteUnusedImages}
// with a split fs and imagefs, containerfs pressure should delete unused containers
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers}
} else {
// without an imagefs, nodefs pressure should delete logs, and unused images
// since imagefs and nodefs share a common device, they share common reclaim functions
// since imagefs, containerfs and nodefs share a common device, they share common reclaim functions
signalToReclaimFunc[evictionapi.SignalNodeFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalNodeFsInodesFree] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}
signalToReclaimFunc[evictionapi.SignalImageFsAvailable] = nodeReclaimFuncs{containerGC.DeleteAllUnusedContainers, imageGC.DeleteUnusedImages}

File diff suppressed because it is too large.

View File

@ -218,6 +218,20 @@ func (mr *MockContainerGCMockRecorder) DeleteAllUnusedContainers(ctx interface{}
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteAllUnusedContainers", reflect.TypeOf((*MockContainerGC)(nil).DeleteAllUnusedContainers), ctx)
}
// IsContainerFsSeparateFromImageFs mocks base method.
func (m *MockContainerGC) IsContainerFsSeparateFromImageFs(ctx context.Context) bool {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "IsContainerFsSeparateFromImageFs", ctx)
ret0, _ := ret[0].(bool)
return ret0
}
// IsContainerFsSeparateFromImageFs indicates an expected call of IsContainerFsSeparateFromImageFs.
func (mr *MockContainerGCMockRecorder) IsContainerFsSeparateFromImageFs(ctx interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsContainerFsSeparateFromImageFs", reflect.TypeOf((*MockContainerGC)(nil).IsContainerFsSeparateFromImageFs), ctx)
}
// MockCgroupNotifier is a mock of CgroupNotifier interface.
type MockCgroupNotifier struct {
ctrl *gomock.Controller

View File

@ -38,6 +38,8 @@ const (
fsStatsLogs fsStatsType = "logs"
// fsStatsRoot identifies stats for pod container writable layers.
fsStatsRoot fsStatsType = "root"
// fsStatsImages identifies stats for pod container read-only (image) layers
fsStatsImages fsStatsType = "images"
)
// Config holds information about how eviction is configured.
@ -85,6 +87,8 @@ type ImageGC interface {
type ContainerGC interface {
// DeleteAllUnusedContainers deletes all unused containers, even those that belong to pods that are terminated, but not deleted.
DeleteAllUnusedContainers(ctx context.Context) error
// IsContainerFsSeparateFromImageFs checks if container filesystem is split from image filesystem.
IsContainerFsSeparateFromImageFs(ctx context.Context) bool
}
// KillPodFunc kills a pod.

View File

@ -54,7 +54,7 @@ const imageIndexTupleFormat = "%s,%s"
// collection.
type StatsProvider interface {
// ImageFsStats returns the stats of the image filesystem.
ImageFsStats(ctx context.Context) (*statsapi.FsStats, error)
ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error)
}
// ImageGCManager is an interface for managing lifecycle of all images.
@ -328,7 +328,7 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
}
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats(ctx)
fsStats, _, err := im.statsProvider.ImageFsStats(ctx)
if err != nil {
return err
}

View File

@ -618,10 +618,11 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
manager, _ := newRealImageGCManager(policy, mockStatsProvider)
// Expect 40% usage.
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{
imageStats := &statsapi.FsStats{
AvailableBytes: uint64Ptr(600),
CapacityBytes: uint64Ptr(1000),
}, nil)
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageStats, imageStats, nil)
assert.NoError(t, manager.GarbageCollect(ctx))
}
@ -637,7 +638,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
mockStatsProvider := statstest.NewMockProvider(mockCtrl)
manager, _ := newRealImageGCManager(policy, mockStatsProvider)
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, fmt.Errorf("error"))
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, &statsapi.FsStats{}, fmt.Errorf("error"))
assert.NotNil(t, manager.GarbageCollect(ctx))
}
@ -654,10 +655,11 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
manager, fakeRuntime := newRealImageGCManager(policy, mockStatsProvider)
// Expect 95% usage and most of it gets freed.
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{
imageFs := &statsapi.FsStats{
AvailableBytes: uint64Ptr(50),
CapacityBytes: uint64Ptr(1000),
}, nil)
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageFs, imageFs, nil)
fakeRuntime.ImageList = []container.Image{
makeImage(0, 450),
}
@ -677,10 +679,11 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
manager, fakeRuntime := newRealImageGCManager(policy, mockStatsProvider)
// Expect 95% usage and little of it gets freed.
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{
imageFs := &statsapi.FsStats{
AvailableBytes: uint64Ptr(50),
CapacityBytes: uint64Ptr(1000),
}, nil)
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageFs, imageFs, nil)
fakeRuntime.ImageList = []container.Image{
makeImage(0, 50),
}

View File

@ -1326,7 +1326,7 @@ func (kl *Kubelet) ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([
}
// ImageFsStats is delegated to StatsProvider, which implements stats.Provider interface
func (kl *Kubelet) ImageFsStats(ctx context.Context) (*statsapi.FsStats, error) {
func (kl *Kubelet) ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) {
return kl.StatsProvider.ImageFsStats(ctx)
}

View File

@ -158,3 +158,12 @@ func (m *kubeGenericRuntimeManager) ImageStats(ctx context.Context) (*kubecontai
}
return stats, nil
}
func (m *kubeGenericRuntimeManager) ImageFsInfo(ctx context.Context) (*runtimeapi.ImageFsInfoResponse, error) {
allImages, err := m.imageService.ImageFsInfo(ctx)
if err != nil {
klog.ErrorS(err, "Failed to get image filesystem")
return nil, err
}
return allImages, nil
}

View File

@ -289,8 +289,10 @@ func (*fakeKubelet) ListPodStatsAndUpdateCPUNanoCoreUsage(_ context.Context) ([]
func (*fakeKubelet) ListPodCPUAndMemoryStats(_ context.Context) ([]statsapi.PodStats, error) {
return nil, nil
}
func (*fakeKubelet) ImageFsStats(_ context.Context) (*statsapi.FsStats, error) { return nil, nil }
func (*fakeKubelet) RlimitStats() (*statsapi.RlimitStats, error) { return nil, nil }
func (*fakeKubelet) ImageFsStats(_ context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) {
return nil, nil, nil
}
func (*fakeKubelet) RlimitStats() (*statsapi.RlimitStats, error) { return nil, nil }
func (*fakeKubelet) GetCgroupStats(cgroupName string, updateStats bool) (*statsapi.ContainerStats, *statsapi.NetworkStats, error) {
return nil, nil, nil
}

View File

@ -51,8 +51,13 @@ type Provider interface {
// for more details.
ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([]statsapi.PodStats, error)
// ImageFsStats returns the stats of the image filesystem.
ImageFsStats(ctx context.Context) (*statsapi.FsStats, error)
// The kubelet supports three layouts for container filesystems:
// everything on the node filesystem (no image filesystem),
// container storage on a dedicated disk (imagefs separate from root), or
// the container filesystem on root with images stored on imagefs.
// The first return value is the image filesystem and the
// second is the container filesystem.
ImageFsStats(ctx context.Context) (imageFs *statsapi.FsStats, containerFs *statsapi.FsStats, callErr error)
// The following stats are provided by cAdvisor.
//
// GetCgroupStats returns the stats and the networking usage of the cgroup
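A hypothetical consumer of the new two-value signature (the helper name is illustrative; the summary provider shown further below wires the results into RuntimeStats the same way) might look like:

// runtimeStatsFromProvider is a hypothetical helper: when the filesystems are
// not split, both returned FsStats describe the same filesystem.
func runtimeStatsFromProvider(ctx context.Context, p Provider) (*statsapi.RuntimeStats, error) {
    imageFs, containerFs, err := p.ImageFsStats(ctx)
    if err != nil {
        return nil, err
    }
    return &statsapi.RuntimeStats{ImageFs: imageFs, ContainerFs: containerFs}, nil
}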

View File

@ -82,7 +82,7 @@ func (sp *summaryProviderImpl) Get(ctx context.Context, updateStats bool) (*stat
if err != nil {
return nil, fmt.Errorf("failed to get rootFs stats: %v", err)
}
imageFsStats, err := sp.provider.ImageFsStats(ctx)
imageFsStats, containerFsStats, err := sp.provider.ImageFsStats(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get imageFs stats: %v", err)
}
@ -109,7 +109,7 @@ func (sp *summaryProviderImpl) Get(ctx context.Context, updateStats bool) (*stat
Network: networkStats,
StartTime: sp.systemBootTime,
Fs: rootFsStats,
Runtime: &statsapi.RuntimeStats{ImageFs: imageFsStats},
Runtime: &statsapi.RuntimeStats{ContainerFs: containerFsStats, ImageFs: imageFsStats},
Rlimit: rlimit,
SystemContainers: sp.GetSystemContainersStats(nodeConfig, podStats, updateStats),
}

View File

@ -48,7 +48,7 @@ var (
rlimitStats = getRlimitStats()
)
func TestSummaryProviderGetStats(t *testing.T) {
func TestSummaryProviderGetStatsNoSplitFileSystem(t *testing.T) {
ctx := context.Background()
assert := assert.New(t)
@ -81,7 +81,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
mockStatsProvider.EXPECT().GetPodCgroupRoot().Return(cgroupRoot)
mockStatsProvider.EXPECT().ListPodStats(ctx).Return(podStats, nil).AnyTimes()
mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage(ctx).Return(podStats, nil)
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(imageFsStats, nil)
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(imageFsStats, imageFsStats, nil)
mockStatsProvider.EXPECT().RootFsStats().Return(rootFsStats, nil)
mockStatsProvider.EXPECT().RlimitStats().Return(rlimitStats, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/", true).Return(cgroupStatsMap["/"].cs, cgroupStatsMap["/"].ns, nil)
@ -103,7 +103,106 @@ func TestSummaryProviderGetStats(t *testing.T) {
assert.Equal(summary.Node.Swap, cgroupStatsMap["/"].cs.Swap)
assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns)
assert.Equal(summary.Node.Fs, rootFsStats)
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ImageFs: imageFsStats})
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: imageFsStats, ImageFs: imageFsStats})
assert.Equal(len(summary.Node.SystemContainers), 4)
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "kubelet",
StartTime: kubeletCreationTime,
CPU: cgroupStatsMap["/kubelet"].cs.CPU,
Memory: cgroupStatsMap["/kubelet"].cs.Memory,
Accelerators: cgroupStatsMap["/kubelet"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/kubelet"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/kubelet"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "misc",
StartTime: cgroupStatsMap["/misc"].cs.StartTime,
CPU: cgroupStatsMap["/misc"].cs.CPU,
Memory: cgroupStatsMap["/misc"].cs.Memory,
Accelerators: cgroupStatsMap["/misc"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/misc"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/misc"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "runtime",
StartTime: cgroupStatsMap["/runtime"].cs.StartTime,
CPU: cgroupStatsMap["/runtime"].cs.CPU,
Memory: cgroupStatsMap["/runtime"].cs.Memory,
Accelerators: cgroupStatsMap["/runtime"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/runtime"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/runtime"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "pods",
StartTime: cgroupStatsMap["/pods"].cs.StartTime,
CPU: cgroupStatsMap["/pods"].cs.CPU,
Memory: cgroupStatsMap["/pods"].cs.Memory,
Accelerators: cgroupStatsMap["/pods"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/pods"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/pods"].cs.Swap,
})
assert.Equal(summary.Pods, podStats)
}
func TestSummaryProviderGetStatsSplitImageFs(t *testing.T) {
ctx := context.Background()
assert := assert.New(t)
podStats := []statsapi.PodStats{
{
PodRef: statsapi.PodReference{Name: "test-pod", Namespace: "test-namespace", UID: "UID_test-pod"},
StartTime: metav1.NewTime(time.Now()),
Containers: []statsapi.ContainerStats{*getContainerStats()},
Network: getNetworkStats(),
VolumeStats: []statsapi.VolumeStats{*getVolumeStats()},
},
}
cgroupStatsMap := map[string]struct {
cs *statsapi.ContainerStats
ns *statsapi.NetworkStats
}{
"/": {cs: getContainerStats(), ns: getNetworkStats()},
"/runtime": {cs: getContainerStats(), ns: getNetworkStats()},
"/misc": {cs: getContainerStats(), ns: getNetworkStats()},
"/kubelet": {cs: getContainerStats(), ns: getNetworkStats()},
"/pods": {cs: getContainerStats(), ns: getNetworkStats()},
}
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
mockStatsProvider := statstest.NewMockProvider(mockCtrl)
mockStatsProvider.EXPECT().GetNode().Return(node, nil)
mockStatsProvider.EXPECT().GetNodeConfig().Return(nodeConfig)
mockStatsProvider.EXPECT().GetPodCgroupRoot().Return(cgroupRoot)
mockStatsProvider.EXPECT().ListPodStats(ctx).Return(podStats, nil).AnyTimes()
mockStatsProvider.EXPECT().ListPodStatsAndUpdateCPUNanoCoreUsage(ctx).Return(podStats, nil)
mockStatsProvider.EXPECT().RootFsStats().Return(rootFsStats, nil)
mockStatsProvider.EXPECT().RlimitStats().Return(rlimitStats, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/", true).Return(cgroupStatsMap["/"].cs, cgroupStatsMap["/"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/runtime", false).Return(cgroupStatsMap["/runtime"].cs, cgroupStatsMap["/runtime"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/misc", false).Return(cgroupStatsMap["/misc"].cs, cgroupStatsMap["/misc"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/kubelet", false).Return(cgroupStatsMap["/kubelet"].cs, cgroupStatsMap["/kubelet"].ns, nil)
mockStatsProvider.EXPECT().GetCgroupStats("/kubepods", true).Return(cgroupStatsMap["/pods"].cs, cgroupStatsMap["/pods"].ns, nil)
mockStatsProvider.EXPECT().ImageFsStats(ctx).Return(imageFsStats, rootFsStats, nil)
kubeletCreationTime := metav1.Now()
systemBootTime := metav1.Now()
provider := summaryProviderImpl{kubeletCreationTime: kubeletCreationTime, systemBootTime: systemBootTime, provider: mockStatsProvider}
summary, err := provider.Get(ctx, true)
assert.NoError(err)
assert.Equal(summary.Node.NodeName, "test-node")
assert.Equal(summary.Node.StartTime, systemBootTime)
assert.Equal(summary.Node.CPU, cgroupStatsMap["/"].cs.CPU)
assert.Equal(summary.Node.Memory, cgroupStatsMap["/"].cs.Memory)
assert.Equal(summary.Node.Swap, cgroupStatsMap["/"].cs.Swap)
assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns)
assert.Equal(summary.Node.Fs, rootFsStats)
// With a split filesystem, ContainerFs should report the root filesystem stats and ImageFs the image filesystem stats
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ContainerFs: rootFsStats, ImageFs: imageFsStats})
assert.Equal(len(summary.Node.SystemContainers), 4)
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{

View File

@ -221,12 +221,13 @@ func (mr *MockProviderMockRecorder) GetRequestedContainersInfo(containerName, op
}
// ImageFsStats mocks base method.
func (m *MockProvider) ImageFsStats(ctx context.Context) (*v1alpha1.FsStats, error) {
func (m *MockProvider) ImageFsStats(ctx context.Context) (*v1alpha1.FsStats, *v1alpha1.FsStats, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ImageFsStats", ctx)
ret0, _ := ret[0].(*v1alpha1.FsStats)
ret1, _ := ret[1].(error)
return ret0, ret1
ret1, _ := ret[1].(*v1alpha1.FsStats)
ret2, _ := ret[2].(error)
return ret0, ret1, ret2
}
// ImageFsStats indicates an expected call of ImageFsStats.

View File

@ -28,8 +28,10 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
kubetypes "k8s.io/kubelet/pkg/types"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@ -237,31 +239,86 @@ func (p *cadvisorStatsProvider) ListPodCPUAndMemoryStats(_ context.Context) ([]s
}
// ImageFsStats returns the stats of the filesystem for storing images.
func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsStats, error) {
func (p *cadvisorStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *statsapi.FsStats, containerFsRet *statsapi.FsStats, errCall error) {
imageFsInfo, err := p.cadvisor.ImagesFsInfo()
if err != nil {
return nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
imageStats, err := p.imageService.ImageStats(ctx)
if err != nil || imageStats == nil {
return nil, fmt.Errorf("failed to get image stats: %v", err)
return nil, nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
if !utilfeature.DefaultFeatureGate.Enabled(features.KubeletSeparateDiskGC) {
imageStats, err := p.imageService.ImageStats(ctx)
if err != nil || imageStats == nil {
return nil, nil, fmt.Errorf("failed to get image stats: %v", err)
}
var imageFsInodesUsed *uint64
if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
imageFsInodesUsed = &imageFsIU
}
imageFs := &statsapi.FsStats{
Time: metav1.NewTime(imageFsInfo.Timestamp),
AvailableBytes: &imageFsInfo.Available,
CapacityBytes: &imageFsInfo.Capacity,
UsedBytes: &imageStats.TotalStorageBytes,
InodesFree: imageFsInfo.InodesFree,
Inodes: imageFsInfo.Inodes,
InodesUsed: imageFsInodesUsed,
}
return imageFs, imageFs, nil
}
containerFsInfo, err := p.cadvisor.ContainerFsInfo()
if err != nil {
return nil, nil, fmt.Errorf("failed to get container fs info: %v", err)
}
imageStats, err := p.imageService.ImageFsInfo(ctx)
if err != nil || imageStats == nil {
return nil, nil, fmt.Errorf("failed to get image stats: %v", err)
}
splitFileSystem := false
if imageStats.ImageFilesystems[0].FsId.Mountpoint != imageStats.ContainerFilesystems[0].FsId.Mountpoint {
klog.InfoS("Detect Split Filesystem", "ImageFilesystems", imageStats.ImageFilesystems[0], "ContainerFilesystems", imageStats.ContainerFilesystems[0])
splitFileSystem = true
}
imageFs := imageStats.ImageFilesystems[0]
var imageFsInodesUsed *uint64
if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
imageFsInodesUsed = &imageFsIU
}
return &statsapi.FsStats{
fsStats := &statsapi.FsStats{
Time: metav1.NewTime(imageFsInfo.Timestamp),
AvailableBytes: &imageFsInfo.Available,
CapacityBytes: &imageFsInfo.Capacity,
UsedBytes: &imageStats.TotalStorageBytes,
UsedBytes: &imageFs.UsedBytes.Value,
InodesFree: imageFsInfo.InodesFree,
Inodes: imageFsInfo.Inodes,
InodesUsed: imageFsInodesUsed,
}, nil
}
if !splitFileSystem {
return fsStats, fsStats, nil
}
containerFs := imageStats.ContainerFilesystems[0]
var containerFsInodesUsed *uint64
if containerFsInfo.Inodes != nil && containerFsInfo.InodesFree != nil {
containerFsIU := *containerFsInfo.Inodes - *containerFsInfo.InodesFree
containerFsInodesUsed = &containerFsIU
}
fsContainerStats := &statsapi.FsStats{
Time: metav1.NewTime(containerFsInfo.Timestamp),
AvailableBytes: &containerFsInfo.Available,
CapacityBytes: &containerFsInfo.Capacity,
UsedBytes: &containerFs.UsedBytes.Value,
InodesFree: containerFsInfo.InodesFree,
Inodes: containerFsInfo.Inodes,
InodesUsed: containerFsInodesUsed,
}
return fsStats, fsContainerStats, nil
}
// ImageFsDevice returns the name of the device where the image filesystem is located,

View File

@ -28,7 +28,11 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/features"
cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
@ -250,7 +254,6 @@ func TestCadvisorListPodStats(t *testing.T) {
mockCadvisor.EXPECT().ImagesFsInfo().Return(imagefs, nil)
mockRuntime := containertest.NewMockRuntime(mockCtrl)
mockRuntime.EXPECT().ImageStats(ctx).Return(&kubecontainer.ImageStats{TotalStorageBytes: 123}, nil).AnyTimes()
ephemeralVolumes := []statsapi.VolumeStats{getPodVolumeStats(seedEphemeralVolume1, "ephemeralVolume1"),
getPodVolumeStats(seedEphemeralVolume2, "ephemeralVolume2")}
@ -520,7 +523,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
assert.Nil(t, ps.Network)
}
func TestCadvisorImagesFsStats(t *testing.T) {
func TestCadvisorImagesFsStatsKubeletSeparateDiskOff(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
@ -534,11 +537,13 @@ func TestCadvisorImagesFsStats(t *testing.T) {
imageStats = &kubecontainer.ImageStats{TotalStorageBytes: 100}
)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, false)()
mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
mockRuntime.EXPECT().ImageStats(ctx).Return(imageStats, nil)
provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
stats, err := provider.ImageFsStats(ctx)
stats, _, err := provider.ImageFsStats(ctx)
assert.NoError(err)
assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
@ -550,6 +555,110 @@ func TestCadvisorImagesFsStats(t *testing.T) {
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
}
func TestCadvisorImagesFsStats(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
var (
assert = assert.New(t)
mockCadvisor = cadvisortest.NewMockInterface(mockCtrl)
mockRuntime = containertest.NewMockRuntime(mockCtrl)
seed = 1000
imageFsInfo = getTestFsInfo(seed)
)
imageFsInfoCRI := &runtimeapi.FilesystemUsage{
Timestamp: imageFsInfo.Timestamp.Unix(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "images"},
UsedBytes: &runtimeapi.UInt64Value{Value: imageFsInfo.Usage},
InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes},
}
imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{
ImageFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
ContainerFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true)()
mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
mockCadvisor.EXPECT().ContainerFsInfo().Return(imageFsInfo, nil)
mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil)
provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
stats, containerfs, err := provider.ImageFsStats(ctx)
assert.NoError(err)
assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
assert.Equal(imageFsInfo.Available, *stats.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, stats.InodesFree)
assert.Equal(imageFsInfo.Inodes, stats.Inodes)
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
assert.Equal(imageFsInfo.Timestamp, containerfs.Time.Time)
assert.Equal(imageFsInfo.Available, *containerfs.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *containerfs.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, containerfs.InodesFree)
assert.Equal(imageFsInfo.Inodes, containerfs.Inodes)
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *containerfs.InodesUsed)
}
func TestCadvisorSplitImagesFsStats(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
var (
assert = assert.New(t)
mockCadvisor = cadvisortest.NewMockInterface(mockCtrl)
mockRuntime = containertest.NewMockRuntime(mockCtrl)
seed = 1000
imageFsInfo = getTestFsInfo(seed)
containerSeed = 1001
containerFsInfo = getTestFsInfo(containerSeed)
)
imageFsInfoCRI := &runtimeapi.FilesystemUsage{
Timestamp: imageFsInfo.Timestamp.Unix(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "images"},
UsedBytes: &runtimeapi.UInt64Value{Value: imageFsInfo.Usage},
InodesUsed: &runtimeapi.UInt64Value{Value: *imageFsInfo.Inodes},
}
containerFsInfoCRI := &runtimeapi.FilesystemUsage{
Timestamp: containerFsInfo.Timestamp.Unix(),
FsId: &runtimeapi.FilesystemIdentifier{Mountpoint: "containers"},
UsedBytes: &runtimeapi.UInt64Value{Value: containerFsInfo.Usage},
InodesUsed: &runtimeapi.UInt64Value{Value: *containerFsInfo.Inodes},
}
imageFsInfoResponse := &runtimeapi.ImageFsInfoResponse{
ImageFilesystems: []*runtimeapi.FilesystemUsage{imageFsInfoCRI},
ContainerFilesystems: []*runtimeapi.FilesystemUsage{containerFsInfoCRI},
}
mockCadvisor.EXPECT().ImagesFsInfo().Return(imageFsInfo, nil)
mockCadvisor.EXPECT().ContainerFsInfo().Return(containerFsInfo, nil)
mockRuntime.EXPECT().ImageFsInfo(ctx).Return(imageFsInfoResponse, nil)
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, true)()
provider := newCadvisorStatsProvider(mockCadvisor, &fakeResourceAnalyzer{}, mockRuntime, nil, NewFakeHostStatsProvider())
stats, containerfs, err := provider.ImageFsStats(ctx)
assert.NoError(err)
assert.Equal(imageFsInfo.Timestamp, stats.Time.Time)
assert.Equal(imageFsInfo.Available, *stats.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *stats.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, stats.InodesFree)
assert.Equal(imageFsInfo.Inodes, stats.Inodes)
assert.Equal(*imageFsInfo.Inodes-*imageFsInfo.InodesFree, *stats.InodesUsed)
assert.Equal(containerFsInfo.Timestamp, containerfs.Time.Time)
assert.Equal(containerFsInfo.Available, *containerfs.AvailableBytes)
assert.Equal(containerFsInfo.Capacity, *containerfs.CapacityBytes)
assert.Equal(containerFsInfo.InodesFree, containerfs.InodesFree)
assert.Equal(containerFsInfo.Inodes, containerfs.Inodes)
assert.Equal(*containerFsInfo.Inodes-*containerFsInfo.InodesFree, *containerfs.InodesUsed)
}
func TestCadvisorListPodStatsWhenContainerLogFound(t *testing.T) {
ctx := context.Background()
const (

View File

@ -385,10 +385,10 @@ func (p *criStatsProvider) getPodAndContainerMaps(ctx context.Context) (map[stri
}
// ImageFsStats returns the stats of the image filesystem.
func (p *criStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsStats, error) {
func (p *criStatsProvider) ImageFsStats(ctx context.Context) (imageFsRet *statsapi.FsStats, containerFsRet *statsapi.FsStats, errRet error) {
resp, err := p.imageService.ImageFsInfo(ctx)
if err != nil {
return nil, err
return nil, nil, err
}
// CRI may return the stats of multiple image filesystems but we only
@ -396,31 +396,32 @@ func (p *criStatsProvider) ImageFsStats(ctx context.Context) (*statsapi.FsStats,
//
// TODO(yguo0905): Support returning stats of multiple image filesystems.
if len(resp.GetImageFilesystems()) == 0 {
return nil, fmt.Errorf("imageFs information is unavailable")
return nil, nil, fmt.Errorf("imageFs information is unavailable")
}
fs := resp.GetImageFilesystems()[0]
s := &statsapi.FsStats{
imageFsRet = &statsapi.FsStats{
Time: metav1.NewTime(time.Unix(0, fs.Timestamp)),
UsedBytes: &fs.UsedBytes.Value,
}
if fs.InodesUsed != nil {
s.InodesUsed = &fs.InodesUsed.Value
imageFsRet.InodesUsed = &fs.InodesUsed.Value
}
imageFsInfo, err := p.getFsInfo(fs.GetFsId())
if err != nil {
return nil, fmt.Errorf("get filesystem info: %w", err)
return nil, nil, fmt.Errorf("get filesystem info: %w", err)
}
if imageFsInfo != nil {
// The image filesystem id is unknown to the local node or there's
// an error on retrieving the stats. In these cases, we omit those
// stats and return the best-effort partial result. See
// https://github.com/kubernetes/heapster/issues/1793.
s.AvailableBytes = &imageFsInfo.Available
s.CapacityBytes = &imageFsInfo.Capacity
s.InodesFree = imageFsInfo.InodesFree
s.Inodes = imageFsInfo.Inodes
imageFsRet.AvailableBytes = &imageFsInfo.Available
imageFsRet.CapacityBytes = &imageFsInfo.Capacity
imageFsRet.InodesFree = imageFsInfo.InodesFree
imageFsRet.Inodes = imageFsInfo.Inodes
}
return s, nil
// TODO: For CRI Stats Provider we don't support separate disks yet.
return imageFsRet, imageFsRet, nil
}
// ImageFsDevice returns the name of the device where the image filesystem is located,

View File

@ -773,7 +773,7 @@ func TestCRIImagesFsStats(t *testing.T) {
false,
)
stats, err := provider.ImageFsStats(ctx)
stats, containerStats, err := provider.ImageFsStats(ctx)
assert := assert.New(t)
assert.NoError(err)
@ -784,6 +784,15 @@ func TestCRIImagesFsStats(t *testing.T) {
assert.Equal(imageFsInfo.Inodes, stats.Inodes)
assert.Equal(imageFsUsage.UsedBytes.Value, *stats.UsedBytes)
assert.Equal(imageFsUsage.InodesUsed.Value, *stats.InodesUsed)
assert.Equal(imageFsUsage.Timestamp, containerStats.Time.UnixNano())
assert.Equal(imageFsInfo.Available, *containerStats.AvailableBytes)
assert.Equal(imageFsInfo.Capacity, *containerStats.CapacityBytes)
assert.Equal(imageFsInfo.InodesFree, containerStats.InodesFree)
assert.Equal(imageFsInfo.Inodes, containerStats.Inodes)
assert.Equal(imageFsUsage.UsedBytes.Value, *containerStats.UsedBytes)
assert.Equal(imageFsUsage.InodesUsed.Value, *containerStats.InodesUsed)
}
func makeFakePodSandbox(name, uid, namespace string, terminated bool) *critest.FakePodSandbox {

View File

@ -31,6 +31,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/utils/ptr"
)
// PodManager is the subset of methods the manager needs to observe the actual state of the kubelet.
@ -99,7 +100,7 @@ type containerStatsProvider interface {
ListPodStats(ctx context.Context) ([]statsapi.PodStats, error)
ListPodStatsAndUpdateCPUNanoCoreUsage(ctx context.Context) ([]statsapi.PodStats, error)
ListPodCPUAndMemoryStats(ctx context.Context) ([]statsapi.PodStats, error)
ImageFsStats(ctx context.Context) (*statsapi.FsStats, error)
ImageFsStats(ctx context.Context) (*statsapi.FsStats, *statsapi.FsStats, error)
ImageFsDevice(ctx context.Context) (string, error)
}
@ -203,6 +204,7 @@ func (p *Provider) GetRawContainerInfo(containerName string, req *cadvisorapiv1.
}
// HasDedicatedImageFs returns true if a dedicated image filesystem exists for storing images.
// KEP 4191: extended so that the container filesystem may also be separate from the image filesystem.
func (p *Provider) HasDedicatedImageFs(ctx context.Context) (bool, error) {
device, err := p.containerStatsProvider.ImageFsDevice(ctx)
if err != nil {
@ -212,5 +214,38 @@ func (p *Provider) HasDedicatedImageFs(ctx context.Context) (bool, error) {
if err != nil {
return false, err
}
// KEP 4191: a dedicated image filesystem can mean that either the container or the image filesystem is separate from the root filesystem.
// cAdvisor reports this differently than container runtimes do.
if device == rootFsInfo.Device {
imageFs, containerFs, err := p.ImageFsStats(ctx)
if err != nil {
return false, err
}
if !equalFileSystems(imageFs, containerFs) {
return true, nil
}
}
return device != rootFsInfo.Device, nil
}
func equalFileSystems(a, b *statsapi.FsStats) bool {
if a == nil || b == nil {
return false
}
if !ptr.Equal(a.AvailableBytes, b.AvailableBytes) {
return false
}
if !ptr.Equal(a.CapacityBytes, b.CapacityBytes) {
return false
}
if !ptr.Equal(a.InodesUsed, b.InodesUsed) {
return false
}
if !ptr.Equal(a.InodesFree, b.InodesFree) {
return false
}
if !ptr.Equal(a.Inodes, b.Inodes) {
return false
}
return true
}
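For readers unfamiliar with k8s.io/utils/ptr, a standalone sketch of the nil-safe comparison semantics equalFileSystems relies on; the literal values are arbitrary and only illustrate the behavior:

package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

func main() {
	a, b := ptr.To[uint64](100), ptr.To[uint64](100)
	fmt.Println(ptr.Equal(a, b))             // true: both non-nil and equal
	fmt.Println(ptr.Equal[uint64](nil, nil)) // true: both nil
	fmt.Println(ptr.Equal(a, nil))           // false: exactly one side is nil
}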

View File

@ -417,21 +417,34 @@ func TestHasDedicatedImageFs(t *testing.T) {
ctx := context.Background()
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
imageStatsExpected := &statsapi.FsStats{AvailableBytes: uint64Ptr(1)}
for desc, test := range map[string]struct {
rootfsDevice string
imagefsDevice string
dedicated bool
rootfsDevice string
imagefsDevice string
dedicated bool
imageFsStats *statsapi.FsStats
containerFsStats *statsapi.FsStats
}{
"dedicated device for image filesystem": {
rootfsDevice: "root/device",
imagefsDevice: "image/device",
dedicated: true,
imageFsStats: imageStatsExpected,
},
"shared device for image filesystem": {
rootfsDevice: "share/device",
imagefsDevice: "share/device",
dedicated: false,
rootfsDevice: "share/device",
imagefsDevice: "share/device",
dedicated: false,
imageFsStats: imageStatsExpected,
containerFsStats: imageStatsExpected,
},
"split filesystem for images": {
rootfsDevice: "root/device",
imagefsDevice: "root/device",
dedicated: true,
imageFsStats: &statsapi.FsStats{AvailableBytes: uint64Ptr(1)},
containerFsStats: &statsapi.FsStats{AvailableBytes: uint64Ptr(2)},
},
} {
t.Logf("TestCase %q", desc)
@ -441,10 +454,12 @@ func TestHasDedicatedImageFs(t *testing.T) {
mockRuntimeCache = new(kubecontainertest.MockRuntimeCache)
)
mockCadvisor.EXPECT().RootFsInfo().Return(cadvisorapiv2.FsInfo{Device: test.rootfsDevice}, nil)
provider := newStatsProvider(mockCadvisor, mockPodManager, mockRuntimeCache, fakeContainerStatsProvider{
device: test.imagefsDevice,
device: test.imagefsDevice,
imageFs: test.imageFsStats,
containerFs: test.containerFsStats,
})
dedicated, err := provider.HasDedicatedImageFs(ctx)
assert.NoError(t, err)
assert.Equal(t, test.dedicated, dedicated)
@ -760,7 +775,9 @@ func (o *fakeResourceAnalyzer) GetPodVolumeStats(uid types.UID) (serverstats.Pod
}
type fakeContainerStatsProvider struct {
device string
device string
imageFs *statsapi.FsStats
containerFs *statsapi.FsStats
}
func (p fakeContainerStatsProvider) ListPodStats(context.Context) ([]statsapi.PodStats, error) {
@ -775,8 +792,8 @@ func (p fakeContainerStatsProvider) ListPodCPUAndMemoryStats(context.Context) ([
return nil, fmt.Errorf("not implemented")
}
func (p fakeContainerStatsProvider) ImageFsStats(context.Context) (*statsapi.FsStats, error) {
return nil, fmt.Errorf("not implemented")
func (p fakeContainerStatsProvider) ImageFsStats(context.Context) (*statsapi.FsStats, *statsapi.FsStats, error) {
return p.imageFs, p.containerFs, nil
}
func (p fakeContainerStatsProvider) ImageFsDevice(context.Context) (string, error) {

View File

@ -317,7 +317,16 @@ var _ = SIGDescribe("Summary API [NodeConformance]", func() {
"Inodes": bounded(1e4, 1e8),
"InodesUsed": bounded(0, 1e8),
}),
"ContainerFs": gomega.BeNil(),
"ContainerFs": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),
"AvailableBytes": fsCapacityBounds,
"CapacityBytes": fsCapacityBounds,
// we assume tests are not running on machines with more than 10TB of disk
"UsedBytes": bounded(e2evolume.Kb, 10*e2evolume.Tb),
"InodesFree": bounded(1e4, 1e8),
"Inodes": bounded(1e4, 1e8),
"InodesUsed": bounded(0, 1e8),
}),
}),
"Rlimit": ptrMatchAllFields(gstruct.Fields{
"Time": recent(maxStatsAge),