diff --git a/pkg/kubelet/cadvisor/cadvisor_linux.go b/pkg/kubelet/cadvisor/cadvisor_linux.go
index 991afe5fcfe..51861d2b4e3 100644
--- a/pkg/kubelet/cadvisor/cadvisor_linux.go
+++ b/pkg/kubelet/cadvisor/cadvisor_linux.go
@@ -83,15 +83,20 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
 	sysFs := sysfs.NewRealSysFs()
 
 	includedMetrics := cadvisormetrics.MetricSet{
-		cadvisormetrics.CpuUsageMetrics:         struct{}{},
-		cadvisormetrics.MemoryUsageMetrics:      struct{}{},
-		cadvisormetrics.CpuLoadMetrics:          struct{}{},
-		cadvisormetrics.DiskIOMetrics:           struct{}{},
-		cadvisormetrics.NetworkUsageMetrics:     struct{}{},
-		cadvisormetrics.AcceleratorUsageMetrics: struct{}{},
-		cadvisormetrics.AppMetrics:              struct{}{},
-		cadvisormetrics.ProcessMetrics:          struct{}{},
+		cadvisormetrics.CpuUsageMetrics:     struct{}{},
+		cadvisormetrics.MemoryUsageMetrics:  struct{}{},
+		cadvisormetrics.CpuLoadMetrics:      struct{}{},
+		cadvisormetrics.DiskIOMetrics:       struct{}{},
+		cadvisormetrics.NetworkUsageMetrics: struct{}{},
+		cadvisormetrics.AppMetrics:          struct{}{},
+		cadvisormetrics.ProcessMetrics:      struct{}{},
 	}
+
+	// Collect accelerator metrics unless the DisableAcceleratorUsageMetrics feature gate is enabled.
+	if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableAcceleratorUsageMetrics) {
+		includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
+	}
+
 	if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
 		includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
 	}
diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 00890d662b8..25fd0ccdabc 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -679,7 +679,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 			klet.runtimeCache,
 			kubeDeps.RemoteRuntimeService,
 			kubeDeps.RemoteImageService,
-			hostStatsProvider)
+			hostStatsProvider,
+			utilfeature.DefaultFeatureGate.Enabled(features.DisableAcceleratorUsageMetrics))
 	}
 
 	klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})
diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go
index e8fe923e41f..e2f6ed41a27 100644
--- a/pkg/kubelet/stats/cri_stats_provider.go
+++ b/pkg/kubelet/stats/cri_stats_provider.go
@@ -67,8 +67,9 @@ type criStatsProvider struct {
 	hostStatsProvider HostStatsProvider
 
 	// cpuUsageCache caches the cpu usage for containers.
-	cpuUsageCache map[string]*cpuUsageRecord
-	mutex         sync.RWMutex
+	cpuUsageCache                  map[string]*cpuUsageRecord
+	mutex                          sync.RWMutex
+	disableAcceleratorUsageMetrics bool
 }
 
 // newCRIStatsProvider returns a containerStatsProvider implementation that
@@ -79,14 +80,16 @@ func newCRIStatsProvider(
 	runtimeService internalapi.RuntimeService,
 	imageService internalapi.ImageManagerService,
 	hostStatsProvider HostStatsProvider,
+	disableAcceleratorUsageMetrics bool,
 ) containerStatsProvider {
 	return &criStatsProvider{
-		cadvisor:          cadvisor,
-		resourceAnalyzer:  resourceAnalyzer,
-		runtimeService:    runtimeService,
-		imageService:      imageService,
-		hostStatsProvider: hostStatsProvider,
-		cpuUsageCache:     make(map[string]*cpuUsageRecord),
+		cadvisor:                       cadvisor,
+		resourceAnalyzer:               resourceAnalyzer,
+		runtimeService:                 runtimeService,
+		imageService:                   imageService,
+		hostStatsProvider:              hostStatsProvider,
+		cpuUsageCache:                  make(map[string]*cpuUsageRecord),
+		disableAcceleratorUsageMetrics: disableAcceleratorUsageMetrics,
 	}
 }
 
@@ -784,8 +787,11 @@ func (p *criStatsProvider) addCadvisorContainerStats(
 	if memory != nil {
 		cs.Memory = memory
 	}
-	accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
-	cs.Accelerators = accelerators
+
+	if !p.disableAcceleratorUsageMetrics {
+		accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
+		cs.Accelerators = accelerators
+	}
 }
 
 func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(
diff --git a/pkg/kubelet/stats/cri_stats_provider_test.go b/pkg/kubelet/stats/cri_stats_provider_test.go
index e1aaa717dbf..a5435d205e5 100644
--- a/pkg/kubelet/stats/cri_stats_provider_test.go
+++ b/pkg/kubelet/stats/cri_stats_provider_test.go
@@ -231,6 +231,7 @@ func TestCRIListPodStats(t *testing.T) {
 		fakeRuntimeService,
 		fakeImageService,
 		NewFakeHostStatsProviderWithData(fakeStats, fakeOS),
+		false,
 	)
 
 	stats, err := provider.ListPodStats()
@@ -319,6 +320,115 @@ func TestCRIListPodStats(t *testing.T) {
 	mockCadvisor.AssertExpectations(t)
 }
 
+// TestAcceleratorUsageStatsCanBeDisabled checks that ListPodStats leaves every container's
+// Accelerators field nil when the provider is created with disableAcceleratorUsageMetrics=true.
+func TestAcceleratorUsageStatsCanBeDisabled(t *testing.T) {
+	var (
+		imageFsMountpoint = "/test/mount/point"
+		unknownMountpoint = "/unknown/mount/point"
+		imageFsInfo       = getTestFsInfo(2000)
+		rootFsInfo        = getTestFsInfo(1000)
+
+		sandbox0        = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns", false)
+		sandbox0Cgroup  = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox0.PodSandboxStatus.Metadata.Uid))
+		container0      = makeFakeContainer(sandbox0, cName0, 0, false)
+		containerStats0 = makeFakeContainerStats(container0, imageFsMountpoint)
+		container1      = makeFakeContainer(sandbox0, cName1, 0, false)
+		containerStats1 = makeFakeContainerStats(container1, unknownMountpoint)
+	)
+
+	var (
+		mockCadvisor       = new(cadvisortest.Mock)
+		mockRuntimeCache   = new(kubecontainertest.MockRuntimeCache)
+		mockPodManager     = new(kubepodtest.MockManager)
+		resourceAnalyzer   = new(fakeResourceAnalyzer)
+		fakeRuntimeService = critest.NewFakeRuntimeService()
+		fakeImageService   = critest.NewFakeImageService()
+	)
+
+	infos := map[string]cadvisorapiv2.ContainerInfo{
+		"/":                           getTestContainerInfo(seedRoot, "", "", ""),
+		"/kubelet":                    getTestContainerInfo(seedKubelet, "", "", ""),
+		"/system":                     getTestContainerInfo(seedMisc, "", "", ""),
+		sandbox0.PodSandboxStatus.Id:  getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
+		sandbox0Cgroup:                getTestContainerInfo(seedSandbox0, "", "", ""),
+		container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0),
+		container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1),
+	}
+
+	options := cadvisorapiv2.RequestOptions{
+		IdType:    cadvisorapiv2.TypeName,
+		Count:     2,
+		Recursive: true,
+	}
+
+	mockCadvisor.
+		On("ContainerInfoV2", "/", options).Return(infos, nil).
+		On("RootFsInfo").Return(rootFsInfo, nil).
+		On("GetDirFsInfo", imageFsMountpoint).Return(imageFsInfo, nil).
+		On("GetDirFsInfo", unknownMountpoint).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)
+	fakeRuntimeService.SetFakeSandboxes([]*critest.FakePodSandbox{
+		sandbox0,
+	})
+	fakeRuntimeService.SetFakeContainers([]*critest.FakeContainer{
+		container0, container1,
+	})
+	fakeRuntimeService.SetFakeContainerStats([]*runtimeapi.ContainerStats{
+		containerStats0, containerStats1,
+	})
+
+	ephemeralVolumes := makeFakeVolumeStats([]string{"ephVolume1, ephVolumes2"})
+	persistentVolumes := makeFakeVolumeStats([]string{"persisVolume1, persisVolumes2"})
+	resourceAnalyzer.podVolumeStats = serverstats.PodVolumeStats{
+		EphemeralVolumes:  ephemeralVolumes,
+		PersistentVolumes: persistentVolumes,
+	}
+
+	provider := NewCRIStatsProvider(
+		mockCadvisor,
+		resourceAnalyzer,
+		mockPodManager,
+		mockRuntimeCache,
+		fakeRuntimeService,
+		fakeImageService,
+		NewFakeHostStatsProvider(),
+		true, // disableAcceleratorUsageMetrics: the setting under test
+	)
+
+	stats, err := provider.ListPodStats()
+	assert := assert.New(t)
+	assert.NoError(err)
+	assert.Equal(1, len(stats))
+
+	podStatsMap := make(map[statsapi.PodReference]statsapi.PodStats)
+	for _, s := range stats {
+		podStatsMap[s.PodRef] = s
+	}
+
+	p0 := podStatsMap[statsapi.PodReference{Name: "sandbox0-name", UID: "sandbox0-uid", Namespace: "sandbox0-ns"}]
+	assert.Equal(sandbox0.CreatedAt, p0.StartTime.UnixNano())
+	assert.Equal(2, len(p0.Containers))
+
+	containerStatsMap := make(map[string]statsapi.ContainerStats)
+	for _, s := range p0.Containers {
+		containerStatsMap[s.Name] = s
+	}
+
+	c0 := containerStatsMap[cName0]
+	assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
+	checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
+	assert.Nil(c0.Accelerators)
+
+	c1 := containerStatsMap[cName1]
+	assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
+	checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
+	assert.Nil(c1.Accelerators)
+
+	checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
+
+	mockCadvisor.AssertExpectations(t)
+}
+
 func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
 
 	var (
@@ -426,6 +536,7 @@ func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
 		fakeRuntimeService,
 		nil,
 		NewFakeHostStatsProvider(),
+		false,
 	)
 
 	stats, err := provider.ListPodCPUAndMemoryStats()
@@ -554,6 +665,7 @@ func TestCRIImagesFsStats(t *testing.T) {
 		fakeRuntimeService,
 		fakeImageService,
 		NewFakeHostStatsProvider(),
+		false,
 	)
 
 	stats, err := provider.ImageFsStats()
diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go
index 360c740315d..fd3c5dd8248 100644
--- a/pkg/kubelet/stats/provider.go
+++ b/pkg/kubelet/stats/provider.go
@@ -42,9 +42,10 @@ func NewCRIStatsProvider(
 	runtimeService internalapi.RuntimeService,
 	imageService internalapi.ImageManagerService,
 	hostStatsProvider HostStatsProvider,
+	disableAcceleratorUsageMetrics bool,
 ) *Provider {
 	return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer,
-		runtimeService, imageService, hostStatsProvider))
+		runtimeService, imageService, hostStatsProvider, disableAcceleratorUsageMetrics))
 }
 
 // NewCadvisorStatsProvider returns a containerStatsProvider that provides both