From e073b0fd652993227050174d85ddc8f69b7c5757 Mon Sep 17 00:00:00 2001 From: Paco Xu Date: Mon, 4 Jul 2022 15:45:02 +0800 Subject: [PATCH] Disable AcceleratorUsage Metrics: ga --- pkg/features/kube_features.go | 3 +- pkg/kubelet/kubelet.go | 1 - pkg/kubelet/server/server.go | 6 - pkg/kubelet/stats/cri_stats_provider.go | 30 ++--- pkg/kubelet/stats/cri_stats_provider_test.go | 132 +------------------ pkg/kubelet/stats/helper.go | 21 --- pkg/kubelet/stats/provider.go | 4 +- 7 files changed, 19 insertions(+), 178 deletions(-) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index c64c836ed07..3ca68e6c466 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -245,6 +245,7 @@ const ( // owner: @RenaudWasTaken @dashpole // alpha: v1.19 // beta: v1.20 + // ga: v1.25 // // Disables Accelerator Metrics Collected by Kubelet DisableAcceleratorUsageMetrics featuregate.Feature = "DisableAcceleratorUsageMetrics" @@ -873,7 +874,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS DevicePlugins: {Default: true, PreRelease: featuregate.Beta}, - DisableAcceleratorUsageMetrics: {Default: true, PreRelease: featuregate.Beta}, + DisableAcceleratorUsageMetrics: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, DisableCloudProviders: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index a5feec5e03a..24310cc3c3a 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -678,7 +678,6 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps.RemoteRuntimeService, kubeDeps.RemoteImageService, hostStatsProvider, - utilfeature.DefaultFeatureGate.Enabled(features.DisableAcceleratorUsageMetrics), utilfeature.DefaultFeatureGate.Enabled(features.PodAndContainerStatsFromCRI)) } diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go index 8dfb4d773a5..3f33c8885d1 100644 --- a/pkg/kubelet/server/server.go +++ b/pkg/kubelet/server/server.go @@ -372,12 +372,6 @@ func (s *Server) InstallDefaultHandlers() { cadvisormetrics.OOMMetrics: struct{}{}, } - // Only add the Accelerator metrics if the feature is inactive - // Note: Accelerator metrics will be removed in the future, hence the feature gate. - if !utilfeature.DefaultFeatureGate.Enabled(features.DisableAcceleratorUsageMetrics) { - includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{} - } - cadvisorOpts := cadvisorv2.RequestOptions{ IdType: cadvisorv2.TypeName, Count: 1, diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index 650a48a8e60..fa9c64e468f 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -74,10 +74,9 @@ type criStatsProvider struct { clock clock.Clock // cpuUsageCache caches the cpu usage for containers. - cpuUsageCache map[string]*cpuUsageRecord - mutex sync.RWMutex - disableAcceleratorUsageMetrics bool - podAndContainerStatsFromCRI bool + cpuUsageCache map[string]*cpuUsageRecord + mutex sync.RWMutex + podAndContainerStatsFromCRI bool } // newCRIStatsProvider returns a containerStatsProvider implementation that @@ -88,19 +87,17 @@ func newCRIStatsProvider( runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, hostStatsProvider HostStatsProvider, - disableAcceleratorUsageMetrics, podAndContainerStatsFromCRI bool, ) containerStatsProvider { return &criStatsProvider{ - cadvisor: cadvisor, - resourceAnalyzer: resourceAnalyzer, - runtimeService: runtimeService, - imageService: imageService, - hostStatsProvider: hostStatsProvider, - cpuUsageCache: make(map[string]*cpuUsageRecord), - disableAcceleratorUsageMetrics: disableAcceleratorUsageMetrics, - podAndContainerStatsFromCRI: podAndContainerStatsFromCRI, - clock: clock.RealClock{}, + cadvisor: cadvisor, + resourceAnalyzer: resourceAnalyzer, + runtimeService: runtimeService, + imageService: imageService, + hostStatsProvider: hostStatsProvider, + cpuUsageCache: make(map[string]*cpuUsageRecord), + podAndContainerStatsFromCRI: podAndContainerStatsFromCRI, + clock: clock.RealClock{}, } } @@ -884,11 +881,6 @@ func (p *criStatsProvider) addCadvisorContainerStats( if memory != nil { cs.Memory = memory } - - if !p.disableAcceleratorUsageMetrics { - accelerators := cadvisorInfoToAcceleratorStats(caPodStats) - cs.Accelerators = accelerators - } } func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats( diff --git a/pkg/kubelet/stats/cri_stats_provider_test.go b/pkg/kubelet/stats/cri_stats_provider_test.go index d7dc55aa4e7..3daafde7b01 100644 --- a/pkg/kubelet/stats/cri_stats_provider_test.go +++ b/pkg/kubelet/stats/cri_stats_provider_test.go @@ -236,7 +236,6 @@ func TestCRIListPodStats(t *testing.T) { fakeImageService, NewFakeHostStatsProviderWithData(fakeStats, fakeOS), false, - false, ) stats, err := provider.ListPodStats() @@ -264,14 +263,14 @@ func TestCRIListPodStats(t *testing.T) { c0 := containerStatsMap[cName0] assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano()) checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0]) - checkCRIAcceleratorStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0]) + assert.Nil(c0.Accelerators) checkCRIRootfsStats(assert, c0, containerStats0, &imageFsInfo) checkCRILogsStats(assert, c0, &rootFsInfo, containerLogStats0) c1 := containerStatsMap[cName1] assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano()) checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0]) - checkCRIAcceleratorStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0]) + assert.Nil(c0.Accelerators) checkCRIRootfsStats(assert, c1, containerStats1, nil) checkCRILogsStats(assert, c1, &rootFsInfo, containerLogStats1) checkCRINetworkStats(assert, p0.Network, infos[sandbox0.PodSandboxStatus.Id].Stats[0].Network) @@ -287,7 +286,7 @@ func TestCRIListPodStats(t *testing.T) { assert.Equal(cName2, c2.Name) assert.Equal(container2.CreatedAt, c2.StartTime.UnixNano()) checkCRICPUAndMemoryStats(assert, c2, infos[container2.ContainerStatus.Id].Stats[0]) - checkCRIAcceleratorStats(assert, c2, infos[container2.ContainerStatus.Id].Stats[0]) + assert.Nil(c0.Accelerators) checkCRIRootfsStats(assert, c2, containerStats2, &imageFsInfo) checkCRILogsStats(assert, c2, &rootFsInfo, containerLogStats2) checkCRINetworkStats(assert, p1.Network, infos[sandbox1.PodSandboxStatus.Id].Stats[0].Network) @@ -304,7 +303,7 @@ func TestCRIListPodStats(t *testing.T) { assert.Equal(cName3, c3.Name) assert.Equal(container4.CreatedAt, c3.StartTime.UnixNano()) checkCRICPUAndMemoryStats(assert, c3, infos[container4.ContainerStatus.Id].Stats[0]) - checkCRIAcceleratorStats(assert, c3, infos[container4.ContainerStatus.Id].Stats[0]) + assert.Nil(c0.Accelerators) checkCRIRootfsStats(assert, c3, containerStats4, &imageFsInfo) checkCRILogsStats(assert, c3, &rootFsInfo, containerLogStats4) @@ -323,115 +322,6 @@ func TestCRIListPodStats(t *testing.T) { checkCRIPodCPUAndMemoryStats(assert, p3, infos[sandbox3Cgroup].Stats[0]) } -func TestAcceleratorUsageStatsCanBeDisabled(t *testing.T) { - var ( - imageFsMountpoint = "/test/mount/point" - unknownMountpoint = "/unknown/mount/point" - imageFsInfo = getTestFsInfo(2000) - rootFsInfo = getTestFsInfo(1000) - - sandbox0 = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns", false) - sandbox0Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox0.PodSandboxStatus.Metadata.Uid)) - container0 = makeFakeContainer(sandbox0, cName0, 0, false) - containerStats0 = makeFakeContainerStats(container0, imageFsMountpoint) - container1 = makeFakeContainer(sandbox0, cName1, 0, false) - containerStats1 = makeFakeContainerStats(container1, unknownMountpoint) - ) - - mockCtrl := gomock.NewController(t) - defer mockCtrl.Finish() - - var ( - mockCadvisor = cadvisortest.NewMockInterface(mockCtrl) - mockRuntimeCache = new(kubecontainertest.MockRuntimeCache) - mockPodManager = new(kubepodtest.MockManager) - resourceAnalyzer = new(fakeResourceAnalyzer) - fakeRuntimeService = critest.NewFakeRuntimeService() - fakeImageService = critest.NewFakeImageService() - ) - - infos := map[string]cadvisorapiv2.ContainerInfo{ - "/": getTestContainerInfo(seedRoot, "", "", ""), - "/kubelet": getTestContainerInfo(seedKubelet, "", "", ""), - "/system": getTestContainerInfo(seedMisc, "", "", ""), - sandbox0.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName), - sandbox0Cgroup: getTestContainerInfo(seedSandbox0, "", "", ""), - container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0), - container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1), - } - - options := cadvisorapiv2.RequestOptions{ - IdType: cadvisorapiv2.TypeName, - Count: 2, - Recursive: true, - } - - mockCadvisor.EXPECT().ContainerInfoV2("/", options).Return(infos, nil) - mockCadvisor.EXPECT().RootFsInfo().Return(rootFsInfo, nil) - mockCadvisor.EXPECT().GetDirFsInfo(imageFsMountpoint).Return(imageFsInfo, nil) - mockCadvisor.EXPECT().GetDirFsInfo(unknownMountpoint).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice) - - fakeRuntimeService.SetFakeSandboxes([]*critest.FakePodSandbox{ - sandbox0, - }) - fakeRuntimeService.SetFakeContainers([]*critest.FakeContainer{ - container0, container1, - }) - fakeRuntimeService.SetFakeContainerStats([]*runtimeapi.ContainerStats{ - containerStats0, containerStats1, - }) - - ephemeralVolumes := makeFakeVolumeStats([]string{"ephVolume1, ephVolumes2"}) - persistentVolumes := makeFakeVolumeStats([]string{"persisVolume1, persisVolumes2"}) - resourceAnalyzer.podVolumeStats = serverstats.PodVolumeStats{ - EphemeralVolumes: ephemeralVolumes, - PersistentVolumes: persistentVolumes, - } - - provider := NewCRIStatsProvider( - mockCadvisor, - resourceAnalyzer, - mockPodManager, - mockRuntimeCache, - fakeRuntimeService, - fakeImageService, - NewFakeHostStatsProvider(), - true, // this is what the test is actually testing - false, - ) - - stats, err := provider.ListPodStats() - assert := assert.New(t) - assert.NoError(err) - assert.Equal(1, len(stats)) - - podStatsMap := make(map[statsapi.PodReference]statsapi.PodStats) - for _, s := range stats { - podStatsMap[s.PodRef] = s - } - - p0 := podStatsMap[statsapi.PodReference{Name: "sandbox0-name", UID: "sandbox0-uid", Namespace: "sandbox0-ns"}] - assert.Equal(sandbox0.CreatedAt, p0.StartTime.UnixNano()) - assert.Equal(2, len(p0.Containers)) - - containerStatsMap := make(map[string]statsapi.ContainerStats) - for _, s := range p0.Containers { - containerStatsMap[s.Name] = s - } - - c0 := containerStatsMap[cName0] - assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano()) - checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0]) - assert.Nil(c0.Accelerators) - - c1 := containerStatsMap[cName1] - assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano()) - checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0]) - assert.Nil(c1.Accelerators) - - checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0]) -} - func TestCRIListPodCPUAndMemoryStats(t *testing.T) { var ( @@ -543,7 +433,6 @@ func TestCRIListPodCPUAndMemoryStats(t *testing.T) { nil, NewFakeHostStatsProvider(), false, - false, ) stats, err := provider.ListPodCPUAndMemoryStats() @@ -674,7 +563,6 @@ func TestCRIImagesFsStats(t *testing.T) { fakeImageService, NewFakeHostStatsProvider(), false, - false, ) stats, err := provider.ImageFsStats() @@ -811,18 +699,6 @@ func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.Contai assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults) } -func checkCRIAcceleratorStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *cadvisorapiv2.ContainerStats) { - assert.Equal(len(cs.Accelerators), len(actual.Accelerators)) - for i := range cs.Accelerators { - assert.Equal(cs.Accelerators[i].Make, actual.Accelerators[i].Make) - assert.Equal(cs.Accelerators[i].Model, actual.Accelerators[i].Model) - assert.Equal(cs.Accelerators[i].ID, actual.Accelerators[i].ID) - assert.Equal(cs.Accelerators[i].MemoryTotal, actual.Accelerators[i].MemoryTotal) - assert.Equal(cs.Accelerators[i].MemoryUsed, actual.Accelerators[i].MemoryUsed) - assert.Equal(cs.Accelerators[i].DutyCycle, actual.Accelerators[i].DutyCycle) - } -} - func checkCRIRootfsStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats, imageFsInfo *cadvisorapiv2.FsInfo) { assert.Equal(cs.WritableLayer.Timestamp, actual.Rootfs.Time.UnixNano()) if imageFsInfo != nil { diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index da32fb11eed..fdab200a005 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -156,27 +156,6 @@ func cadvisorInfoToContainerCPUAndMemoryStats(name string, info *cadvisorapiv2.C return result } -// cadvisorInfoToAcceleratorStats returns the statsapi.AcceleratorStats converted from -// the container info from cadvisor. -func cadvisorInfoToAcceleratorStats(info *cadvisorapiv2.ContainerInfo) []statsapi.AcceleratorStats { - cstat, found := latestContainerStats(info) - if !found || cstat.Accelerators == nil { - return nil - } - var result []statsapi.AcceleratorStats - for _, acc := range cstat.Accelerators { - result = append(result, statsapi.AcceleratorStats{ - Make: acc.Make, - Model: acc.Model, - ID: acc.ID, - MemoryTotal: acc.MemoryTotal, - MemoryUsed: acc.MemoryUsed, - DutyCycle: acc.DutyCycle, - }) - } - return result -} - func cadvisorInfoToProcessStats(info *cadvisorapiv2.ContainerInfo) *statsapi.ProcessStats { cstat, found := latestContainerStats(info) if !found || cstat.Processes == nil { diff --git a/pkg/kubelet/stats/provider.go b/pkg/kubelet/stats/provider.go index 7c06a550949..3e8a1d38de5 100644 --- a/pkg/kubelet/stats/provider.go +++ b/pkg/kubelet/stats/provider.go @@ -42,10 +42,10 @@ func NewCRIStatsProvider( runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, hostStatsProvider HostStatsProvider, - disableAcceleratorUsageMetrics, podAndContainerStatsFromCRI bool, + podAndContainerStatsFromCRI bool, ) *Provider { return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer, - runtimeService, imageService, hostStatsProvider, disableAcceleratorUsageMetrics, podAndContainerStatsFromCRI)) + runtimeService, imageService, hostStatsProvider, podAndContainerStatsFromCRI)) } // NewCadvisorStatsProvider returns a containerStatsProvider that provides both