disable collecting of accelerator metrics and exposing it for containerd

This commit is contained in:
Sergey Kanzhelev 2021-04-30 22:16:34 +00:00
parent 204ff6caeb
commit e8ae653c1d
5 changed files with 143 additions and 20 deletions

View File

@ -83,15 +83,20 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
sysFs := sysfs.NewRealSysFs()
includedMetrics := cadvisormetrics.MetricSet{
cadvisormetrics.CpuUsageMetrics: struct{}{},
cadvisormetrics.MemoryUsageMetrics: struct{}{},
cadvisormetrics.CpuLoadMetrics: struct{}{},
cadvisormetrics.DiskIOMetrics: struct{}{},
cadvisormetrics.NetworkUsageMetrics: struct{}{},
cadvisormetrics.AcceleratorUsageMetrics: struct{}{},
cadvisormetrics.AppMetrics: struct{}{},
cadvisormetrics.ProcessMetrics: struct{}{},
cadvisormetrics.CpuUsageMetrics: struct{}{},
cadvisormetrics.MemoryUsageMetrics: struct{}{},
cadvisormetrics.CpuLoadMetrics: struct{}{},
cadvisormetrics.DiskIOMetrics: struct{}{},
cadvisormetrics.NetworkUsageMetrics: struct{}{},
cadvisormetrics.AppMetrics: struct{}{},
cadvisormetrics.ProcessMetrics: struct{}{},
}
// Only collect accelerator metrics when the DisableAcceleratorUsageMetrics feature gate is not enabled
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableAcceleratorUsageMetrics) {
includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
}
if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
}

View File

@ -679,7 +679,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.runtimeCache,
kubeDeps.RemoteRuntimeService,
kubeDeps.RemoteImageService,
hostStatsProvider)
hostStatsProvider,
utilfeature.DefaultFeatureGate.Enabled(features.DisableAcceleratorUsageMetrics))
}
klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})

View File

@ -67,8 +67,9 @@ type criStatsProvider struct {
hostStatsProvider HostStatsProvider
// cpuUsageCache caches the cpu usage for containers.
cpuUsageCache map[string]*cpuUsageRecord
mutex sync.RWMutex
cpuUsageCache map[string]*cpuUsageRecord
mutex sync.RWMutex
disableAcceleratorUsageMetrics bool
}
// newCRIStatsProvider returns a containerStatsProvider implementation that
@ -79,14 +80,16 @@ func newCRIStatsProvider(
runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService,
hostStatsProvider HostStatsProvider,
disableAcceleratorUsageMetrics bool,
) containerStatsProvider {
return &criStatsProvider{
cadvisor: cadvisor,
resourceAnalyzer: resourceAnalyzer,
runtimeService: runtimeService,
imageService: imageService,
hostStatsProvider: hostStatsProvider,
cpuUsageCache: make(map[string]*cpuUsageRecord),
cadvisor: cadvisor,
resourceAnalyzer: resourceAnalyzer,
runtimeService: runtimeService,
imageService: imageService,
hostStatsProvider: hostStatsProvider,
cpuUsageCache: make(map[string]*cpuUsageRecord),
disableAcceleratorUsageMetrics: disableAcceleratorUsageMetrics,
}
}
@ -784,8 +787,11 @@ func (p *criStatsProvider) addCadvisorContainerStats(
if memory != nil {
cs.Memory = memory
}
accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
cs.Accelerators = accelerators
if !p.disableAcceleratorUsageMetrics {
accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
cs.Accelerators = accelerators
}
}
func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(

View File

@ -231,6 +231,7 @@ func TestCRIListPodStats(t *testing.T) {
fakeRuntimeService,
fakeImageService,
NewFakeHostStatsProviderWithData(fakeStats, fakeOS),
false,
)
stats, err := provider.ListPodStats()
@ -319,6 +320,113 @@ func TestCRIListPodStats(t *testing.T) {
mockCadvisor.AssertExpectations(t)
}
// TestAcceleratorUsageStatsCanBeDisabled builds a CRI stats provider with
// disableAcceleratorUsageMetrics set to true and checks that ListPodStats
// returns container stats whose Accelerators field is nil, while the CPU and
// memory stats are still compared against the cadvisor fixtures.
func TestAcceleratorUsageStatsCanBeDisabled(t *testing.T) {
	// Fixtures: a single pod sandbox that owns two containers.
	imageFsMountpoint := "/test/mount/point"
	unknownMountpoint := "/unknown/mount/point"
	imageFsInfo := getTestFsInfo(2000)
	rootFsInfo := getTestFsInfo(1000)

	sandbox := makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns", false)
	podCgroup := "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox.PodSandboxStatus.Metadata.Uid))
	ctr0 := makeFakeContainer(sandbox, cName0, 0, false)
	ctrStats0 := makeFakeContainerStats(ctr0, imageFsMountpoint)
	ctr1 := makeFakeContainer(sandbox, cName1, 0, false)
	ctrStats1 := makeFakeContainerStats(ctr1, unknownMountpoint)

	// Collaborators handed to the provider under test.
	cadvisorMock := new(cadvisortest.Mock)
	runtimeCacheMock := new(kubecontainertest.MockRuntimeCache)
	podManagerMock := new(kubepodtest.MockManager)
	analyzer := new(fakeResourceAnalyzer)
	runtimeSvc := critest.NewFakeRuntimeService()
	imageSvc := critest.NewFakeImageService()

	// cadvisor data keyed by cgroup path / sandbox ID / container ID.
	podNamespace := sandbox.PodSandboxStatus.Metadata.Namespace
	infos := map[string]cadvisorapiv2.ContainerInfo{
		"/":                           getTestContainerInfo(seedRoot, "", "", ""),
		"/kubelet":                    getTestContainerInfo(seedKubelet, "", "", ""),
		"/system":                     getTestContainerInfo(seedMisc, "", "", ""),
		sandbox.PodSandboxStatus.Id:   getTestContainerInfo(seedSandbox0, pName0, podNamespace, leaky.PodInfraContainerName),
		podCgroup:                     getTestContainerInfo(seedSandbox0, "", "", ""),
		ctr0.ContainerStatus.Id:       getTestContainerInfo(seedContainer0, pName0, podNamespace, cName0),
		ctr1.ContainerStatus.Id:       getTestContainerInfo(seedContainer1, pName0, podNamespace, cName1),
	}

	options := cadvisorapiv2.RequestOptions{
		IdType:    cadvisorapiv2.TypeName,
		Count:     2,
		Recursive: true,
	}

	// Register the cadvisor calls the provider is expected to make; they are
	// verified by AssertExpectations at the end of the test.
	cadvisorMock.On("ContainerInfoV2", "/", options).Return(infos, nil)
	cadvisorMock.On("RootFsInfo").Return(rootFsInfo, nil)
	cadvisorMock.On("GetDirFsInfo", imageFsMountpoint).Return(imageFsInfo, nil)
	cadvisorMock.On("GetDirFsInfo", unknownMountpoint).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)

	runtimeSvc.SetFakeSandboxes([]*critest.FakePodSandbox{sandbox})
	runtimeSvc.SetFakeContainers([]*critest.FakeContainer{ctr0, ctr1})
	runtimeSvc.SetFakeContainerStats([]*runtimeapi.ContainerStats{ctrStats0, ctrStats1})

	ephemeralVolumes := makeFakeVolumeStats([]string{"ephVolume1, ephVolumes2"})
	persistentVolumes := makeFakeVolumeStats([]string{"persisVolume1, persisVolumes2"})
	analyzer.podVolumeStats = serverstats.PodVolumeStats{
		EphemeralVolumes:  ephemeralVolumes,
		PersistentVolumes: persistentVolumes,
	}

	// The trailing `true` disables accelerator usage metrics, which is the
	// behavior this test exists to exercise.
	provider := NewCRIStatsProvider(
		cadvisorMock,
		analyzer,
		podManagerMock,
		runtimeCacheMock,
		runtimeSvc,
		imageSvc,
		NewFakeHostStatsProvider(),
		true,
	)

	stats, err := provider.ListPodStats()
	assert := assert.New(t)
	assert.NoError(err)
	assert.Equal(1, len(stats))

	// Index the returned pods by reference so the sandbox can be looked up.
	podStatsByRef := make(map[statsapi.PodReference]statsapi.PodStats, len(stats))
	for i := range stats {
		podStatsByRef[stats[i].PodRef] = stats[i]
	}

	podRef := statsapi.PodReference{Name: "sandbox0-name", UID: "sandbox0-uid", Namespace: "sandbox0-ns"}
	p0 := podStatsByRef[podRef]
	assert.Equal(sandbox.CreatedAt, p0.StartTime.UnixNano())
	assert.Equal(2, len(p0.Containers))

	// Index the pod's containers by name.
	containerStatsByName := make(map[string]statsapi.ContainerStats, len(p0.Containers))
	for i := range p0.Containers {
		containerStatsByName[p0.Containers[i].Name] = p0.Containers[i]
	}

	// Each container must carry CPU/memory stats but no accelerator stats.
	expectations := []struct {
		name      string
		container *critest.FakeContainer
	}{
		{name: cName0, container: ctr0},
		{name: cName1, container: ctr1},
	}
	for _, want := range expectations {
		got := containerStatsByName[want.name]
		assert.Equal(want.container.CreatedAt, got.StartTime.UnixNano())
		checkCRICPUAndMemoryStats(assert, got, infos[want.container.ContainerStatus.Id].Stats[0])
		assert.Nil(got.Accelerators)
	}

	checkCRIPodCPUAndMemoryStats(assert, p0, infos[podCgroup].Stats[0])

	cadvisorMock.AssertExpectations(t)
}
func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
var (
@ -426,6 +534,7 @@ func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
fakeRuntimeService,
nil,
NewFakeHostStatsProvider(),
false,
)
stats, err := provider.ListPodCPUAndMemoryStats()
@ -554,6 +663,7 @@ func TestCRIImagesFsStats(t *testing.T) {
fakeRuntimeService,
fakeImageService,
NewFakeHostStatsProvider(),
false,
)
stats, err := provider.ImageFsStats()

View File

@ -42,9 +42,10 @@ func NewCRIStatsProvider(
runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService,
hostStatsProvider HostStatsProvider,
disableAcceleratorUsageMetrics bool,
) *Provider {
return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer,
runtimeService, imageService, hostStatsProvider))
runtimeService, imageService, hostStatsProvider, disableAcceleratorUsageMetrics))
}
// NewCadvisorStatsProvider returns a containerStatsProvider that provides both