mirror of https://github.com/k3s-io/kubernetes.git
Disable collecting and exposing accelerator metrics for containerd
commit e8ae653c1d
parent 204ff6caeb
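This change puts the kubelet's accelerator (GPU) usage metrics behind the DisableAcceleratorUsageMetrics feature gate: cAdvisor only registers cadvisormetrics.AcceleratorUsageMetrics when the gate is off, and the CRI stats provider (the path used with runtimes such as containerd) skips attaching accelerator stats when the gate is on. Assuming the standard kubelet feature-gate flag syntax, collection can be switched off explicitly with:

    kubelet --feature-gates=DisableAcceleratorUsageMetrics=true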
@@ -83,15 +83,20 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
 	sysFs := sysfs.NewRealSysFs()
 	includedMetrics := cadvisormetrics.MetricSet{
-		cadvisormetrics.CpuUsageMetrics:         struct{}{},
-		cadvisormetrics.MemoryUsageMetrics:      struct{}{},
-		cadvisormetrics.CpuLoadMetrics:          struct{}{},
-		cadvisormetrics.DiskIOMetrics:           struct{}{},
-		cadvisormetrics.NetworkUsageMetrics:     struct{}{},
-		cadvisormetrics.AcceleratorUsageMetrics: struct{}{},
-		cadvisormetrics.AppMetrics:              struct{}{},
-		cadvisormetrics.ProcessMetrics:          struct{}{},
+		cadvisormetrics.CpuUsageMetrics:     struct{}{},
+		cadvisormetrics.MemoryUsageMetrics:  struct{}{},
+		cadvisormetrics.CpuLoadMetrics:      struct{}{},
+		cadvisormetrics.DiskIOMetrics:       struct{}{},
+		cadvisormetrics.NetworkUsageMetrics: struct{}{},
+		cadvisormetrics.AppMetrics:          struct{}{},
+		cadvisormetrics.ProcessMetrics:      struct{}{},
 	}
 
+	// Only add the Accelerator metrics if the feature is inactive
+	if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableAcceleratorUsageMetrics) {
+		includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
+	}
+
 	if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
 		includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
 	}
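The hunk above follows a common pattern: build the always-on metric set first, then append optional metrics behind a flag. A minimal self-contained sketch of that pattern — the MetricSet type and metric names here are illustrative stand-ins, not cAdvisor's actual API:

package main

import "fmt"

// MetricKind names one family of container metrics.
type MetricKind string

// MetricSet is a set of metric families to collect, mirroring the
// map-as-set shape used in the diff above.
type MetricSet map[MetricKind]struct{}

const (
	CPUUsageMetrics         MetricKind = "cpu"
	MemoryUsageMetrics      MetricKind = "memory"
	AcceleratorUsageMetrics MetricKind = "accelerator"
)

// buildMetricSet assembles the baseline set and appends accelerator
// metrics only when they are not disabled, like the gated block above.
func buildMetricSet(disableAccelerator bool) MetricSet {
	included := MetricSet{
		CPUUsageMetrics:    struct{}{},
		MemoryUsageMetrics: struct{}{},
	}
	if !disableAccelerator {
		included[AcceleratorUsageMetrics] = struct{}{}
	}
	return included
}

func main() {
	_, collected := buildMetricSet(true)[AcceleratorUsageMetrics]
	fmt.Println("accelerator metrics collected:", collected) // false
}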
@@ -679,7 +679,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
 			klet.runtimeCache,
 			kubeDeps.RemoteRuntimeService,
 			kubeDeps.RemoteImageService,
-			hostStatsProvider)
+			hostStatsProvider,
+			utilfeature.DefaultFeatureGate.Enabled(features.DisableAcceleratorUsageMetrics))
 	}
 
 	klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})
@@ -67,8 +67,9 @@ type criStatsProvider struct {
 	hostStatsProvider HostStatsProvider
 
 	// cpuUsageCache caches the cpu usage for containers.
-	cpuUsageCache map[string]*cpuUsageRecord
-	mutex         sync.RWMutex
+	cpuUsageCache                  map[string]*cpuUsageRecord
+	mutex                          sync.RWMutex
+	disableAcceleratorUsageMetrics bool
 }
 
 // newCRIStatsProvider returns a containerStatsProvider implementation that
@@ -79,14 +80,16 @@ func newCRIStatsProvider(
 	runtimeService internalapi.RuntimeService,
 	imageService internalapi.ImageManagerService,
 	hostStatsProvider HostStatsProvider,
+	disableAcceleratorUsageMetrics bool,
 ) containerStatsProvider {
 	return &criStatsProvider{
-		cadvisor:          cadvisor,
-		resourceAnalyzer:  resourceAnalyzer,
-		runtimeService:    runtimeService,
-		imageService:      imageService,
-		hostStatsProvider: hostStatsProvider,
-		cpuUsageCache:     make(map[string]*cpuUsageRecord),
+		cadvisor:                       cadvisor,
+		resourceAnalyzer:               resourceAnalyzer,
+		runtimeService:                 runtimeService,
+		imageService:                   imageService,
+		hostStatsProvider:              hostStatsProvider,
+		cpuUsageCache:                  make(map[string]*cpuUsageRecord),
+		disableAcceleratorUsageMetrics: disableAcceleratorUsageMetrics,
 	}
 }
 
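A note on the wiring above: the feature gate is evaluated once, in NewMainKubelet, and handed down to the stats provider as a plain bool. That keeps feature-gate lookups out of the per-container stats path and makes the behavior trivially injectable in unit tests, as the new test further below does by passing true.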
@@ -784,8 +787,11 @@ func (p *criStatsProvider) addCadvisorContainerStats(
 	if memory != nil {
 		cs.Memory = memory
 	}
-	accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
-	cs.Accelerators = accelerators
+
+	if !p.disableAcceleratorUsageMetrics {
+		accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
+		cs.Accelerators = accelerators
+	}
 }
 
 func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(
@@ -231,6 +231,7 @@ func TestCRIListPodStats(t *testing.T) {
 		fakeRuntimeService,
 		fakeImageService,
 		NewFakeHostStatsProviderWithData(fakeStats, fakeOS),
+		false,
 	)
 
 	stats, err := provider.ListPodStats()
@@ -319,6 +320,113 @@ func TestCRIListPodStats(t *testing.T) {
 	mockCadvisor.AssertExpectations(t)
 }
 
+func TestAcceleratorUsageStatsCanBeDisabled(t *testing.T) {
+	var (
+		imageFsMountpoint = "/test/mount/point"
+		unknownMountpoint = "/unknown/mount/point"
+		imageFsInfo       = getTestFsInfo(2000)
+		rootFsInfo        = getTestFsInfo(1000)
+
+		sandbox0        = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns", false)
+		sandbox0Cgroup  = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox0.PodSandboxStatus.Metadata.Uid))
+		container0      = makeFakeContainer(sandbox0, cName0, 0, false)
+		containerStats0 = makeFakeContainerStats(container0, imageFsMountpoint)
+		container1      = makeFakeContainer(sandbox0, cName1, 0, false)
+		containerStats1 = makeFakeContainerStats(container1, unknownMountpoint)
+	)
+
+	var (
+		mockCadvisor       = new(cadvisortest.Mock)
+		mockRuntimeCache   = new(kubecontainertest.MockRuntimeCache)
+		mockPodManager     = new(kubepodtest.MockManager)
+		resourceAnalyzer   = new(fakeResourceAnalyzer)
+		fakeRuntimeService = critest.NewFakeRuntimeService()
+		fakeImageService   = critest.NewFakeImageService()
+	)
+
+	infos := map[string]cadvisorapiv2.ContainerInfo{
+		"/":                           getTestContainerInfo(seedRoot, "", "", ""),
+		"/kubelet":                    getTestContainerInfo(seedKubelet, "", "", ""),
+		"/system":                     getTestContainerInfo(seedMisc, "", "", ""),
+		sandbox0.PodSandboxStatus.Id:  getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
+		sandbox0Cgroup:                getTestContainerInfo(seedSandbox0, "", "", ""),
+		container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0),
+		container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1),
+	}
+
+	options := cadvisorapiv2.RequestOptions{
+		IdType:    cadvisorapiv2.TypeName,
+		Count:     2,
+		Recursive: true,
+	}
+
+	mockCadvisor.
+		On("ContainerInfoV2", "/", options).Return(infos, nil).
+		On("RootFsInfo").Return(rootFsInfo, nil).
+		On("GetDirFsInfo", imageFsMountpoint).Return(imageFsInfo, nil).
+		On("GetDirFsInfo", unknownMountpoint).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)
+	fakeRuntimeService.SetFakeSandboxes([]*critest.FakePodSandbox{
+		sandbox0,
+	})
+	fakeRuntimeService.SetFakeContainers([]*critest.FakeContainer{
+		container0, container1,
+	})
+	fakeRuntimeService.SetFakeContainerStats([]*runtimeapi.ContainerStats{
+		containerStats0, containerStats1,
+	})
+
+	ephemeralVolumes := makeFakeVolumeStats([]string{"ephVolume1, ephVolumes2"})
+	persistentVolumes := makeFakeVolumeStats([]string{"persisVolume1, persisVolumes2"})
+	resourceAnalyzer.podVolumeStats = serverstats.PodVolumeStats{
+		EphemeralVolumes:  ephemeralVolumes,
+		PersistentVolumes: persistentVolumes,
+	}
+
+	provider := NewCRIStatsProvider(
+		mockCadvisor,
+		resourceAnalyzer,
+		mockPodManager,
+		mockRuntimeCache,
+		fakeRuntimeService,
+		fakeImageService,
+		NewFakeHostStatsProvider(),
+		true, // this is what the test is actually testing
+	)
+
+	stats, err := provider.ListPodStats()
+	assert := assert.New(t)
+	assert.NoError(err)
+	assert.Equal(1, len(stats))
+
+	podStatsMap := make(map[statsapi.PodReference]statsapi.PodStats)
+	for _, s := range stats {
+		podStatsMap[s.PodRef] = s
+	}
+
+	p0 := podStatsMap[statsapi.PodReference{Name: "sandbox0-name", UID: "sandbox0-uid", Namespace: "sandbox0-ns"}]
+	assert.Equal(sandbox0.CreatedAt, p0.StartTime.UnixNano())
+	assert.Equal(2, len(p0.Containers))
+
+	containerStatsMap := make(map[string]statsapi.ContainerStats)
+	for _, s := range p0.Containers {
+		containerStatsMap[s.Name] = s
+	}
+
+	c0 := containerStatsMap[cName0]
+	assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
+	checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
+	assert.Nil(c0.Accelerators)
+
+	c1 := containerStatsMap[cName1]
+	assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
+	checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
+	assert.Nil(c1.Accelerators)
+
+	checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
+
+	mockCadvisor.AssertExpectations(t)
+}
+
 func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
 	var (
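Assuming these tests live at the upstream path pkg/kubelet/stats, the new case can be run in isolation with `go test ./pkg/kubelet/stats/ -run TestAcceleratorUsageStatsCanBeDisabled`. Both containers are asserted to have nil Accelerators because the provider was constructed with the disable flag set to true.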
@@ -426,6 +534,7 @@ func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
 		fakeRuntimeService,
 		nil,
 		NewFakeHostStatsProvider(),
+		false,
 	)
 
 	stats, err := provider.ListPodCPUAndMemoryStats()
@@ -554,6 +663,7 @@ func TestCRIImagesFsStats(t *testing.T) {
 		fakeRuntimeService,
 		fakeImageService,
 		NewFakeHostStatsProvider(),
+		false,
 	)
 
 	stats, err := provider.ImageFsStats()
@@ -42,9 +42,10 @@ func NewCRIStatsProvider(
 	runtimeService internalapi.RuntimeService,
 	imageService internalapi.ImageManagerService,
 	hostStatsProvider HostStatsProvider,
+	disableAcceleratorUsageMetrics bool,
 ) *Provider {
 	return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer,
-		runtimeService, imageService, hostStatsProvider))
+		runtimeService, imageService, hostStatsProvider, disableAcceleratorUsageMetrics))
 }
 
 // NewCadvisorStatsProvider returns a containerStatsProvider that provides both