From da988294ec5a6045dd602b024d4c5582ad0d5e1c Mon Sep 17 00:00:00 2001 From: Joel Smith Date: Mon, 2 Mar 2020 08:08:35 -0700 Subject: [PATCH] Work-around for missing metrics on CRI-O exited containers HPA needs metrics for exited init containers before it will take action. By setting memory and CPU usage to zero for any containers that cAdvisor didn't provide statistics for, we are assured that HPA will be able to correctly calculate pod resource usage. --- .../stats/cadvisor_stats_provider_test.go | 48 ++++++++++++++++++- pkg/kubelet/stats/helper.go | 13 +++-- pkg/kubelet/stats/stats_provider_test.go | 25 ++++++++++ 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/stats/cadvisor_stats_provider_test.go b/pkg/kubelet/stats/cadvisor_stats_provider_test.go index 1695c6503a5..9e1faee180e 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider_test.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider_test.go @@ -89,6 +89,9 @@ func TestCadvisorListPodStats(t *testing.T) { seedPod1Container = 4000 seedPod2Infra = 5000 seedPod2Container = 6000 + seedPod3Infra = 7000 + seedPod3Container0 = 8000 + seedPod3Container1 = 8001 seedEphemeralVolume1 = 10000 seedEphemeralVolume2 = 10001 seedPersistentVolume1 = 20000 @@ -98,12 +101,15 @@ func TestCadvisorListPodStats(t *testing.T) { pName0 = "pod0" pName1 = "pod1" pName2 = "pod0" // ensure pName2 conflicts with pName0, but is in a different namespace + pName3 = "pod3" ) const ( cName00 = "c0" cName01 = "c1" cName10 = "c0" // ensure cName10 conflicts with cName02, but is in a different pod cName20 = "c1" // ensure cName20 conflicts with cName01, but is in a different pod + namespace + cName30 = "c0-init" + cName31 = "c1" ) const ( rootfsCapacity = uint64(10000000) @@ -119,6 +125,7 @@ func TestCadvisorListPodStats(t *testing.T) { prf0 := statsapi.PodReference{Name: pName0, Namespace: namespace0, UID: "UID" + pName0} prf1 := statsapi.PodReference{Name: pName1, Namespace: namespace0, UID: "UID" + pName1} prf2 := statsapi.PodReference{Name: pName2, Namespace: namespace2, UID: "UID" + pName2} + prf3 := statsapi.PodReference{Name: pName3, Namespace: namespace0, UID: "UID" + pName3} infos := map[string]cadvisorapiv2.ContainerInfo{ "/": getTestContainerInfo(seedRoot, "", "", ""), "/docker-daemon": getTestContainerInfo(seedRuntime, "", "", ""), @@ -136,6 +143,10 @@ func TestCadvisorListPodStats(t *testing.T) { "/pod2-c0": getTestContainerInfo(seedPod2Container, pName2, namespace2, cName20), "/kubepods/burstable/podUIDpod0": getTestContainerInfo(seedPod0Infra, pName0, namespace0, leaky.PodInfraContainerName), "/kubepods/podUIDpod1": getTestContainerInfo(seedPod1Infra, pName1, namespace0, leaky.PodInfraContainerName), + // Pod3 - Namespace0 + "/pod3-i": getTestContainerInfo(seedPod3Infra, pName3, namespace0, leaky.PodInfraContainerName), + "/pod3-c0-init": getTestContainerInfo(seedPod3Container0, pName3, namespace0, cName30), + "/pod3-c1": getTestContainerInfo(seedPod3Container1, pName3, namespace0, cName31), } freeRootfsInodes := rootfsInodesFree @@ -169,6 +180,21 @@ func TestCadvisorListPodStats(t *testing.T) { info.Spec.Memory.Limit = memoryLimitOverride infos[name] = info } + // any container for which cadvisor should return no stats (as might be the case for an exited init container) + nostatsOverrides := []string{ + "/pod3-c0-init", + } + for _, name := range nostatsOverrides { + info, found := infos[name] + if !found { + t.Errorf("No container defined with name %v", name) + } + info.Spec.Memory = cadvisorapiv2.MemorySpec{} + info.Spec.Cpu = cadvisorapiv2.CpuSpec{} + info.Spec.HasMemory = false + info.Spec.HasCpu = false + infos[name] = info + } options := cadvisorapiv2.RequestOptions{ IdType: cadvisorapiv2.TypeName, @@ -197,10 +223,12 @@ func TestCadvisorListPodStats(t *testing.T) { p0Time := metav1.Now() p1Time := metav1.Now() p2Time := metav1.Now() + p3Time := metav1.Now() mockStatus := new(statustest.MockStatusProvider) mockStatus.On("GetPodStatus", types.UID("UID"+pName0)).Return(v1.PodStatus{StartTime: &p0Time}, true) mockStatus.On("GetPodStatus", types.UID("UID"+pName1)).Return(v1.PodStatus{StartTime: &p1Time}, true) mockStatus.On("GetPodStatus", types.UID("UID"+pName2)).Return(v1.PodStatus{StartTime: &p2Time}, true) + mockStatus.On("GetPodStatus", types.UID("UID"+pName3)).Return(v1.PodStatus{StartTime: &p3Time}, true) resourceAnalyzer := &fakeResourceAnalyzer{podVolumeStats: volumeStats} @@ -208,7 +236,7 @@ func TestCadvisorListPodStats(t *testing.T) { pods, err := p.ListPodStats() assert.NoError(t, err) - assert.Equal(t, 3, len(pods)) + assert.Equal(t, 4, len(pods)) indexPods := make(map[statsapi.PodReference]statsapi.PodStats, len(pods)) for _, pod := range pods { indexPods[pod.PodRef] = pod @@ -261,6 +289,24 @@ func TestCadvisorListPodStats(t *testing.T) { checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU) checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory) checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network) + + // Validate Pod3 Results + + ps, found = indexPods[prf3] + assert.True(t, found) + assert.Len(t, ps.Containers, 2) + indexCon = make(map[string]statsapi.ContainerStats, len(ps.Containers)) + for _, con := range ps.Containers { + indexCon[con.Name] = con + } + con = indexCon[cName31] + assert.Equal(t, cName31, con.Name) + checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU) + checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory) + con = indexCon[cName30] + assert.Equal(t, cName30, con.Name) + checkEmptyCPUStats(t, "Pod3Container0", seedPod3Container0, con.CPU) + checkEmptyMemoryStats(t, "Pod3Container0", seedPod3Container0, infos["/pod3-c0-init"], con.Memory) } func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) { diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go index 454facc4e98..2c8a78c9362 100644 --- a/pkg/kubelet/stats/helper.go +++ b/pkg/kubelet/stats/helper.go @@ -40,10 +40,12 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa } var cpuStats *statsapi.CPUStats var memoryStats *statsapi.MemoryStats + cpuStats = &statsapi.CPUStats{ + Time: metav1.NewTime(cstat.Timestamp), + UsageNanoCores: uint64Ptr(0), + UsageCoreNanoSeconds: uint64Ptr(0), + } if info.Spec.HasCpu { - cpuStats = &statsapi.CPUStats{ - Time: metav1.NewTime(cstat.Timestamp), - } if cstat.CpuInst != nil { cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total } @@ -67,6 +69,11 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet memoryStats.AvailableBytes = &availableBytes } + } else { + memoryStats = &statsapi.MemoryStats{ + Time: metav1.NewTime(cstat.Timestamp), + WorkingSetBytes: uint64Ptr(0), + } } return cpuStats, memoryStats } diff --git a/pkg/kubelet/stats/stats_provider_test.go b/pkg/kubelet/stats/stats_provider_test.go index 8e5d6c34ec7..85b287a235f 100644 --- a/pkg/kubelet/stats/stats_provider_test.go +++ b/pkg/kubelet/stats/stats_provider_test.go @@ -621,7 +621,32 @@ func checkNetworkStats(t *testing.T, label string, seed int, stats *statsapi.Net } +// container which had no stats should have zero-valued CPU usage +func checkEmptyCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) { + require.NotNil(t, stats.Time, label+".CPU.Time") + require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores") + require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds") + assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time") + assert.EqualValues(t, 0, *stats.UsageNanoCores, label+".CPU.UsageCores") + assert.EqualValues(t, 0, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds") +} + +// container which had no stats should have zero-valued Memory usage +func checkEmptyMemoryStats(t *testing.T, label string, seed int, info cadvisorapiv2.ContainerInfo, stats *statsapi.MemoryStats) { + assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".Mem.Time") + require.NotNil(t, stats.WorkingSetBytes, label+".Mem.WorkingSetBytes") + assert.EqualValues(t, 0, *stats.WorkingSetBytes, label+".Mem.WorkingSetBytes") + assert.Nil(t, stats.UsageBytes, label+".Mem.UsageBytes") + assert.Nil(t, stats.RSSBytes, label+".Mem.RSSBytes") + assert.Nil(t, stats.PageFaults, label+".Mem.PageFaults") + assert.Nil(t, stats.MajorPageFaults, label+".Mem.MajorPageFaults") + assert.Nil(t, stats.AvailableBytes, label+".Mem.AvailableBytes") +} + func checkCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) { + require.NotNil(t, stats.Time, label+".CPU.Time") + require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores") + require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds") assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time") assert.EqualValues(t, seed+offsetCPUUsageCores, *stats.UsageNanoCores, label+".CPU.UsageCores") assert.EqualValues(t, seed+offsetCPUUsageCoreSeconds, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")