From da988294ec5a6045dd602b024d4c5582ad0d5e1c Mon Sep 17 00:00:00 2001
From: Joel Smith <joesmith@redhat.com>
Date: Mon, 2 Mar 2020 08:08:35 -0700
Subject: [PATCH] Work-around for missing metrics on CRI-O exited containers

HPA needs metrics for exited init containers before it will
take action. By setting memory and CPU usage to zero for any
containers that cAdvisor didn't provide statistics for, we
are assured that HPA will be able to correctly calculate
pod resource usage.
---
 .../stats/cadvisor_stats_provider_test.go     | 48 ++++++++++++++++++-
 pkg/kubelet/stats/helper.go                   | 13 +++--
 pkg/kubelet/stats/stats_provider_test.go      | 25 ++++++++++
 3 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/pkg/kubelet/stats/cadvisor_stats_provider_test.go b/pkg/kubelet/stats/cadvisor_stats_provider_test.go
index 1695c6503a5..9e1faee180e 100644
--- a/pkg/kubelet/stats/cadvisor_stats_provider_test.go
+++ b/pkg/kubelet/stats/cadvisor_stats_provider_test.go
@@ -89,6 +89,9 @@ func TestCadvisorListPodStats(t *testing.T) {
 		seedPod1Container     = 4000
 		seedPod2Infra         = 5000
 		seedPod2Container     = 6000
+		seedPod3Infra         = 7000
+		seedPod3Container0    = 8000
+		seedPod3Container1    = 8001
 		seedEphemeralVolume1  = 10000
 		seedEphemeralVolume2  = 10001
 		seedPersistentVolume1 = 20000
@@ -98,12 +101,15 @@ func TestCadvisorListPodStats(t *testing.T) {
 		pName0 = "pod0"
 		pName1 = "pod1"
 		pName2 = "pod0" // ensure pName2 conflicts with pName0, but is in a different namespace
+		pName3 = "pod3"
 	)
 	const (
 		cName00 = "c0"
 		cName01 = "c1"
 		cName10 = "c0" // ensure cName10 conflicts with cName02, but is in a different pod
 		cName20 = "c1" // ensure cName20 conflicts with cName01, but is in a different pod + namespace
+		cName30 = "c0-init"
+		cName31 = "c1"
 	)
 	const (
 		rootfsCapacity    = uint64(10000000)
@@ -119,6 +125,7 @@ func TestCadvisorListPodStats(t *testing.T) {
 	prf0 := statsapi.PodReference{Name: pName0, Namespace: namespace0, UID: "UID" + pName0}
 	prf1 := statsapi.PodReference{Name: pName1, Namespace: namespace0, UID: "UID" + pName1}
 	prf2 := statsapi.PodReference{Name: pName2, Namespace: namespace2, UID: "UID" + pName2}
+	prf3 := statsapi.PodReference{Name: pName3, Namespace: namespace0, UID: "UID" + pName3}
 	infos := map[string]cadvisorapiv2.ContainerInfo{
 		"/":              getTestContainerInfo(seedRoot, "", "", ""),
 		"/docker-daemon": getTestContainerInfo(seedRuntime, "", "", ""),
@@ -136,6 +143,10 @@ func TestCadvisorListPodStats(t *testing.T) {
 		"/pod2-c0":                       getTestContainerInfo(seedPod2Container, pName2, namespace2, cName20),
 		"/kubepods/burstable/podUIDpod0": getTestContainerInfo(seedPod0Infra, pName0, namespace0, leaky.PodInfraContainerName),
 		"/kubepods/podUIDpod1":           getTestContainerInfo(seedPod1Infra, pName1, namespace0, leaky.PodInfraContainerName),
+		// Pod3 - Namespace0
+		"/pod3-i":       getTestContainerInfo(seedPod3Infra, pName3, namespace0, leaky.PodInfraContainerName),
+		"/pod3-c0-init": getTestContainerInfo(seedPod3Container0, pName3, namespace0, cName30),
+		"/pod3-c1":      getTestContainerInfo(seedPod3Container1, pName3, namespace0, cName31),
 	}
 
 	freeRootfsInodes := rootfsInodesFree
@@ -169,6 +180,21 @@ func TestCadvisorListPodStats(t *testing.T) {
 		info.Spec.Memory.Limit = memoryLimitOverride
 		infos[name] = info
 	}
+	// any container for which cadvisor should return no stats (as might be the case for an exited init container)
+	nostatsOverrides := []string{
+		"/pod3-c0-init",
+	}
+	for _, name := range nostatsOverrides {
+		info, found := infos[name]
+		if !found {
+			t.Errorf("No container defined with name %v", name)
+		}
+		info.Spec.Memory = cadvisorapiv2.MemorySpec{}
+		info.Spec.Cpu = cadvisorapiv2.CpuSpec{}
+		info.Spec.HasMemory = false
+		info.Spec.HasCpu = false
+		infos[name] = info
+	}
 
 	options := cadvisorapiv2.RequestOptions{
 		IdType:    cadvisorapiv2.TypeName,
@@ -197,10 +223,12 @@ func TestCadvisorListPodStats(t *testing.T) {
 	p0Time := metav1.Now()
 	p1Time := metav1.Now()
 	p2Time := metav1.Now()
+	p3Time := metav1.Now()
 	mockStatus := new(statustest.MockStatusProvider)
 	mockStatus.On("GetPodStatus", types.UID("UID"+pName0)).Return(v1.PodStatus{StartTime: &p0Time}, true)
 	mockStatus.On("GetPodStatus", types.UID("UID"+pName1)).Return(v1.PodStatus{StartTime: &p1Time}, true)
 	mockStatus.On("GetPodStatus", types.UID("UID"+pName2)).Return(v1.PodStatus{StartTime: &p2Time}, true)
+	mockStatus.On("GetPodStatus", types.UID("UID"+pName3)).Return(v1.PodStatus{StartTime: &p3Time}, true)
 
 	resourceAnalyzer := &fakeResourceAnalyzer{podVolumeStats: volumeStats}
 
@@ -208,7 +236,7 @@ func TestCadvisorListPodStats(t *testing.T) {
 	pods, err := p.ListPodStats()
 	assert.NoError(t, err)
 
-	assert.Equal(t, 3, len(pods))
+	assert.Equal(t, 4, len(pods))
 	indexPods := make(map[statsapi.PodReference]statsapi.PodStats, len(pods))
 	for _, pod := range pods {
 		indexPods[pod.PodRef] = pod
@@ -261,6 +289,24 @@ func TestCadvisorListPodStats(t *testing.T) {
 	checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
 	checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
 	checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)
+
+	// Validate Pod3 Results
+
+	ps, found = indexPods[prf3]
+	assert.True(t, found)
+	assert.Len(t, ps.Containers, 2)
+	indexCon = make(map[string]statsapi.ContainerStats, len(ps.Containers))
+	for _, con := range ps.Containers {
+		indexCon[con.Name] = con
+	}
+	con = indexCon[cName31]
+	assert.Equal(t, cName31, con.Name)
+	checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU)
+	checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory)
+	con = indexCon[cName30]
+	assert.Equal(t, cName30, con.Name)
+	checkEmptyCPUStats(t, "Pod3Container0", seedPod3Container0, con.CPU)
+	checkEmptyMemoryStats(t, "Pod3Container0", seedPod3Container0, infos["/pod3-c0-init"], con.Memory)
 }
 
 func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go
index 454facc4e98..2c8a78c9362 100644
--- a/pkg/kubelet/stats/helper.go
+++ b/pkg/kubelet/stats/helper.go
@@ -40,10 +40,12 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
 	}
 	var cpuStats *statsapi.CPUStats
 	var memoryStats *statsapi.MemoryStats
+	cpuStats = &statsapi.CPUStats{
+		Time:                 metav1.NewTime(cstat.Timestamp),
+		UsageNanoCores:       uint64Ptr(0),
+		UsageCoreNanoSeconds: uint64Ptr(0),
+	}
 	if info.Spec.HasCpu {
-		cpuStats = &statsapi.CPUStats{
-			Time: metav1.NewTime(cstat.Timestamp),
-		}
 		if cstat.CpuInst != nil {
 			cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total
 		}
@@ -67,6 +69,11 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
 			availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
 			memoryStats.AvailableBytes = &availableBytes
 		}
+	} else {
+		memoryStats = &statsapi.MemoryStats{
+			Time:            metav1.NewTime(cstat.Timestamp),
+			WorkingSetBytes: uint64Ptr(0),
+		}
 	}
 	return cpuStats, memoryStats
 }
diff --git a/pkg/kubelet/stats/stats_provider_test.go b/pkg/kubelet/stats/stats_provider_test.go
index 8e5d6c34ec7..85b287a235f 100644
--- a/pkg/kubelet/stats/stats_provider_test.go
+++ b/pkg/kubelet/stats/stats_provider_test.go
@@ -621,7 +621,32 @@ func checkNetworkStats(t *testing.T, label string, seed int, stats *statsapi.Net
 
 }
 
+// container which had no stats should have zero-valued CPU usage
+func checkEmptyCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) {
+	require.NotNil(t, stats.Time, label+".CPU.Time")
+	require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores")
+	require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds")
+	assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time")
+	assert.EqualValues(t, 0, *stats.UsageNanoCores, label+".CPU.UsageCores")
+	assert.EqualValues(t, 0, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")
+}
+
+// container which had no stats should have zero-valued Memory usage
+func checkEmptyMemoryStats(t *testing.T, label string, seed int, info cadvisorapiv2.ContainerInfo, stats *statsapi.MemoryStats) {
+	assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".Mem.Time")
+	require.NotNil(t, stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
+	assert.EqualValues(t, 0, *stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
+	assert.Nil(t, stats.UsageBytes, label+".Mem.UsageBytes")
+	assert.Nil(t, stats.RSSBytes, label+".Mem.RSSBytes")
+	assert.Nil(t, stats.PageFaults, label+".Mem.PageFaults")
+	assert.Nil(t, stats.MajorPageFaults, label+".Mem.MajorPageFaults")
+	assert.Nil(t, stats.AvailableBytes, label+".Mem.AvailableBytes")
+}
+
 func checkCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) {
+	require.NotNil(t, stats.Time, label+".CPU.Time")
+	require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores")
+	require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds")
 	assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time")
 	assert.EqualValues(t, seed+offsetCPUUsageCores, *stats.UsageNanoCores, label+".CPU.UsageCores")
 	assert.EqualValues(t, seed+offsetCPUUsageCoreSeconds, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")