Work-around for missing metrics on CRI-O exited containers

HPA needs metrics for exited init containers before it will
take action. By setting memory and CPU usage to zero for any
containers that cAdvisor didn't provide statistics for, we
are assured that HPA will be able to correctly calculate
pod resource usage.
This commit is contained in:
Joel Smith 2020-03-02 08:08:35 -07:00
parent 0ad60b35c1
commit da988294ec
3 changed files with 82 additions and 4 deletions

View File

@ -89,6 +89,9 @@ func TestCadvisorListPodStats(t *testing.T) {
seedPod1Container = 4000
seedPod2Infra = 5000
seedPod2Container = 6000
seedPod3Infra = 7000
seedPod3Container0 = 8000
seedPod3Container1 = 8001
seedEphemeralVolume1 = 10000
seedEphemeralVolume2 = 10001
seedPersistentVolume1 = 20000
@ -98,12 +101,15 @@ func TestCadvisorListPodStats(t *testing.T) {
pName0 = "pod0"
pName1 = "pod1"
pName2 = "pod0" // ensure pName2 conflicts with pName0, but is in a different namespace
pName3 = "pod3"
)
const (
cName00 = "c0"
cName01 = "c1"
cName10 = "c0" // ensure cName10 conflicts with cName02, but is in a different pod
cName20 = "c1" // ensure cName20 conflicts with cName01, but is in a different pod + namespace
cName30 = "c0-init"
cName31 = "c1"
)
const (
rootfsCapacity = uint64(10000000)
@ -119,6 +125,7 @@ func TestCadvisorListPodStats(t *testing.T) {
prf0 := statsapi.PodReference{Name: pName0, Namespace: namespace0, UID: "UID" + pName0}
prf1 := statsapi.PodReference{Name: pName1, Namespace: namespace0, UID: "UID" + pName1}
prf2 := statsapi.PodReference{Name: pName2, Namespace: namespace2, UID: "UID" + pName2}
prf3 := statsapi.PodReference{Name: pName3, Namespace: namespace0, UID: "UID" + pName3}
infos := map[string]cadvisorapiv2.ContainerInfo{
"/": getTestContainerInfo(seedRoot, "", "", ""),
"/docker-daemon": getTestContainerInfo(seedRuntime, "", "", ""),
@ -136,6 +143,10 @@ func TestCadvisorListPodStats(t *testing.T) {
"/pod2-c0": getTestContainerInfo(seedPod2Container, pName2, namespace2, cName20),
"/kubepods/burstable/podUIDpod0": getTestContainerInfo(seedPod0Infra, pName0, namespace0, leaky.PodInfraContainerName),
"/kubepods/podUIDpod1": getTestContainerInfo(seedPod1Infra, pName1, namespace0, leaky.PodInfraContainerName),
// Pod3 - Namespace0
"/pod3-i": getTestContainerInfo(seedPod3Infra, pName3, namespace0, leaky.PodInfraContainerName),
"/pod3-c0-init": getTestContainerInfo(seedPod3Container0, pName3, namespace0, cName30),
"/pod3-c1": getTestContainerInfo(seedPod3Container1, pName3, namespace0, cName31),
}
freeRootfsInodes := rootfsInodesFree
@ -169,6 +180,21 @@ func TestCadvisorListPodStats(t *testing.T) {
info.Spec.Memory.Limit = memoryLimitOverride
infos[name] = info
}
// any container for which cadvisor should return no stats (as might be the case for an exited init container)
nostatsOverrides := []string{
"/pod3-c0-init",
}
for _, name := range nostatsOverrides {
info, found := infos[name]
if !found {
t.Errorf("No container defined with name %v", name)
}
info.Spec.Memory = cadvisorapiv2.MemorySpec{}
info.Spec.Cpu = cadvisorapiv2.CpuSpec{}
info.Spec.HasMemory = false
info.Spec.HasCpu = false
infos[name] = info
}
options := cadvisorapiv2.RequestOptions{
IdType: cadvisorapiv2.TypeName,
@ -197,10 +223,12 @@ func TestCadvisorListPodStats(t *testing.T) {
p0Time := metav1.Now()
p1Time := metav1.Now()
p2Time := metav1.Now()
p3Time := metav1.Now()
mockStatus := new(statustest.MockStatusProvider)
mockStatus.On("GetPodStatus", types.UID("UID"+pName0)).Return(v1.PodStatus{StartTime: &p0Time}, true)
mockStatus.On("GetPodStatus", types.UID("UID"+pName1)).Return(v1.PodStatus{StartTime: &p1Time}, true)
mockStatus.On("GetPodStatus", types.UID("UID"+pName2)).Return(v1.PodStatus{StartTime: &p2Time}, true)
mockStatus.On("GetPodStatus", types.UID("UID"+pName3)).Return(v1.PodStatus{StartTime: &p3Time}, true)
resourceAnalyzer := &fakeResourceAnalyzer{podVolumeStats: volumeStats}
@ -208,7 +236,7 @@ func TestCadvisorListPodStats(t *testing.T) {
pods, err := p.ListPodStats()
assert.NoError(t, err)
assert.Equal(t, 3, len(pods))
assert.Equal(t, 4, len(pods))
indexPods := make(map[statsapi.PodReference]statsapi.PodStats, len(pods))
for _, pod := range pods {
indexPods[pod.PodRef] = pod
@ -261,6 +289,24 @@ func TestCadvisorListPodStats(t *testing.T) {
checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)
// Validate Pod3 Results
ps, found = indexPods[prf3]
assert.True(t, found)
assert.Len(t, ps.Containers, 2)
indexCon = make(map[string]statsapi.ContainerStats, len(ps.Containers))
for _, con := range ps.Containers {
indexCon[con.Name] = con
}
con = indexCon[cName31]
assert.Equal(t, cName31, con.Name)
checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU)
checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory)
con = indexCon[cName30]
assert.Equal(t, cName30, con.Name)
checkEmptyCPUStats(t, "Pod3Container0", seedPod3Container0, con.CPU)
checkEmptyMemoryStats(t, "Pod3Container0", seedPod3Container0, infos["/pod3-c0-init"], con.Memory)
}
func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {

View File

@ -40,10 +40,12 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
}
var cpuStats *statsapi.CPUStats
var memoryStats *statsapi.MemoryStats
cpuStats = &statsapi.CPUStats{
Time: metav1.NewTime(cstat.Timestamp),
UsageNanoCores: uint64Ptr(0),
UsageCoreNanoSeconds: uint64Ptr(0),
}
if info.Spec.HasCpu {
cpuStats = &statsapi.CPUStats{
Time: metav1.NewTime(cstat.Timestamp),
}
if cstat.CpuInst != nil {
cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total
}
@ -67,6 +69,11 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
memoryStats.AvailableBytes = &availableBytes
}
} else {
memoryStats = &statsapi.MemoryStats{
Time: metav1.NewTime(cstat.Timestamp),
WorkingSetBytes: uint64Ptr(0),
}
}
return cpuStats, memoryStats
}

View File

@ -621,7 +621,32 @@ func checkNetworkStats(t *testing.T, label string, seed int, stats *statsapi.Net
}
// container which had no stats should have zero-valued CPU usage
func checkEmptyCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) {
require.NotNil(t, stats.Time, label+".CPU.Time")
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores")
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds")
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time")
assert.EqualValues(t, 0, *stats.UsageNanoCores, label+".CPU.UsageCores")
assert.EqualValues(t, 0, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")
}
// container which had no stats should have zero-valued Memory usage
func checkEmptyMemoryStats(t *testing.T, label string, seed int, info cadvisorapiv2.ContainerInfo, stats *statsapi.MemoryStats) {
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".Mem.Time")
require.NotNil(t, stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
assert.EqualValues(t, 0, *stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
assert.Nil(t, stats.UsageBytes, label+".Mem.UsageBytes")
assert.Nil(t, stats.RSSBytes, label+".Mem.RSSBytes")
assert.Nil(t, stats.PageFaults, label+".Mem.PageFaults")
assert.Nil(t, stats.MajorPageFaults, label+".Mem.MajorPageFaults")
assert.Nil(t, stats.AvailableBytes, label+".Mem.AvailableBytes")
}
func checkCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) {
require.NotNil(t, stats.Time, label+".CPU.Time")
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores")
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds")
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time")
assert.EqualValues(t, seed+offsetCPUUsageCores, *stats.UsageNanoCores, label+".CPU.UsageCores")
assert.EqualValues(t, seed+offsetCPUUsageCoreSeconds, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")