Remove the status of the terminated containers in the summary endpoint

Yang Guo 2017-07-10 16:15:02 -07:00
parent 0744964956
commit af76564e92
2 changed files with 151 additions and 10 deletions
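
The kubelet summary endpoint builds its per-pod container stats from the cadvisor ContainerInfos it is given, and cgroups of already-terminated containers can linger there, so a pod could list the same container more than once. The change below groups ContainerInfos by pod and container name, sorts each group by creation time, and drops the older entries that report zero instantaneous CPU and zero memory RSS; only the surviving entries feed into buildSummaryPods. A rough, self-contained sketch of that filtering idea follows (simplified stand-in types; names such as info, isActive, and filterTerminated are illustrative only and not the kubelet code, which operates on cadvisor's v2.ContainerInfo):

package main

import (
	"fmt"
	"sort"
	"time"
)

// info is a stripped-down stand-in for cadvisor's v2.ContainerInfo.
type info struct {
	cgroup   string
	created  time.Time
	cpuTotal uint64 // latest instantaneous CPU usage
	rss      uint64 // latest memory RSS
}

// isActive mirrors hasMemoryAndCPUInstUsage: both usages must be non-zero.
func isActive(i info) bool { return i.cpuTotal != 0 && i.rss != 0 }

// filterTerminated keeps, per container, only the entries from the first
// "active" one onward after sorting the group by creation time, so stale
// cgroups of terminated containers are dropped.
func filterTerminated(groups map[string][]info) []info {
	var kept []info
	for _, g := range groups {
		if len(g) == 1 {
			kept = append(kept, g[0])
			continue
		}
		sort.Slice(g, func(i, j int) bool {
			if g[i].created.Equal(g[j].created) {
				// Tie-break: the entry without usage sorts first and gets dropped.
				return !isActive(g[i]) && isActive(g[j])
			}
			return g[i].created.Before(g[j].created)
		})
		idx := 0
		for idx < len(g) && !isActive(g[idx]) {
			idx++
		}
		kept = append(kept, g[idx:]...)
	}
	return kept
}

func main() {
	now := time.Now()
	groups := map[string][]info{
		"test/pod0/c0": {
			{cgroup: "/pod0-c0-terminated", created: now.Add(-5 * time.Minute)}, // zero usage, older: dropped
			{cgroup: "/pod0-c0", created: now, cpuTotal: 100, rss: 4096},        // kept
		},
	}
	for _, i := range filterTerminated(groups) {
		fmt.Println("keeping", i.cgroup) // prints: keeping /pod0-c0
	}
}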


@@ -18,6 +18,7 @@ package stats
import (
"fmt"
"sort"
"strings"
"time"
@@ -128,7 +129,7 @@ func (sb *summaryBuilder) build() (*stats.Summary, error) {
}
rootStats := sb.containerInfoV2ToStats("", &rootInfo)
cStats, _ := sb.latestContainerStats(&rootInfo)
cStats, _ := latestContainerStats(&rootInfo)
nodeStats := stats.NodeStats{
NodeName: sb.node.Name,
CPU: rootStats.CPU,
@@ -184,7 +185,7 @@ func (sb *summaryBuilder) containerInfoV2FsStats(
info *cadvisorapiv2.ContainerInfo,
cs *stats.ContainerStats) {
lcs, found := sb.latestContainerStats(info)
lcs, found := latestContainerStats(info)
if !found {
return
}
@@ -230,7 +231,7 @@ func (sb *summaryBuilder) containerInfoV2FsStats(
}
// latestContainerStats returns the latest container stats from cadvisor, or nil if none exist
func (sb *summaryBuilder) latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) {
func latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) {
stats := info.Stats
if len(stats) < 1 {
return nil, false
@@ -242,12 +243,101 @@ func (sb *summaryBuilder) latestContainerStats(info *cadvisorapiv2.ContainerInfo
return latest, true
}
// hasMemoryAndCPUInstUsage returns true if the specified container info has
// both non-zero CPU instantaneous usage and non-zero memory RSS usage, and
// false otherwise.
func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool {
if !info.Spec.HasCpu || !info.Spec.HasMemory {
return false
}
cstat, found := latestContainerStats(info)
if !found {
return false
}
if cstat.CpuInst == nil {
return false
}
return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0
}
// ByCreationTime implements sort.Interface for []containerInfoWithCgroup based
// on the cinfo.Spec.CreationTime field.
type ByCreationTime []containerInfoWithCgroup
func (a ByCreationTime) Len() int { return len(a) }
func (a ByCreationTime) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByCreationTime) Less(i, j int) bool {
if a[i].cinfo.Spec.CreationTime.Equal(a[j].cinfo.Spec.CreationTime) {
// There shouldn't be two containers with the same name and/or the same
// creation time. However, to make the logic here robust, we break the
// tie by moving the one without CPU instantaneous or memory RSS usage
// to the beginning.
return hasMemoryAndCPUInstUsage(&a[j].cinfo)
}
return a[i].cinfo.Spec.CreationTime.Before(a[j].cinfo.Spec.CreationTime)
}
// containerID is the identity of a container in a pod.
type containerID struct {
podRef stats.PodReference
containerName string
}
// containerInfoWithCgroup contains the ContainerInfo and its cgroup name.
type containerInfoWithCgroup struct {
cinfo cadvisorapiv2.ContainerInfo
cgroup string
}
// removeTerminatedContainerInfo returns the specified containerInfo but with
// the stats of the terminated containers removed.
//
// A ContainerInfo is considered to belong to a terminated container if it has an
// older CreationTime and zero instantaneous CPU and memory RSS usage.
func removeTerminatedContainerInfo(containerInfo map[string]cadvisorapiv2.ContainerInfo) map[string]cadvisorapiv2.ContainerInfo {
cinfoMap := make(map[containerID][]containerInfoWithCgroup)
for key, cinfo := range containerInfo {
if !isPodManagedContainer(&cinfo) {
continue
}
cinfoID := containerID{
podRef: buildPodRef(&cinfo),
containerName: types.GetContainerName(cinfo.Spec.Labels),
}
cinfoMap[cinfoID] = append(cinfoMap[cinfoID], containerInfoWithCgroup{
cinfo: cinfo,
cgroup: key,
})
}
result := make(map[string]cadvisorapiv2.ContainerInfo)
for _, refs := range cinfoMap {
if len(refs) == 1 {
result[refs[0].cgroup] = refs[0].cinfo
continue
}
sort.Sort(ByCreationTime(refs))
i := 0
for ; i < len(refs); i++ {
if hasMemoryAndCPUInstUsage(&refs[i].cinfo) {
// Stop removing entries once we see the first info with non-zero
// CPU/memory usage.
break
}
}
for ; i < len(refs); i++ {
result[refs[i].cgroup] = refs[i].cinfo
}
}
return result
}
// buildSummaryPods aggregates and returns the container stats in cinfos by the Pod managing the container.
// Containers not managed by a Pod are omitted.
func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats {
// Map each container to a pod and update the PodStats with container data
podToStats := map[stats.PodReference]*stats.PodStats{}
for key, cinfo := range sb.infos {
infos := removeTerminatedContainerInfo(sb.infos)
for key, cinfo := range infos {
// on systemd using devicemapper each mount into the container has an associated cgroup.
// we ignore them to ensure we do not get duplicate entries in our summary.
// for details on .mount units: http://man7.org/linux/man-pages/man5/systemd.mount.5.html
@@ -255,10 +345,10 @@ func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats {
continue
}
// Build the Pod key if this container is managed by a Pod
if !sb.isPodManagedContainer(&cinfo) {
if !isPodManagedContainer(&cinfo) {
continue
}
ref := sb.buildPodRef(&cinfo)
ref := buildPodRef(&cinfo)
// Lookup the PodStats for the pod using the PodRef. If none exists, initialize a new entry.
podStats, found := podToStats[ref]
@@ -292,7 +382,7 @@ func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats {
}
// buildPodRef returns a PodReference that identifies the Pod managing cinfo
func (sb *summaryBuilder) buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.PodReference {
func buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.PodReference {
podName := types.GetPodName(cinfo.Spec.Labels)
podNamespace := types.GetPodNamespace(cinfo.Spec.Labels)
podUID := types.GetPodUID(cinfo.Spec.Labels)
@@ -300,7 +390,7 @@ func (sb *summaryBuilder) buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.
}
// isPodManagedContainer returns true if the cinfo container is managed by a Pod
func (sb *summaryBuilder) isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool {
func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool {
podName := types.GetPodName(cinfo.Spec.Labels)
podNamespace := types.GetPodNamespace(cinfo.Spec.Labels)
managed := podName != "" && podNamespace != ""
@@ -319,7 +409,7 @@ func (sb *summaryBuilder) containerInfoV2ToStats(
StartTime: metav1.NewTime(info.Spec.CreationTime),
Name: name,
}
cstat, found := sb.latestContainerStats(info)
cstat, found := latestContainerStats(info)
if !found {
return cStats
}
@@ -371,7 +461,7 @@ func (sb *summaryBuilder) containerInfoV2ToNetworkStats(name string, info *cadvi
if !info.Spec.HasNetwork {
return nil
}
cstat, found := sb.latestContainerStats(info)
cstat, found := latestContainerStats(info)
if !found {
return nil
}


@@ -53,6 +53,50 @@ var (
creationTime = timestamp.Add(-5 * time.Minute)
)
func TestRemoveTerminatedContainerInfo(t *testing.T) {
const (
seedPastPod0Infra = 1000
seedPastPod0Container0 = 2000
seedPod0Infra = 3000
seedPod0Container0 = 4000
)
const (
namespace = "test"
pName0 = "pod0"
cName00 = "c0"
)
infos := map[string]v2.ContainerInfo{
// ContainerInfo with past creation time and no CPU/memory usage for
// simulating uncleaned cgroups of already terminated containers, which
// should not be shown in the results.
"/pod0-i-terminated-1": summaryTerminatedContainerInfo(seedPastPod0Infra, pName0, namespace, leaky.PodInfraContainerName),
"/pod0-c0-terminated-1": summaryTerminatedContainerInfo(seedPastPod0Container0, pName0, namespace, cName00),
// Same as above but uses the same creation time as the latest
// containers. They are terminated containers, so they should not be in
// the results.
"/pod0-i-terminated-2": summaryTerminatedContainerInfo(seedPod0Infra, pName0, namespace, leaky.PodInfraContainerName),
"/pod0-c0-terminated-2": summaryTerminatedContainerInfo(seedPod0Container0, pName0, namespace, cName00),
// The latest containers, which should be in the results.
"/pod0-i": summaryTestContainerInfo(seedPod0Infra, pName0, namespace, leaky.PodInfraContainerName),
"/pod0-c0": summaryTestContainerInfo(seedPod0Container0, pName0, namespace, cName00),
// Duplicated containers with non-zero CPU and memory usage. This case
// shouldn't happen unless something goes wrong, but we want to test
// that the metrics reporting logic works in this scenario.
"/pod0-i-duplicated": summaryTestContainerInfo(seedPod0Infra, pName0, namespace, leaky.PodInfraContainerName),
"/pod0-c0-duplicated": summaryTestContainerInfo(seedPod0Container0, pName0, namespace, cName00),
}
output := removeTerminatedContainerInfo(infos)
assert.Len(t, output, 4)
for _, c := range []string{"/pod0-i", "/pod0-c0", "/pod0-i-duplicated", "/pod0-c0-duplicated"} {
if _, found := output[c]; !found {
t.Errorf("%q is expected to be in the output\n", c)
}
}
}
func TestBuildSummary(t *testing.T) {
node := k8sv1.Node{}
node.Name = "FooNode"
@@ -280,6 +324,13 @@ func generateCustomMetrics(spec []v1.MetricSpec) map[string][]v1.MetricVal {
return ret
}
func summaryTerminatedContainerInfo(seed int, podName string, podNamespace string, containerName string) v2.ContainerInfo {
cinfo := summaryTestContainerInfo(seed, podName, podNamespace, containerName)
cinfo.Stats[0].Memory.RSS = 0
cinfo.Stats[0].CpuInst.Usage.Total = 0
return cinfo
}
func summaryTestContainerInfo(seed int, podName string, podNamespace string, containerName string) v2.ContainerInfo {
labels := map[string]string{}
if podName != "" {