From af76564e922ed83d6334098ef6a8181d96457627 Mon Sep 17 00:00:00 2001 From: Yang Guo Date: Mon, 10 Jul 2017 16:15:02 -0700 Subject: [PATCH] Remove the status of the terminated containers in the summary endpoint --- pkg/kubelet/server/stats/summary.go | 110 ++++++++++++++++++++--- pkg/kubelet/server/stats/summary_test.go | 51 +++++++++++ 2 files changed, 151 insertions(+), 10 deletions(-) diff --git a/pkg/kubelet/server/stats/summary.go b/pkg/kubelet/server/stats/summary.go index d1d698d92ec..2db4ad00fc5 100644 --- a/pkg/kubelet/server/stats/summary.go +++ b/pkg/kubelet/server/stats/summary.go @@ -18,6 +18,7 @@ package stats import ( "fmt" + "sort" "strings" "time" @@ -128,7 +129,7 @@ func (sb *summaryBuilder) build() (*stats.Summary, error) { } rootStats := sb.containerInfoV2ToStats("", &rootInfo) - cStats, _ := sb.latestContainerStats(&rootInfo) + cStats, _ := latestContainerStats(&rootInfo) nodeStats := stats.NodeStats{ NodeName: sb.node.Name, CPU: rootStats.CPU, @@ -184,7 +185,7 @@ func (sb *summaryBuilder) containerInfoV2FsStats( info *cadvisorapiv2.ContainerInfo, cs *stats.ContainerStats) { - lcs, found := sb.latestContainerStats(info) + lcs, found := latestContainerStats(info) if !found { return } @@ -230,7 +231,7 @@ func (sb *summaryBuilder) containerInfoV2FsStats( } // latestContainerStats returns the latest container stats from cadvisor, or nil if none exist -func (sb *summaryBuilder) latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) { +func latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) { stats := info.Stats if len(stats) < 1 { return nil, false @@ -242,12 +243,101 @@ func (sb *summaryBuilder) latestContainerStats(info *cadvisorapiv2.ContainerInfo return latest, true } +// hasMemoryAndCPUInstUsage returns true if the specified container info has +// both non-zero CPU instantaneous usage and non-zero memory RSS usage, and +// false otherwise. +func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool { + if !info.Spec.HasCpu || !info.Spec.HasMemory { + return false + } + cstat, found := latestContainerStats(info) + if !found { + return false + } + if cstat.CpuInst == nil { + return false + } + return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0 +} + +// ByCreationTime implements sort.Interface for []containerInfoWithCgroup based +// on the cinfo.Spec.CreationTime field. +type ByCreationTime []containerInfoWithCgroup + +func (a ByCreationTime) Len() int { return len(a) } +func (a ByCreationTime) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByCreationTime) Less(i, j int) bool { + if a[i].cinfo.Spec.CreationTime.Equal(a[j].cinfo.Spec.CreationTime) { + // There shouldn't be two containers with the same name and/or the same + // creation time. However, to make the logic here robust, we break the + // tie by moving the one without CPU instantaneous or memory RSS usage + // to the beginning. + return hasMemoryAndCPUInstUsage(&a[j].cinfo) + } + return a[i].cinfo.Spec.CreationTime.Before(a[j].cinfo.Spec.CreationTime) +} + +// containerID is the identity of a container in a pod. +type containerID struct { + podRef stats.PodReference + containerName string +} + +// containerInfoWithCgroup contains the ContainerInfo and its cgroup name. +type containerInfoWithCgroup struct { + cinfo cadvisorapiv2.ContainerInfo + cgroup string +} + +// removeTerminatedContainerInfo returns the specified containerInfo but with +// the stats of the terminated containers removed. +// +// A ContainerInfo is considered to be of a terminated container if it has an +// older CreationTime and zero CPU instantaneous and memory RSS usage. +func removeTerminatedContainerInfo(containerInfo map[string]cadvisorapiv2.ContainerInfo) map[string]cadvisorapiv2.ContainerInfo { + cinfoMap := make(map[containerID][]containerInfoWithCgroup) + for key, cinfo := range containerInfo { + if !isPodManagedContainer(&cinfo) { + continue + } + cinfoID := containerID{ + podRef: buildPodRef(&cinfo), + containerName: types.GetContainerName(cinfo.Spec.Labels), + } + cinfoMap[cinfoID] = append(cinfoMap[cinfoID], containerInfoWithCgroup{ + cinfo: cinfo, + cgroup: key, + }) + } + result := make(map[string]cadvisorapiv2.ContainerInfo) + for _, refs := range cinfoMap { + if len(refs) == 1 { + result[refs[0].cgroup] = refs[0].cinfo + continue + } + sort.Sort(ByCreationTime(refs)) + i := 0 + for ; i < len(refs); i++ { + if hasMemoryAndCPUInstUsage(&refs[i].cinfo) { + // Stops removing when we first see an info with non-zero + // CPU/Memory usage. + break + } + } + for ; i < len(refs); i++ { + result[refs[i].cgroup] = refs[i].cinfo + } + } + return result +} + // buildSummaryPods aggregates and returns the container stats in cinfos by the Pod managing the container. // Containers not managed by a Pod are omitted. func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats { // Map each container to a pod and update the PodStats with container data podToStats := map[stats.PodReference]*stats.PodStats{} - for key, cinfo := range sb.infos { + infos := removeTerminatedContainerInfo(sb.infos) + for key, cinfo := range infos { // on systemd using devicemapper each mount into the container has an associated cgroup. // we ignore them to ensure we do not get duplicate entries in our summary. // for details on .mount units: http://man7.org/linux/man-pages/man5/systemd.mount.5.html @@ -255,10 +345,10 @@ func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats { continue } // Build the Pod key if this container is managed by a Pod - if !sb.isPodManagedContainer(&cinfo) { + if !isPodManagedContainer(&cinfo) { continue } - ref := sb.buildPodRef(&cinfo) + ref := buildPodRef(&cinfo) // Lookup the PodStats for the pod using the PodRef. If none exists, initialize a new entry. podStats, found := podToStats[ref] @@ -292,7 +382,7 @@ func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats { } // buildPodRef returns a PodReference that identifies the Pod managing cinfo -func (sb *summaryBuilder) buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.PodReference { +func buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.PodReference { podName := types.GetPodName(cinfo.Spec.Labels) podNamespace := types.GetPodNamespace(cinfo.Spec.Labels) podUID := types.GetPodUID(cinfo.Spec.Labels) @@ -300,7 +390,7 @@ func (sb *summaryBuilder) buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats. } // isPodManagedContainer returns true if the cinfo container is managed by a Pod -func (sb *summaryBuilder) isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool { +func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool { podName := types.GetPodName(cinfo.Spec.Labels) podNamespace := types.GetPodNamespace(cinfo.Spec.Labels) managed := podName != "" && podNamespace != "" @@ -319,7 +409,7 @@ func (sb *summaryBuilder) containerInfoV2ToStats( StartTime: metav1.NewTime(info.Spec.CreationTime), Name: name, } - cstat, found := sb.latestContainerStats(info) + cstat, found := latestContainerStats(info) if !found { return cStats } @@ -371,7 +461,7 @@ func (sb *summaryBuilder) containerInfoV2ToNetworkStats(name string, info *cadvi if !info.Spec.HasNetwork { return nil } - cstat, found := sb.latestContainerStats(info) + cstat, found := latestContainerStats(info) if !found { return nil } diff --git a/pkg/kubelet/server/stats/summary_test.go b/pkg/kubelet/server/stats/summary_test.go index eb39efd44c2..2253a10b876 100644 --- a/pkg/kubelet/server/stats/summary_test.go +++ b/pkg/kubelet/server/stats/summary_test.go @@ -53,6 +53,50 @@ var ( creationTime = timestamp.Add(-5 * time.Minute) ) +func TestRemoveTerminatedContainerInfo(t *testing.T) { + const ( + seedPastPod0Infra = 1000 + seedPastPod0Container0 = 2000 + seedPod0Infra = 3000 + seedPod0Container0 = 4000 + ) + const ( + namespace = "test" + pName0 = "pod0" + cName00 = "c0" + ) + infos := map[string]v2.ContainerInfo{ + // ContainerInfo with past creation time and no CPU/memory usage for + // simulating uncleaned cgroups of already terminated containers, which + // should not be shown in the results. + "/pod0-i-terminated-1": summaryTerminatedContainerInfo(seedPastPod0Infra, pName0, namespace, leaky.PodInfraContainerName), + "/pod0-c0-terminated-1": summaryTerminatedContainerInfo(seedPastPod0Container0, pName0, namespace, cName00), + + // Same as above but uses the same creation time as the latest + // containers. They are terminated containers, so they should not be in + // the results. + "/pod0-i-terminated-2": summaryTerminatedContainerInfo(seedPod0Infra, pName0, namespace, leaky.PodInfraContainerName), + "/pod0-c0-terminated-2": summaryTerminatedContainerInfo(seedPod0Container0, pName0, namespace, cName00), + + // The latest containers, which should be in the results. + "/pod0-i": summaryTestContainerInfo(seedPod0Infra, pName0, namespace, leaky.PodInfraContainerName), + "/pod0-c0": summaryTestContainerInfo(seedPod0Container0, pName0, namespace, cName00), + + // Duplicated containers with non-zero CPU and memory usage. This case + // shouldn't happen unless something goes wrong, but we want to test + // that the metrics reporting logic works in this scenario. + "/pod0-i-duplicated": summaryTestContainerInfo(seedPod0Infra, pName0, namespace, leaky.PodInfraContainerName), + "/pod0-c0-duplicated": summaryTestContainerInfo(seedPod0Container0, pName0, namespace, cName00), + } + output := removeTerminatedContainerInfo(infos) + assert.Len(t, output, 4) + for _, c := range []string{"/pod0-i", "/pod0-c0", "/pod0-i-duplicated", "/pod0-c0-duplicated"} { + if _, found := output[c]; !found { + t.Errorf("%q is expected to be in the output\n", c) + } + } +} + func TestBuildSummary(t *testing.T) { node := k8sv1.Node{} node.Name = "FooNode" @@ -280,6 +324,13 @@ func generateCustomMetrics(spec []v1.MetricSpec) map[string][]v1.MetricVal { return ret } +func summaryTerminatedContainerInfo(seed int, podName string, podNamespace string, containerName string) v2.ContainerInfo { + cinfo := summaryTestContainerInfo(seed, podName, podNamespace, containerName) + cinfo.Stats[0].Memory.RSS = 0 + cinfo.Stats[0].CpuInst.Usage.Total = 0 + return cinfo +} + func summaryTestContainerInfo(seed int, podName string, podNamespace string, containerName string) v2.ContainerInfo { labels := map[string]string{} if podName != "" {