e2e: checking RSS memory for daemons

Yu-Ju Hong 2016-02-10 16:35:17 -08:00
parent c70c7fde4d
commit 715ea4c8b1
2 changed files with 77 additions and 13 deletions

View File

@@ -95,15 +95,53 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
By("Reporting overall resource usage")
logPodsOnNodes(framework.Client, nodeNames.List())
rm.LogLatest()
usageSummary, err := rm.GetLatest()
Expect(err).NotTo(HaveOccurred())
Logf("%s", rm.FormatResourceUsage(usageSummary))
// TODO(yujuhong): Set realistic values after gathering enough data.
verifyMemoryLimits(resourceUsagePerContainer{
"/kubelet": &containerResourceUsage{MemoryRSSInBytes: 500 * 1024 * 1024},
"/docker-daemon": &containerResourceUsage{MemoryRSSInBytes: 500 * 1024 * 1024},
}, usageSummary)
summary := rm.GetCPUSummary()
Logf("%s", rm.FormatCPUSummary(summary))
verifyCPULimits(expected, summary)
cpuSummary := rm.GetCPUSummary()
Logf("%s", rm.FormatCPUSummary(cpuSummary))
verifyCPULimits(expected, cpuSummary)
By("Deleting the RC")
DeleteRC(framework.Client, framework.Namespace.Name, rcName)
}
func verifyMemoryLimits(expected resourceUsagePerContainer, actual resourceUsagePerNode) {
if expected == nil {
return
}
var errList []string
for nodeName, nodeSummary := range actual {
var nodeErrs []string
for cName, expectedResult := range expected {
container, ok := nodeSummary[cName]
if !ok {
nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
continue
}
expectedValue := expectedResult.MemoryRSSInBytes
actualValue := container.MemoryRSSInBytes
if expectedValue != 0 && actualValue > expectedValue {
nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected RSS memory (MB) < %d; got %d",
cName, expectedValue, actualValue))
}
}
if len(nodeErrs) > 0 {
errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
}
}
if len(errList) > 0 {
Failf("CPU usage exceeding limits:\n %s", strings.Join(errList, "\n"))
}
}
func verifyCPULimits(expected containersCPUSummary, actual nodesCPUSummary) {
if expected == nil {
return

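For context, a minimal standalone sketch (not part of this commit) of the limit check that verifyMemoryLimits performs: each expected entry is a per-container RSS ceiling, a zero value disables the check, and a missing container is reported as an error. The type and sample values below are simplified stand-ins for the framework's containerResourceUsage and resourceUsagePerContainer.

package main

import "fmt"

// usage is a simplified stand-in for containerResourceUsage.
type usage struct{ MemoryRSSInBytes uint64 }

func main() {
	// Hypothetical ceilings and observed values for one node.
	expected := map[string]usage{
		"/kubelet":       {MemoryRSSInBytes: 500 * 1024 * 1024},
		"/docker-daemon": {MemoryRSSInBytes: 500 * 1024 * 1024},
	}
	actual := map[string]usage{
		"/kubelet":       {MemoryRSSInBytes: 300 * 1024 * 1024}, // within the ceiling
		"/docker-daemon": {MemoryRSSInBytes: 600 * 1024 * 1024}, // exceeds the ceiling
	}
	for name, want := range expected {
		got, ok := actual[name]
		switch {
		case !ok:
			fmt.Printf("container %q: missing\n", name)
		case want.MemoryRSSInBytes != 0 && got.MemoryRSSInBytes > want.MemoryRSSInBytes:
			fmt.Printf("container %q: RSS %d exceeds %d\n", name, got.MemoryRSSInBytes, want.MemoryRSSInBytes)
		}
	}
}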
View File

@@ -36,6 +36,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/master/ports"
utilerrors "k8s.io/kubernetes/pkg/util/errors"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/pkg/util/wait"
)
@@ -198,6 +199,7 @@ type containerResourceUsage struct {
CPUUsageInCores float64
MemoryUsageInBytes int64
MemoryWorkingSetInBytes int64
MemoryRSSInBytes uint64
// The interval used to calculate CPUUsageInCores.
CPUInterval time.Duration
}
@@ -207,6 +209,7 @@ func (r *containerResourceUsage) isStrictlyGreaterThan(rhs *containerResourceUsa
}
type resourceUsagePerContainer map[string]*containerResourceUsage
type resourceUsagePerNode map[string]resourceUsagePerContainer
// getOneTimeResourceUsageOnNode queries the node's /stats/container endpoint
// and returns the resource usage of all containerNames for the past
@@ -292,9 +295,9 @@ func formatResourceUsageStats(nodeName string, containerStats resourceUsagePerCo
// "/system" 0.007 119.88
buf := &bytes.Buffer{}
w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
fmt.Fprintf(w, "container\tcpu(cores)\tmemory(MB)\n")
fmt.Fprintf(w, "container\tcpu(cores)\tmemory_working_set(MB)\tmemory_rss(MB)\n")
for name, s := range containerStats {
fmt.Fprintf(w, "%q\t%.3f\t%.2f\n", name, s.CPUUsageInCores, float64(s.MemoryWorkingSetInBytes)/(1024*1024))
fmt.Fprintf(w, "%q\t%.3f\t%.2f\t%.2f\n", name, s.CPUUsageInCores, float64(s.MemoryWorkingSetInBytes)/(1024*1024), float64(s.MemoryRSSInBytes)/(1024*1024))
}
w.Flush()
return fmt.Sprintf("Resource usage on node %q:\n%s", nodeName, buf.String())
@@ -364,6 +367,7 @@ func computeContainerResourceUsage(name string, oldStats, newStats *cadvisorapi.
CPUUsageInCores: float64(newStats.Cpu.Usage.Total-oldStats.Cpu.Usage.Total) / float64(newStats.Timestamp.Sub(oldStats.Timestamp).Nanoseconds()),
MemoryUsageInBytes: int64(newStats.Memory.Usage),
MemoryWorkingSetInBytes: int64(newStats.Memory.WorkingSet),
MemoryRSSInBytes: newStats.Memory.RSS,
CPUInterval: newStats.Timestamp.Sub(oldStats.Timestamp),
}
}
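As a worked example (not from this commit) of the CPUUsageInCores formula above: the difference in cumulative CPU nanoseconds between two cadvisor samples, divided by the wall-clock nanoseconds between them, gives an average number of cores used over the interval. The numbers below are invented.

package main

import (
	"fmt"
	"time"
)

func main() {
	oldTotal := uint64(1000000000) // cumulative CPU time at the old sample, in ns
	newTotal := uint64(1250000000) // cumulative CPU time at the new sample, in ns
	interval := 10 * time.Second   // newStats.Timestamp - oldStats.Timestamp

	// 250ms of CPU consumed over a 10s window averages 0.025 cores.
	cores := float64(newTotal-oldTotal) / float64(interval.Nanoseconds())
	fmt.Printf("CPUUsageInCores: %.3f (interval %v)\n", cores, interval)
}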
@@ -437,20 +441,18 @@ func (r *resourceCollector) collectStats(oldStats map[string]*cadvisorapi.Contai
}
}
// LogLatest logs the latest resource usage of each container.
func (r *resourceCollector) LogLatest() {
func (r *resourceCollector) GetLatest() (resourceUsagePerContainer, error) {
r.lock.RLock()
defer r.lock.RUnlock()
stats := make(map[string]*containerResourceUsage)
stats := make(resourceUsagePerContainer)
for _, name := range r.containers {
contStats, ok := r.buffers[name]
if !ok || len(contStats) == 0 {
Logf("Resource usage on node %q is not ready yet", r.node)
return
return nil, fmt.Errorf("Resource usage on node %q is not ready yet", r.node)
}
stats[name] = contStats[len(contStats)-1]
}
Logf("\n%s", formatResourceUsageStats(r.node, stats))
return stats, nil
}
// Reset frees the stats and starts over.
@@ -534,9 +536,33 @@ func (r *resourceMonitor) Reset() {
}
func (r *resourceMonitor) LogLatest() {
for _, collector := range r.collectors {
collector.LogLatest()
summary, err := r.GetLatest()
if err != nil {
Logf("%v", err)
}
Logf("%s", r.FormatResourceUsage(summary))
}
func (r *resourceMonitor) FormatResourceUsage(s resourceUsagePerNode) string {
summary := []string{}
for node, usage := range s {
summary = append(summary, formatResourceUsageStats(node, usage))
}
return strings.Join(summary, "\n")
}
func (r *resourceMonitor) GetLatest() (resourceUsagePerNode, error) {
result := make(resourceUsagePerNode)
errs := []error{}
for key, collector := range r.collectors {
s, err := collector.GetLatest()
if err != nil {
errs = append(errs, err)
continue
}
result[key] = s
}
return result, utilerrors.NewAggregate(errs)
}
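For reference, a minimal sketch (not part of this commit) of the aggregate-error behavior GetLatest relies on: utilerrors.NewAggregate returns a nil error for an empty slice, so the caller sees success when every collector produced a sample, and a single combined error otherwise.

package main

import (
	"fmt"

	utilerrors "k8s.io/kubernetes/pkg/util/errors"
)

func main() {
	// No per-collector failures: the aggregate is nil and GetLatest succeeds.
	fmt.Println(utilerrors.NewAggregate(nil) == nil) // true

	// Two collectors not ready yet: one combined error carrying both messages.
	errs := []error{
		fmt.Errorf("Resource usage on node %q is not ready yet", "node-1"),
		fmt.Errorf("Resource usage on node %q is not ready yet", "node-2"),
	}
	fmt.Println(utilerrors.NewAggregate(errs))
}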
// containersCPUSummary is indexed by the container name with each entry a