Merge pull request #23948 from derekwaynecarr/memory_available

Automatic merge from submit-queue

Add memory available to summary stats provider

To support out-of-resource killing when a node is low on memory, we want to let operators specify eviction thresholds based on available memory rather than memory usage, which is easier to reason about when working with heterogeneous nodes.

For example, a valid eviction threshold would be:
* If node.memory.available < 200Mi for 30s, then evict pod(s)
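For illustration only, such a threshold could later be expressed via kubelet eviction flags along these lines (the flag names and syntax are assumed from the broader out-of-resource eviction work and are not part of this change):

```
--eviction-soft=memory.available<200Mi
--eviction-soft-grace-period=memory.available=30s
```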

For the node, `memory.availableBytes` is always reported since `memory.limit_in_bytes` is always known for the root cgroup. For individual containers in pods, we only populate `availableBytes` when the container was launched with a memory limit. When no limit is specified, cgroupfs reports a value of 1 << 63 in `memory.limit_in_bytes`, so we treat any limit near that maximum as unbounded and omit `memory.availableBytes`.
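A minimal, self-contained sketch of that calculation (plain `uint64` inputs stand in for the cadvisor spec and stats types used in the actual change):

```go
package main

import "fmt"

// Limits above this are treated as "unlimited". cgroupfs reports roughly
// 1 << 63 in memory.limit_in_bytes when no limit is set, so anything above
// 1 << 62 is considered unbounded rather than comparing against an exact max.
const maxMemorySize = uint64(1) << 62

// availableBytes returns limit - workingSet, or nil when the limit is
// effectively unbounded, mirroring how the summary omits availableBytes.
func availableBytes(limitBytes, workingSetBytes uint64) *uint64 {
	if limitBytes > maxMemorySize {
		return nil // no meaningful limit, so omit the field
	}
	avail := limitBytes - workingSetBytes
	return &avail
}

func main() {
	// Container with a 200Mi limit and a 150Mi working set -> 50Mi available.
	if avail := availableBytes(200<<20, 150<<20); avail != nil {
		fmt.Println(*avail) // 52428800
	}

	// Container launched without a memory limit -> availableBytes omitted.
	if avail := availableBytes(1<<63, 150<<20); avail == nil {
		fmt.Println("availableBytes omitted for unbounded limit")
	}
}
```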

FYI @vishh @timstclair - as discussed on Slack.

/cc @kubernetes/sig-node @kubernetes/rh-cluster-infra
k8s-merge-robot 2016-04-17 06:32:36 -07:00
commit 75b49f591a
3 changed files with 71 additions and 21 deletions

@@ -128,10 +128,13 @@ type CPUStats struct {
type MemoryStats struct {
// The time at which these stats were updated.
Time unversioned.Time `json:"time"`
// Available memory for use. This is defined as the memory limit - workingSetBytes.
// If memory limit is undefined, the available bytes is omitted.
AvailableBytes *uint64 `json:"availableBytes,omitempty"`
// Total memory in use. This includes all memory regardless of when it was accessed.
UsageBytes *uint64 `json:"usageBytes,omitempty"`
// The amount of working set memory. This includes recently accessed memory,
// dirty memory, and kernel memory. UsageBytes is <= TotalBytes.
// dirty memory, and kernel memory. WorkingSetBytes is <= UsageBytes
WorkingSetBytes *uint64 `json:"workingSetBytes,omitempty"`
// The amount of anonymous and swap cache memory (includes transparent
// hugepages).

@@ -32,6 +32,7 @@ import (
"k8s.io/kubernetes/pkg/types"
"github.com/golang/glog"
cadvisorapiv1 "github.com/google/cadvisor/info/v1"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
)
@@ -284,12 +285,27 @@ func (sb *summaryBuilder) containerInfoV2ToStats(
PageFaults: &pageFaults,
MajorPageFaults: &majorPageFaults,
}
// availableBytes = memory limit (if known) - workingset
if !isMemoryUnlimited(info.Spec.Memory.Limit) {
availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
cStats.Memory.AvailableBytes = &availableBytes
}
}
sb.containerInfoV2FsStats(info, &cStats)
cStats.UserDefinedMetrics = sb.containerInfoV2ToUserDefinedMetrics(info)
return cStats
}
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
// TODO: cadvisor should export this https://github.com/google/cadvisor/blob/master/metrics/prometheus.go#L596
const maxMemorySize = uint64(1 << 62)
func isMemoryUnlimited(v uint64) bool {
return v > maxMemorySize
}
func (sb *summaryBuilder) containerInfoV2ToNetworkStats(name string, info *cadvisorapiv2.ContainerInfo) *stats.NetworkStats {
if !info.Spec.HasNetwork {
return nil

@@ -100,7 +100,7 @@ func TestBuildSummary(t *testing.T) {
// Pod0 - Namespace0
"/pod0-i": summaryTestContainerInfo(seedPod0Infra, pName0, namespace0, leaky.PodInfraContainerName),
"/pod0-c0": summaryTestContainerInfo(seedPod0Container0, pName0, namespace0, cName00),
"/pod0-c2": summaryTestContainerInfo(seedPod0Container1, pName0, namespace0, cName01),
"/pod0-c1": summaryTestContainerInfo(seedPod0Container1, pName0, namespace0, cName01),
// Pod1 - Namespace0
"/pod1-i": summaryTestContainerInfo(seedPod1Infra, pName1, namespace0, leaky.PodInfraContainerName),
"/pod1-c0": summaryTestContainerInfo(seedPod1Container, pName1, namespace0, cName10),
@@ -112,6 +112,20 @@ func TestBuildSummary(t *testing.T) {
rootfs := v2.FsInfo{}
imagefs := v2.FsInfo{}
// memory limit overrides for each container (used to test available bytes if a memory limit is known)
memoryLimitOverrides := map[string]uint64{
"/": uint64(1 << 30),
"/pod2-c0": uint64(1 << 15),
}
for name, memoryLimitOverride := range memoryLimitOverrides {
info, found := infos[name]
if !found {
t.Errorf("No container defined with name %v", name)
}
info.Spec.Memory.Limit = memoryLimitOverride
infos[name] = info
}
sb := &summaryBuilder{
newFsResourceAnalyzer(&MockStatsProvider{}, time.Minute*5), &node, nodeConfig, rootfs, imagefs, infos}
summary, err := sb.build()
@@ -121,7 +135,7 @@ func TestBuildSummary(t *testing.T) {
assert.Equal(t, "FooNode", nodeStats.NodeName)
assert.EqualValues(t, testTime(creationTime, seedRoot).Unix(), nodeStats.StartTime.Time.Unix())
checkCPUStats(t, "Node", seedRoot, nodeStats.CPU)
checkMemoryStats(t, "Node", seedRoot, nodeStats.Memory)
checkMemoryStats(t, "Node", seedRoot, infos["/"], nodeStats.Memory)
checkNetworkStats(t, "Node", seedRoot, nodeStats.Network)
systemSeeds := map[string]int{
@@ -129,15 +143,21 @@ func TestBuildSummary(t *testing.T) {
kubestats.SystemContainerKubelet: seedKubelet,
kubestats.SystemContainerMisc: seedMisc,
}
systemContainerToNodeCgroup := map[string]string{
kubestats.SystemContainerRuntime: nodeConfig.RuntimeCgroupsName,
kubestats.SystemContainerKubelet: nodeConfig.KubeletCgroupsName,
kubestats.SystemContainerMisc: nodeConfig.SystemCgroupsName,
}
for _, sys := range nodeStats.SystemContainers {
name := sys.Name
info := infos[systemContainerToNodeCgroup[name]]
seed, found := systemSeeds[name]
if !found {
t.Errorf("Unknown SystemContainer: %q", name)
}
assert.EqualValues(t, testTime(creationTime, seed).Unix(), sys.StartTime.Time.Unix(), name+".StartTime")
checkCPUStats(t, name, seed, sys.CPU)
checkMemoryStats(t, name, seed, sys.Memory)
checkMemoryStats(t, name, seed, info, sys.Memory)
}
assert.Equal(t, 3, len(summary.Pods))
@@ -156,16 +176,16 @@ func TestBuildSummary(t *testing.T) {
}
con := indexCon[cName00]
assert.EqualValues(t, testTime(creationTime, seedPod0Container0).Unix(), con.StartTime.Time.Unix())
checkCPUStats(t, "container", seedPod0Container0, con.CPU)
checkMemoryStats(t, "container", seedPod0Container0, con.Memory)
checkCPUStats(t, "Pod0Container0", seedPod0Container0, con.CPU)
checkMemoryStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Memory)
con = indexCon[cName01]
assert.EqualValues(t, testTime(creationTime, seedPod0Container1).Unix(), con.StartTime.Time.Unix())
checkCPUStats(t, "container", seedPod0Container1, con.CPU)
checkMemoryStats(t, "container", seedPod0Container1, con.Memory)
checkCPUStats(t, "Pod0Container1", seedPod0Container1, con.CPU)
checkMemoryStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Memory)
assert.EqualValues(t, testTime(creationTime, seedPod0Infra).Unix(), ps.StartTime.Time.Unix())
checkNetworkStats(t, "Pod", seedPod0Infra, ps.Network)
checkNetworkStats(t, "Pod0", seedPod0Infra, ps.Network)
// Validate Pod1 Results
ps, found = indexPods[prf1]
@@ -173,9 +193,9 @@ func TestBuildSummary(t *testing.T) {
assert.Len(t, ps.Containers, 1)
con = ps.Containers[0]
assert.Equal(t, cName10, con.Name)
checkCPUStats(t, "container", seedPod1Container, con.CPU)
checkMemoryStats(t, "container", seedPod1Container, con.Memory)
checkNetworkStats(t, "Pod", seedPod1Infra, ps.Network)
checkCPUStats(t, "Pod1Container0", seedPod1Container, con.CPU)
checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory)
checkNetworkStats(t, "Pod1", seedPod1Infra, ps.Network)
// Validate Pod2 Results
ps, found = indexPods[prf2]
@@ -183,9 +203,9 @@ func TestBuildSummary(t *testing.T) {
assert.Len(t, ps.Containers, 1)
con = ps.Containers[0]
assert.Equal(t, cName20, con.Name)
checkCPUStats(t, "container", seedPod2Container, con.CPU)
checkMemoryStats(t, "container", seedPod2Container, con.Memory)
checkNetworkStats(t, "Pod", seedPod2Infra, ps.Network)
checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)
}
func generateCustomMetricSpec() []v1.MetricSpec {
@@ -243,12 +263,17 @@ func summaryTestContainerInfo(seed int, podName string, podNamespace string, con
"io.kubernetes.container.name": containerName,
}
}
// by default, the kernel sets memory.limit_in_bytes to 1 << 63 when no limit is specified
unlimitedMemory := uint64(1 << 63)
spec := v2.ContainerSpec{
CreationTime: testTime(creationTime, seed),
HasCpu: true,
HasMemory: true,
HasNetwork: true,
Labels: labels,
Memory: v2.MemorySpec{
Limit: unlimitedMemory,
},
CustomMetrics: generateCustomMetricSpec(),
}
@@ -308,13 +333,19 @@ func checkCPUStats(t *testing.T, label string, seed int, stats *kubestats.CPUSta
assert.EqualValues(t, seed+offsetCPUUsageCoreSeconds, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")
}
func checkMemoryStats(t *testing.T, label string, seed int, stats *kubestats.MemoryStats) {
func checkMemoryStats(t *testing.T, label string, seed int, info v2.ContainerInfo, stats *kubestats.MemoryStats) {
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".Mem.Time")
assert.EqualValues(t, seed+offsetMemUsageBytes, *stats.UsageBytes, label+".Mem.UsageBytes")
assert.EqualValues(t, seed+offsetMemWorkingSetBytes, *stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
assert.EqualValues(t, seed+offsetMemRSSBytes, *stats.RSSBytes, label+".Mem.RSSBytes")
assert.EqualValues(t, seed+offsetMemPageFaults, *stats.PageFaults, label+".Mem.PageFaults")
assert.EqualValues(t, seed+offsetMemMajorPageFaults, *stats.MajorPageFaults, label+".Mem.MajorPageFaults")
if !info.Spec.HasMemory || isMemoryUnlimited(info.Spec.Memory.Limit) {
assert.Nil(t, stats.AvailableBytes, label+".Mem.AvailableBytes")
} else {
expected := info.Spec.Memory.Limit - *stats.WorkingSetBytes
assert.EqualValues(t, expected, *stats.AvailableBytes, label+".Mem.AvailableBytes")
}
}
func TestCustomMetrics(t *testing.T) {