From b8fc546d60ebd22305550a5f4eaab8a83fde7ab0 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Thu, 27 Oct 2016 13:56:55 -0700 Subject: [PATCH] eviction manager evicts pod using the most inodes. --- pkg/kubelet/api/v1alpha1/stats/types.go | 4 + pkg/kubelet/eviction/eviction_manager_test.go | 142 +++++++++--------- pkg/kubelet/eviction/helpers.go | 13 +- pkg/kubelet/eviction/helpers_test.go | 78 +++++++--- pkg/kubelet/server/stats/summary.go | 31 +++- test/e2e_node/summary_test.go | 5 + 6 files changed, 169 insertions(+), 104 deletions(-) diff --git a/pkg/kubelet/api/v1alpha1/stats/types.go b/pkg/kubelet/api/v1alpha1/stats/types.go index b65c36928c6..36a8e14962a 100644 --- a/pkg/kubelet/api/v1alpha1/stats/types.go +++ b/pkg/kubelet/api/v1alpha1/stats/types.go @@ -208,6 +208,10 @@ type FsStats struct { // Inodes represents the total inodes in the filesystem. // +optional Inodes *uint64 `json:"inodes,omitempty"` + // InodesUsed represents the inodes used by the filesystem. + // This may not equal Inodes - InodesFree because this filesystem may share inodes with other "filesystems", + // e.g. for ContainerStats.Rootfs, this is the inodes used only by that container, and does not count inodes used by other containers. + InodesUsed *uint64 `json:"inodesUsed,omitempty"` } // UserDefinedMetricType defines how the metric should be interpreted by the user. diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index f35b0b1a99c..d74b39561ac 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -100,12 +100,12 @@ func TestMemoryPressure(t *testing.T) { limits api.ResourceList memoryWorkingSet string }{ - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "500Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "200Mi"}, + {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, + {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, + {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "800Mi"}, + {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "300Mi"}, + {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "500Mi"}, } pods := []*api.Pod{} podStats := map[*api.Pod]statsapi.PodStats{} @@ -114,6 +114,7 @@ func TestMemoryPressure(t *testing.T) { pods = append(pods, pod) podStats[pod] = podStat } + podToEvict := pods[5] activePodsFunc := func() []*api.Pod { return pods } @@ -190,7 +191,7 @@ func TestMemoryPressure(t *testing.T) { // verify no pod was yet killed because there
has not yet been enough time passed. if podKiller.pod != nil { - t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod) + t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name) } // step forward in time pass the grace period @@ -204,8 +205,8 @@ func TestMemoryPressure(t *testing.T) { } // verify the right pod was killed with the right grace period. - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } if podKiller.gracePeriodOverride == nil { t.Errorf("Manager chose to kill pod but should have had a grace period override.") @@ -239,8 +240,8 @@ func TestMemoryPressure(t *testing.T) { } // check the right pod was killed - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } observedGracePeriod = *podKiller.gracePeriodOverride if observedGracePeriod != int64(0) { @@ -268,7 +269,7 @@ func TestMemoryPressure(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // the best-effort pod should not admit, burstable should @@ -292,7 +293,7 @@ func TestMemoryPressure(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // all pods should admit now @@ -355,12 +356,12 @@ func TestDiskPressureNodeFs(t *testing.T) { logsFsUsed string perLocalVolumeUsed string }{ - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "300Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"}, {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "200Mi"}, + {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"}, + {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "300Mi"}, + {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"}, + {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "300Mi"}, + {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"}, } pods := []*api.Pod{} podStats := 
map[*api.Pod]statsapi.PodStats{} @@ -369,6 +370,7 @@ func TestDiskPressureNodeFs(t *testing.T) { pods = append(pods, pod) podStats[pod] = podStat } + podToEvict := pods[5] activePodsFunc := func() []*api.Pod { return pods } @@ -441,7 +443,7 @@ func TestDiskPressureNodeFs(t *testing.T) { // verify no pod was yet killed because there has not yet been enough time passed. if podKiller.pod != nil { - t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod) + t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name) } // step forward in time pass the grace period @@ -455,8 +457,8 @@ func TestDiskPressureNodeFs(t *testing.T) { } // verify the right pod was killed with the right grace period. - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } if podKiller.gracePeriodOverride == nil { t.Errorf("Manager chose to kill pod but should have had a grace period override.") @@ -490,8 +492,8 @@ func TestDiskPressureNodeFs(t *testing.T) { } // check the right pod was killed - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } observedGracePeriod = *podKiller.gracePeriodOverride if observedGracePeriod != int64(0) { @@ -516,7 +518,7 @@ func TestDiskPressureNodeFs(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // try to admit our pod (should fail) @@ -537,7 +539,7 @@ func TestDiskPressureNodeFs(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // try to admit our pod (should succeed) @@ -579,12 +581,12 @@ func TestMinReclaim(t *testing.T) { limits api.ResourceList memoryWorkingSet string }{ - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "500Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "200Mi"}, + {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, + {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, + {name: "burstable-high", requests: 
newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "800Mi"}, + {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "300Mi"}, + {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "500Mi"}, } pods := []*api.Pod{} podStats := map[*api.Pod]statsapi.PodStats{} @@ -593,6 +595,7 @@ func TestMinReclaim(t *testing.T) { pods = append(pods, pod) podStats[pod] = podStat } + podToEvict := pods[5] activePodsFunc := func() []*api.Pod { return pods } @@ -651,8 +654,8 @@ func TestMinReclaim(t *testing.T) { } // check the right pod was killed - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } observedGracePeriod := *podKiller.gracePeriodOverride if observedGracePeriod != int64(0) { @@ -671,8 +674,8 @@ func TestMinReclaim(t *testing.T) { } // check the right pod was killed - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } observedGracePeriod = *podKiller.gracePeriodOverride if observedGracePeriod != int64(0) { @@ -692,7 +695,7 @@ func TestMinReclaim(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // move the clock past transition period to ensure that we stop reporting pressure @@ -708,7 +711,7 @@ func TestMinReclaim(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } } @@ -755,12 +758,12 @@ func TestNodeReclaimFuncs(t *testing.T) { logsFsUsed string perLocalVolumeUsed string }{ - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "300Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"}, {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "200Mi"}, + {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"}, + {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "300Mi"}, + {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"}, + {name: "best-effort-low", requests: 
newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "300Mi"}, + {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"}, } pods := []*api.Pod{} podStats := map[*api.Pod]statsapi.PodStats{} @@ -769,6 +772,7 @@ func TestNodeReclaimFuncs(t *testing.T) { pods = append(pods, pod) podStats[pod] = podStat } + podToEvict := pods[5] activePodsFunc := func() []*api.Pod { return pods } @@ -834,7 +838,7 @@ func TestNodeReclaimFuncs(t *testing.T) { // verify no pod was killed because image gc was sufficient if podKiller.pod != nil { - t.Errorf("Manager should not have killed a pod, but killed: %v", podKiller.pod) + t.Errorf("Manager should not have killed a pod, but killed: %v", podKiller.pod.Name) } // reset state @@ -866,8 +870,8 @@ func TestNodeReclaimFuncs(t *testing.T) { } // check the right pod was killed - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } observedGracePeriod := *podKiller.gracePeriodOverride if observedGracePeriod != int64(0) { @@ -893,7 +897,7 @@ func TestNodeReclaimFuncs(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // move the clock past transition period to ensure that we stop reporting pressure @@ -915,17 +919,16 @@ func TestNodeReclaimFuncs(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } } -func TestDiskPressureNodeFsInodes(t *testing.T) { - // TODO(dashpole): we need to know inodes used when cadvisor supports per container stats - podMaker := func(name string, requests api.ResourceList, limits api.ResourceList) (*api.Pod, statsapi.PodStats) { +func TestInodePressureNodeFsInodes(t *testing.T) { + podMaker := func(name string, requests api.ResourceList, limits api.ResourceList, rootInodes, logInodes, volumeInodes string) (*api.Pod, statsapi.PodStats) { pod := newPod(name, []api.Container{ newContainer(name, requests, limits), }, nil) - podStats := newPodInodeStats(pod) + podStats := newPodInodeStats(pod, parseQuantity(rootInodes), parseQuantity(logInodes), parseQuantity(volumeInodes)) return pod, podStats } summaryStatsMaker := func(rootFsInodesFree, rootFsInodes string, podStats map[*api.Pod]statsapi.PodStats) *statsapi.Summary { @@ -947,26 +950,29 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { } return result } - // TODO(dashpole): pass inodes used in future when supported by cadvisor. 
podsToMake := []struct { - name string - requests api.ResourceList - limits api.ResourceList + name string + requests api.ResourceList + limits api.ResourceList + rootFsInodesUsed string + logsFsInodesUsed string + perLocalVolumeInodesUsed string }{ - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", "")}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", "")}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi")}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi")}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi")}, - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi")}, + {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsInodesUsed: "200Mi"}, + {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsInodesUsed: "800Mi"}, + {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "300Mi"}, + {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "800Mi"}, + {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "300Mi"}, + {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "800Mi"}, } pods := []*api.Pod{} podStats := map[*api.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits) + pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed) pods = append(pods, pod) podStats[pod] = podStat } + podToEvict := pods[5] activePodsFunc := func() []*api.Pod { return pods } @@ -1012,7 +1018,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { } // create a best effort pod to test admission - podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", "")) + podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""), "0", "0", "0") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc) @@ -1039,7 +1045,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { // verify no pod was yet killed because there has not yet been enough time passed. if podKiller.pod != nil { - t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod) + t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name) } // step forward in time pass the grace period @@ -1053,8 +1059,8 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { } // verify the right pod was killed with the right grace period. 
- if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } if podKiller.gracePeriodOverride == nil { t.Errorf("Manager chose to kill pod but should have had a grace period override.") @@ -1088,8 +1094,8 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { } // check the right pod was killed - if podKiller.pod != pods[0] { - t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, pods[0]) + if podKiller.pod != podToEvict { + t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name) } observedGracePeriod = *podKiller.gracePeriodOverride if observedGracePeriod != int64(0) { @@ -1114,7 +1120,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // try to admit our pod (should fail) @@ -1135,7 +1141,7 @@ func TestDiskPressureNodeFsInodes(t *testing.T) { // no pod should have been killed if podKiller.pod != nil { - t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod) + t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name) } // try to admit our pod (should succeed) diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 0948a1df948..9ece5da99c8 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -319,14 +319,11 @@ func diskUsage(fsStats *statsapi.FsStats) *resource.Quantity { // inodeUsage converts inodes consumed into a resource quantity. func inodeUsage(fsStats *statsapi.FsStats) *resource.Quantity { - // TODO: cadvisor needs to support inodes used per container - // right now, cadvisor reports total inodes and inodes free per filesystem. - // this is insufficient to know how many inodes are consumed by the container. - // for example, with the overlay driver, the rootfs and each container filesystem - // will report the same total inode and inode free values but no way of knowing - // how many inodes consumed in that filesystem are charged to this container. - // for now, we report 0 as inode usage pending support in cadvisor. - return resource.NewQuantity(int64(0), resource.BinarySI) + if fsStats == nil || fsStats.InodesUsed == nil { + return &resource.Quantity{Format: resource.BinarySI} + } + usage := int64(*fsStats.InodesUsed) + return resource.NewQuantity(usage, resource.BinarySI) } // memoryUsage converts working set into a resource quantity. 
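For context on the helpers.go change above: inodeUsage now reads FsStats.InodesUsed instead of always reporting zero, so the existing disk comparator can rank pods by inode consumption as well as by bytes. Below is a minimal, self-contained sketch (not part of the patch) of that aggregation; fsStats and totalPodInodes are illustrative stand-ins for statsapi.FsStats and the comparator's summing logic, and the resource import path may differ by Kubernetes version.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/resource"
)

// fsStats is an illustrative stand-in for statsapi.FsStats; only InodesUsed matters here.
type fsStats struct {
	InodesUsed *uint64
}

// inodeUsage mirrors the reworked helper above: missing stats yield a zero quantity.
func inodeUsage(fs *fsStats) *resource.Quantity {
	if fs == nil || fs.InodesUsed == nil {
		return &resource.Quantity{Format: resource.BinarySI}
	}
	return resource.NewQuantity(int64(*fs.InodesUsed), resource.BinarySI)
}

// totalPodInodes (hypothetical name) sums rootfs, logs, and local-volume inode usage,
// which is the quantity the disk/inode comparator ranks pods by when deciding evictions.
func totalPodInodes(rootfs, logs *fsStats, volumes []*fsStats) *resource.Quantity {
	total := resource.NewQuantity(0, resource.BinarySI)
	total.Add(*inodeUsage(rootfs))
	total.Add(*inodeUsage(logs))
	for _, v := range volumes {
		total.Add(*inodeUsage(v))
	}
	return total
}

func main() {
	root, logs := uint64(1000), uint64(200)
	// The pod with the largest total is evicted first within its QoS tier.
	fmt.Println(totalPodInodes(&fsStats{&root}, &fsStats{&logs}, nil).Value()) // prints 1200
}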
diff --git a/pkg/kubelet/eviction/helpers_test.go b/pkg/kubelet/eviction/helpers_test.go index 107872907d1..b53a5008baa 100644 --- a/pkg/kubelet/eviction/helpers_test.go +++ b/pkg/kubelet/eviction/helpers_test.go @@ -418,8 +418,17 @@ func TestOrderedByQoS(t *testing.T) { } } -// TestOrderedByDisk ensures we order pods by greediest disk consumer -func TestOrderedByDisk(t *testing.T) { +func TestOrderedbyDisk(t *testing.T) { + testOrderedByResource(t, resourceDisk, newPodDiskStats) +} + +func TestOrderedbyInodes(t *testing.T) { + testOrderedByResource(t, resourceInodes, newPodInodeStats) +} + +// testOrderedByDisk ensures we order pods by greediest resource consumer +func testOrderedByResource(t *testing.T, orderedByResource api.ResourceName, + newPodStatsFunc func(pod *api.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats) { pod1 := newPod("best-effort-high", []api.Container{ newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), }, []api.Volume{ @@ -463,19 +472,19 @@ func TestOrderedByDisk(t *testing.T) { }), }) stats := map[*api.Pod]statsapi.PodStats{ - pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 200Mi - pod2: newPodDiskStats(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi - pod3: newPodDiskStats(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi - pod4: newPodDiskStats(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi - pod5: newPodDiskStats(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi - pod6: newPodDiskStats(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi + pod1: newPodStatsFunc(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 200Mi + pod2: newPodStatsFunc(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi + pod3: newPodStatsFunc(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi + pod4: newPodStatsFunc(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi + pod5: newPodStatsFunc(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi + pod6: newPodStatsFunc(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi } statsFn := func(pod *api.Pod) (statsapi.PodStats, bool) { result, found := stats[pod] return result, found } pods := []*api.Pod{pod1, pod2, pod3, pod4, pod5, pod6} - orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods) + orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, orderedByResource)).Sort(pods) expected := []*api.Pod{pod6, pod5, pod4, pod3, pod2, pod1} for i := range expected { if pods[i] != expected[i] { @@ -484,8 +493,17 @@ func TestOrderedByDisk(t *testing.T) { } } -// TestOrderedByQoSDisk ensures we order pods by qos and then greediest disk consumer -func TestOrderedByQoSDisk(t *testing.T) { +func TestOrderedbyQoSDisk(t *testing.T) { + testOrderedByQoSResource(t, resourceDisk, newPodDiskStats) +} + +func TestOrderedbyQoSInodes(t *testing.T) { + testOrderedByQoSResource(t, 
resourceInodes, newPodInodeStats) +} + +// testOrderedByQoSDisk ensures we order pods by qos and then greediest resource consumer +func testOrderedByQoSResource(t *testing.T, orderedByResource api.ResourceName, + newPodStatsFunc func(pod *api.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats) { pod1 := newPod("best-effort-high", []api.Container{ newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), }, []api.Volume{ @@ -529,19 +547,19 @@ func TestOrderedByQoSDisk(t *testing.T) { }), }) stats := map[*api.Pod]statsapi.PodStats{ - pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 200Mi - pod2: newPodDiskStats(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi - pod3: newPodDiskStats(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi - pod4: newPodDiskStats(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi - pod5: newPodDiskStats(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi - pod6: newPodDiskStats(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi + pod1: newPodStatsFunc(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 200Mi + pod2: newPodStatsFunc(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi + pod3: newPodStatsFunc(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi + pod4: newPodStatsFunc(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi + pod5: newPodStatsFunc(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi + pod6: newPodStatsFunc(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi } statsFn := func(pod *api.Pod) (statsapi.PodStats, bool) { result, found := stats[pod] return result, found } pods := []*api.Pod{pod1, pod2, pod3, pod4, pod5, pod6} - orderedBy(qosComparator, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods) + orderedBy(qosComparator, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, orderedByResource)).Sort(pods) expected := []*api.Pod{pod2, pod1, pod4, pod3, pod6, pod5} for i := range expected { if pods[i] != expected[i] { @@ -1430,16 +1448,32 @@ func testCompareThresholdValue(t *testing.T) { } // newPodInodeStats returns stats with specified usage amounts. -// TODO: in future, this should take a value for inodesUsed per container. 
-func newPodInodeStats(pod *api.Pod) statsapi.PodStats { +func newPodInodeStats(pod *api.Pod, rootFsInodesUsed, logsInodesUsed, perLocalVolumeInodesUsed resource.Quantity) statsapi.PodStats { result := statsapi.PodStats{ PodRef: statsapi.PodReference{ Name: pod.Name, Namespace: pod.Namespace, UID: string(pod.UID), }, } + rootFsUsed := uint64(rootFsInodesUsed.Value()) + logsUsed := uint64(logsInodesUsed.Value()) for range pod.Spec.Containers { result.Containers = append(result.Containers, statsapi.ContainerStats{ - Rootfs: &statsapi.FsStats{}, + Rootfs: &statsapi.FsStats{ + InodesUsed: &rootFsUsed, + }, + Logs: &statsapi.FsStats{ + InodesUsed: &logsUsed, + }, + }) + } + + perLocalVolumeUsed := uint64(perLocalVolumeInodesUsed.Value()) + for _, volumeName := range localVolumeNames(pod) { + result.VolumeStats = append(result.VolumeStats, statsapi.VolumeStats{ + Name: volumeName, + FsStats: statsapi.FsStats{ + InodesUsed: &perLocalVolumeUsed, + }, }) } return result @@ -1528,10 +1562,12 @@ func newVolume(name string, volumeSource api.VolumeSource) api.Volume { } } +// newPod uses the name as the uid. Make names unique for testing. func newPod(name string, containers []api.Container, volumes []api.Volume) *api.Pod { return &api.Pod{ ObjectMeta: api.ObjectMeta{ Name: name, + UID: types.UID(name), }, Spec: api.PodSpec{ Containers: containers, diff --git a/pkg/kubelet/server/stats/summary.go b/pkg/kubelet/server/stats/summary.go index 59144e47c2d..fc94034ff6d 100644 --- a/pkg/kubelet/server/stats/summary.go +++ b/pkg/kubelet/server/stats/summary.go @@ -115,6 +115,8 @@ func (sb *summaryBuilder) build() (*stats.Summary, error) { return nil, fmt.Errorf("Missing stats for root container") } + nodeFsInodesUsed := *sb.rootFsInfo.Inodes - *sb.rootFsInfo.InodesFree + imageFsInodesUsed := *sb.imageFsInfo.Inodes - *sb.imageFsInfo.InodesFree rootStats := sb.containerInfoV2ToStats("", &rootInfo) nodeStats := stats.NodeStats{ NodeName: sb.node.Name, @@ -126,7 +128,9 @@ func (sb *summaryBuilder) build() (*stats.Summary, error) { CapacityBytes: &sb.rootFsInfo.Capacity, UsedBytes: &sb.rootFsInfo.Usage, InodesFree: sb.rootFsInfo.InodesFree, - Inodes: sb.rootFsInfo.Inodes}, + Inodes: sb.rootFsInfo.Inodes, + InodesUsed: &nodeFsInodesUsed, + }, StartTime: rootStats.StartTime, Runtime: &stats.RuntimeStats{ ImageFs: &stats.FsStats{ @@ -135,6 +139,7 @@ func (sb *summaryBuilder) build() (*stats.Summary, error) { UsedBytes: &sb.imageStats.TotalStorageBytes, InodesFree: sb.imageFsInfo.InodesFree, Inodes: sb.imageFsInfo.Inodes, + InodesUsed: &imageFsInodesUsed, }, }, } @@ -174,6 +179,11 @@ func (sb *summaryBuilder) containerInfoV2FsStats( Inodes: sb.rootFsInfo.Inodes, } + if sb.rootFsInfo.Inodes != nil && sb.rootFsInfo.InodesFree != nil { + logsInodesUsed := *sb.rootFsInfo.Inodes - *sb.rootFsInfo.InodesFree + cs.Logs.InodesUsed = &logsInodesUsed + } + // The container rootFs lives on the imageFs devices (which may not be the node root fs) cs.Rootfs = &stats.FsStats{ AvailableBytes: &sb.imageFsInfo.Available, @@ -186,12 +196,19 @@ func (sb *summaryBuilder) containerInfoV2FsStats( return } cfs := lcs.Filesystem - if cfs != nil && cfs.BaseUsageBytes != nil { - rootfsUsage := *cfs.BaseUsageBytes - cs.Rootfs.UsedBytes = &rootfsUsage - if cfs.TotalUsageBytes != nil { - logsUsage := *cfs.TotalUsageBytes - *cfs.BaseUsageBytes - cs.Logs.UsedBytes = &logsUsage + + if cfs != nil { + if cfs.BaseUsageBytes != nil { + rootfsUsage := *cfs.BaseUsageBytes + cs.Rootfs.UsedBytes = &rootfsUsage + if cfs.TotalUsageBytes != nil { + logsUsage := 
*cfs.TotalUsageBytes - *cfs.BaseUsageBytes + cs.Logs.UsedBytes = &logsUsage + } + } + if cfs.InodeUsage != nil { + rootInodes := *cfs.InodeUsage + cs.Rootfs.InodesUsed = &rootInodes } } } diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go index 9c8d4b3875e..51ff94b1442 100644 --- a/test/e2e_node/summary_test.go +++ b/test/e2e_node/summary_test.go @@ -111,6 +111,7 @@ var _ = framework.KubeDescribe("Summary API", func() { "UsedBytes": bounded(kb, 10*mb), "InodesFree": bounded(1E4, 1E8), "Inodes": bounded(1E4, 1E8), + "InodesUsed": bounded(0, 1E8), }), "Logs": ptrMatchAllFields(gstruct.Fields{ "AvailableBytes": fsCapacityBounds, @@ -118,6 +119,7 @@ var _ = framework.KubeDescribe("Summary API", func() { "UsedBytes": bounded(kb, 10*mb), "InodesFree": bounded(1E4, 1E8), "Inodes": bounded(1E4, 1E8), + "InodesUsed": bounded(0, 1E8), }), "UserDefinedMetrics": BeEmpty(), }), @@ -139,6 +141,7 @@ var _ = framework.KubeDescribe("Summary API", func() { // Inodes are not reported for Volumes. "InodesFree": BeNil(), "Inodes": BeNil(), + "InodesUsed": BeNil(), }), }), }), @@ -179,6 +182,7 @@ var _ = framework.KubeDescribe("Summary API", func() { "UsedBytes": bounded(kb, 10*gb), "InodesFree": bounded(1E4, 1E8), "Inodes": bounded(1E4, 1E8), + "InodesUsed": bounded(0, 1E8), }), "Runtime": ptrMatchAllFields(gstruct.Fields{ "ImageFs": ptrMatchAllFields(gstruct.Fields{ @@ -187,6 +191,7 @@ var _ = framework.KubeDescribe("Summary API", func() { "UsedBytes": bounded(kb, 10*gb), "InodesFree": bounded(1E4, 1E8), "Inodes": bounded(1E4, 1E8), + "InodesUsed": bounded(0, 1E8), }), }), }),
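The summary.go hunks above fill in InodesUsed at two levels: node-level filesystems (nodefs and imagefs) derive it as Inodes minus InodesFree, while a container's rootfs takes cadvisor's per-container InodeUsage. The following is a minimal sketch of those two calculations, not code from the patch; fsInfo and containerFsUsage are hypothetical stand-ins for the cadvisor structs, and nil guards are shown as an assumption before dereferencing cadvisor-provided pointers.

package main

import "fmt"

// fsInfo stands in for cadvisor's filesystem info as consumed by the summary builder.
type fsInfo struct {
	Inodes     *uint64
	InodesFree *uint64
}

// containerFsUsage stands in for the per-container filesystem stats cadvisor reports.
type containerFsUsage struct {
	InodeUsage *uint64 // inodes charged to this container only
}

// filesystemInodesUsed mirrors the nodefs/imagefs calculation: total minus free.
func filesystemInodesUsed(fs fsInfo) *uint64 {
	if fs.Inodes == nil || fs.InodesFree == nil {
		return nil
	}
	used := *fs.Inodes - *fs.InodesFree
	return &used
}

// containerRootfsInodesUsed mirrors the per-container path: only set when
// cadvisor reports InodeUsage for the container's filesystem.
func containerRootfsInodesUsed(cfs *containerFsUsage) *uint64 {
	if cfs == nil || cfs.InodeUsage == nil {
		return nil
	}
	used := *cfs.InodeUsage
	return &used
}

func main() {
	total, free := uint64(1000000), uint64(940000)
	if used := filesystemInodesUsed(fsInfo{Inodes: &total, InodesFree: &free}); used != nil {
		fmt.Println(*used) // prints 60000
	}
}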