diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index f33aa0ca530..aee07089fc7 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -27,6 +27,7 @@ import ( utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/record" kubeapi "k8s.io/kubernetes/pkg/apis/core" + "k8s.io/kubernetes/pkg/features" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" "k8s.io/kubernetes/pkg/kubelet/lifecycle" @@ -184,15 +185,15 @@ type podToMake struct { // TestMemoryPressure func TestMemoryPressure(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true}) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats podsToMake := []podToMake{ - {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "900Mi"}, - {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "50Mi"}, - {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "400Mi"}, - {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "400Mi"}, - {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, + {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"}, + {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"}, + {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -248,8 +249,8 @@ func TestMemoryPressure(t *testing.T) { } // create a best effort pod to test admission - bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0Gi") - burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") + bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi") + burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -402,15 +403,18 @@ func 
parseQuantity(value string) resource.Quantity { } func TestDiskPressureNodeFs(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{ + string(features.LocalStorageCapacityIsolation): true, + string(features.PodPriority): true, + }) podMaker := makePodWithDiskStats summaryStatsMaker := makeDiskStats podsToMake := []podToMake{ - {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"}, - {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"}, - {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "400Mi"}, - {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "400Mi"}, - {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -467,7 +471,7 @@ func TestDiskPressureNodeFs(t *testing.T) { } // create a best effort pod to test admission - podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0Gi", "0Gi", "0Gi") + podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -601,15 +605,15 @@ func TestDiskPressureNodeFs(t *testing.T) { // TestMinReclaim verifies that min-reclaim works as desired. 
func TestMinReclaim(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true}) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats podsToMake := []podToMake{ - {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "900Mi"}, - {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "50Mi"}, - {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "400Mi"}, - {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "400Mi"}, - {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, + {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"}, + {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"}, + {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -741,15 +745,18 @@ func TestMinReclaim(t *testing.T) { } func TestNodeReclaimFuncs(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{ + string(features.PodPriority): true, + string(features.LocalStorageCapacityIsolation): true, + }) podMaker := makePodWithDiskStats summaryStatsMaker := makeDiskStats podsToMake := []podToMake{ - {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"}, - {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"}, - {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "400Mi"}, - {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "400Mi"}, - {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: 
newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -915,7 +922,7 @@ func TestNodeReclaimFuncs(t *testing.T) { } func TestInodePressureNodeFsInodes(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true}) podMaker := func(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) { pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), @@ -943,11 +950,11 @@ func TestInodePressureNodeFsInodes(t *testing.T) { return result } podsToMake := []podToMake{ - {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsInodesUsed: "900Mi"}, - {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "50Mi"}, - {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "400Mi"}, - {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "400Mi"}, - {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "100Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -1004,7 +1011,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { } // create a best effort pod to test admission - podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0", "0", "0") + podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -1138,11 +1145,11 @@ func TestInodePressureNodeFsInodes(t *testing.T) { // TestCriticalPodsAreNotEvicted func 
TestCriticalPodsAreNotEvicted(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true}) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats podsToMake := []podToMake{ - {name: "critical", priority: defaultPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, + {name: "critical", priority: defaultPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "800Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -1209,7 +1216,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { } // Enable critical pod annotation feature gate - utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True") + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.ExperimentalCriticalPodAnnotation): true}) // induce soft threshold fakeClock.Step(1 * time.Minute) summaryProvider.result = summaryStatsMaker("1500Mi", podStats) @@ -1254,7 +1261,7 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { } // Disable critical pod annotation feature gate - utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False") + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.ExperimentalCriticalPodAnnotation): false}) // induce memory pressure! fakeClock.Step(1 * time.Minute) @@ -1274,16 +1281,16 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { // TestAllocatableMemoryPressure func TestAllocatableMemoryPressure(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.SetFromMap(map[string]bool{string(features.PodPriority): true}) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats constantCapacity := "4Gi" podsToMake := []podToMake{ - {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "900Mi"}, - {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "50Mi"}, - {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "400Mi"}, - {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "400Mi"}, - {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, + {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"}, + {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"}, + {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-low-priority-low-usage", priority: lowPriority, 
requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} @@ -1332,8 +1339,8 @@ func TestAllocatableMemoryPressure(t *testing.T) { } // create a best effort pod to test admission - bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0Gi") - burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") + bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi") + burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -1353,7 +1360,7 @@ func TestAllocatableMemoryPressure(t *testing.T) { // induce memory pressure! fakeClock.Step(1 * time.Minute) - pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi"), "1Gi") + pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi") podStats[pod] = podStat summaryProvider.result = summaryStatsMaker(constantCapacity, podStats) manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index f364a9ca130..f8be310b34c 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -390,20 +390,12 @@ func podDiskUsage(podStats statsapi.PodStats, pod *v1.Pod, statsToMeasure []fsSt // podMemoryUsage aggregates pod memory usage. func podMemoryUsage(podStats statsapi.PodStats) (v1.ResourceList, error) { - disk := resource.Quantity{Format: resource.BinarySI} memory := resource.Quantity{Format: resource.BinarySI} for _, container := range podStats.Containers { - // disk usage (if known) - for _, fsStats := range []*statsapi.FsStats{container.Rootfs, container.Logs} { - disk.Add(*diskUsage(fsStats)) - } // memory usage (if known) memory.Add(*memoryUsage(container.Memory)) } - return v1.ResourceList{ - v1.ResourceMemory: memory, - resourceDisk: disk, - }, nil + return v1.ResourceList{v1.ResourceMemory: memory}, nil } // localEphemeralVolumeNames returns the set of ephemeral volumes for the pod that are local @@ -543,89 +535,84 @@ func priority(p1, p2 *v1.Pod) int { // exceedMemoryRequests compares whether or not pods' memory usage exceeds their requests func exceedMemoryRequests(stats statsFunc) cmpFunc { return func(p1, p2 *v1.Pod) int { - p1Stats, found := stats(p1) - // if we have no usage stats for p1, we want p2 first - if !found { - return -1 + p1Stats, p1Found := stats(p1) + p2Stats, p2Found := stats(p2) + if !p1Found || !p2Found { + // prioritize evicting the pod for which no stats were found + return cmpBool(!p1Found, !p2Found) } - // if we have no usage stats for p2, but p1 has usage, we want p1 first. 
- p2Stats, found := stats(p2) - if !found { - return 1 - } - // if we cant get usage for p1 measured, we want p2 first - p1Usage, err := podMemoryUsage(p1Stats) - if err != nil { - return -1 - } - // if we cant get usage for p2 measured, we want p1 first - p2Usage, err := podMemoryUsage(p2Stats) - if err != nil { - return 1 + + p1Usage, p1Err := podMemoryUsage(p1Stats) + p2Usage, p2Err := podMemoryUsage(p2Stats) + if p1Err != nil || p2Err != nil { + // prioritize evicting the pod which had an error getting stats + return cmpBool(p1Err != nil, p2Err != nil) } + p1Memory := p1Usage[v1.ResourceMemory] p2Memory := p2Usage[v1.ResourceMemory] - p1ExceedsRequests := p1Memory.Cmp(podMemoryRequest(p1)) == 1 - p2ExceedsRequests := p2Memory.Cmp(podMemoryRequest(p2)) == 1 - if p1ExceedsRequests == p2ExceedsRequests { - return 0 - } - if p1ExceedsRequests && !p2ExceedsRequests { - // if p1 exceeds its requests, but p2 does not, then we want p2 first - return -1 - } - return 1 + p1ExceedsRequests := p1Memory.Cmp(podRequest(p1, v1.ResourceMemory)) == 1 + p2ExceedsRequests := p2Memory.Cmp(podRequest(p2, v1.ResourceMemory)) == 1 + // prioritize evicting the pod which exceeds its requests + return cmpBool(p1ExceedsRequests, p2ExceedsRequests) } } // memory compares pods by largest consumer of memory relative to request. func memory(stats statsFunc) cmpFunc { return func(p1, p2 *v1.Pod) int { - p1Stats, found := stats(p1) - // if we have no usage stats for p1, we want p2 first - if !found { - return -1 + p1Stats, p1Found := stats(p1) + p2Stats, p2Found := stats(p2) + if !p1Found || !p2Found { + // prioritize evicting the pod for which no stats were found + return cmpBool(!p1Found, !p2Found) } - // if we have no usage stats for p2, but p1 has usage, we want p1 first. - p2Stats, found := stats(p2) - if !found { - return 1 - } - // if we cant get usage for p1 measured, we want p2 first - p1Usage, err := podMemoryUsage(p1Stats) - if err != nil { - return -1 - } - // if we cant get usage for p2 measured, we want p1 first - p2Usage, err := podMemoryUsage(p2Stats) - if err != nil { - return 1 + + p1Usage, p1Err := podMemoryUsage(p1Stats) + p2Usage, p2Err := podMemoryUsage(p2Stats) + if p1Err != nil || p2Err != nil { + // prioritize evicting the pod which had an error getting stats + return cmpBool(p1Err != nil, p2Err != nil) } // adjust p1, p2 usage relative to the request (if any) p1Memory := p1Usage[v1.ResourceMemory] - p1Request := podMemoryRequest(p1) + p1Request := podRequest(p1, v1.ResourceMemory) p1Memory.Sub(p1Request) p2Memory := p2Usage[v1.ResourceMemory] - p2Request := podMemoryRequest(p2) + p2Request := podRequest(p2, v1.ResourceMemory) p2Memory.Sub(p2Request) - // if p2 is using more than p1, we want p2 first + // prioritize evicting the pod which has the larger consumption of memory return p2Memory.Cmp(p1Memory) } } -// podMemoryRequest returns the total memory request of a pod which is the +// podRequest returns the total resource request of a pod which is the // max(sum of init container requests, sum of container requests) -func podMemoryRequest(pod *v1.Pod) resource.Quantity { +func podRequest(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity { containerValue := resource.Quantity{Format: resource.BinarySI} + if resourceName == resourceDisk && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { + // if the local storage capacity isolation feature gate is disabled, pods request 0 disk + return containerValue + } for i := range pod.Spec.Containers { - 
containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.Memory()) + switch resourceName { + case v1.ResourceMemory: + containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.Memory()) + case resourceDisk: + containerValue.Add(*pod.Spec.Containers[i].Resources.Requests.StorageEphemeral()) + } } initValue := resource.Quantity{Format: resource.BinarySI} for i := range pod.Spec.InitContainers { - initValue.Add(*pod.Spec.InitContainers[i].Resources.Requests.Memory()) + switch resourceName { + case v1.ResourceMemory: + initValue.Add(*pod.Spec.InitContainers[i].Resources.Requests.Memory()) + case resourceDisk: + initValue.Add(*pod.Spec.InitContainers[i].Resources.Requests.StorageEphemeral()) + } } if containerValue.Cmp(initValue) > 0 { return containerValue @@ -633,39 +620,71 @@ func podMemoryRequest(pod *v1.Pod) resource.Quantity { return initValue } +// exceedDiskRequests compares whether or not pods' disk usage exceeds their requests +func exceedDiskRequests(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) cmpFunc { + return func(p1, p2 *v1.Pod) int { + p1Stats, p1Found := stats(p1) + p2Stats, p2Found := stats(p2) + if !p1Found || !p2Found { + // prioritize evicting the pod for which no stats were found + return cmpBool(!p1Found, !p2Found) + } + + p1Usage, p1Err := podDiskUsage(p1Stats, p1, fsStatsToMeasure) + p2Usage, p2Err := podDiskUsage(p2Stats, p2, fsStatsToMeasure) + if p1Err != nil || p2Err != nil { + // prioritize evicting the pod which had an error getting stats + return cmpBool(p1Err != nil, p2Err != nil) + } + + p1Disk := p1Usage[diskResource] + p2Disk := p2Usage[diskResource] + p1ExceedsRequests := p1Disk.Cmp(podRequest(p1, diskResource)) == 1 + p2ExceedsRequests := p2Disk.Cmp(podRequest(p2, diskResource)) == 1 + // prioritize evicting the pod which exceeds its requests + return cmpBool(p1ExceedsRequests, p2ExceedsRequests) + } +} + // disk compares pods by largest consumer of disk relative to request for the specified disk resource. func disk(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) cmpFunc { return func(p1, p2 *v1.Pod) int { - p1Stats, found := stats(p1) - // if we have no usage stats for p1, we want p2 first - if !found { - return -1 + p1Stats, p1Found := stats(p1) + p2Stats, p2Found := stats(p2) + if !p1Found || !p2Found { + // prioritize evicting the pod for which no stats were found + return cmpBool(!p1Found, !p2Found) } - // if we have no usage stats for p2, but p1 has usage, we want p1 first. - p2Stats, found := stats(p2) - if !found { - return 1 - } - // if we cant get usage for p1 measured, we want p2 first - p1Usage, err := podDiskUsage(p1Stats, p1, fsStatsToMeasure) - if err != nil { - return -1 - } - // if we cant get usage for p2 measured, we want p1 first - p2Usage, err := podDiskUsage(p2Stats, p2, fsStatsToMeasure) - if err != nil { - return 1 + p1Usage, p1Err := podDiskUsage(p1Stats, p1, fsStatsToMeasure) + p2Usage, p2Err := podDiskUsage(p2Stats, p2, fsStatsToMeasure) + if p1Err != nil || p2Err != nil { + // prioritize evicting the pod which had an error getting stats + return cmpBool(p1Err != nil, p2Err != nil) + } - // disk is best effort, so we don't measure relative to a request.
- // TODO: add disk as a guaranteed resource + // adjust p1, p2 usage relative to the request (if any) p1Disk := p1Usage[diskResource] p2Disk := p2Usage[diskResource] - // if p2 is using more than p1, we want p2 first + p1Request := podRequest(p1, resourceDisk) + p1Disk.Sub(p1Request) + p2Request := podRequest(p2, resourceDisk) + p2Disk.Sub(p2Request) + // prioritize evicting the pod which has the larger consumption of disk return p2Disk.Cmp(p1Disk) } } +// cmpBool compares booleans, placing true before false +func cmpBool(a, b bool) int { + if a == b { + return 0 + } + if !b { + return -1 + } + return 1 +} + // rankMemoryPressure orders the input pods for eviction in response to memory pressure. // It ranks by whether or not the pod's usage exceeds its requests, then by priority, and // finally by memory usage above requests. @@ -676,7 +695,7 @@ func rankMemoryPressure(pods []*v1.Pod, stats statsFunc) { // rankDiskPressureFunc returns a rankFunc that measures the specified fs stats. func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) rankFunc { return func(pods []*v1.Pod, stats statsFunc) { - orderedBy(priority, disk(stats, fsStatsToMeasure, diskResource)).Sort(pods) + orderedBy(exceedDiskRequests(stats, fsStatsToMeasure, diskResource), priority, disk(stats, fsStatsToMeasure, diskResource)).Sort(pods) } } diff --git a/pkg/kubelet/eviction/helpers_test.go b/pkg/kubelet/eviction/helpers_test.go index 2b2d5f98026..7af5c795ba5 100644 --- a/pkg/kubelet/eviction/helpers_test.go +++ b/pkg/kubelet/eviction/helpers_test.go @@ -409,17 +409,71 @@ func thresholdEqual(a evictionapi.Threshold, b evictionapi.Threshold) bool { compareThresholdValue(a.Value, b.Value) } -// TestOrderedByPriority ensures we order BestEffort < Burstable < Guaranteed +func TestOrderedByExceedsRequestMemory(t *testing.T) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) + below := newPod("below-requests", -1, []v1.Container{ + newContainer("below-requests", newResourceList("", "200Mi", ""), newResourceList("", "", "")), + }, nil) + exceeds := newPod("exceeds-requests", 1, []v1.Container{ + newContainer("exceeds-requests", newResourceList("", "100Mi", ""), newResourceList("", "", "")), + }, nil) + stats := map[*v1.Pod]statsapi.PodStats{ + below: newPodMemoryStats(below, resource.MustParse("199Mi")), // -1 relative to request + exceeds: newPodMemoryStats(exceeds, resource.MustParse("101Mi")), // 1 relative to request + } + statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { + result, found := stats[pod] + return result, found + } + pods := []*v1.Pod{below, exceeds} + orderedBy(exceedMemoryRequests(statsFn)).Sort(pods) + + expected := []*v1.Pod{exceeds, below} + for i := range expected { + if pods[i] != expected[i] { + t.Errorf("Expected pod: %s, but got: %s", expected[i].Name, pods[i].Name) + } + } +} + +func TestOrderedByExceedsRequestDisk(t *testing.T) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.LocalStorageCapacityIsolation)) + below := newPod("below-requests", -1, []v1.Container{ + newContainer("below-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("200Mi")}, newResourceList("", "", "")), + }, nil) + exceeds := newPod("exceeds-requests", 1, []v1.Container{ + newContainer("exceeds-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("100Mi")}, newResourceList("", "", "")), + }, nil) + stats := 
map[*v1.Pod]statsapi.PodStats{ + below: newPodDiskStats(below, resource.MustParse("100Mi"), resource.MustParse("99Mi"), resource.MustParse("0Mi")), // -1 relative to request + exceeds: newPodDiskStats(exceeds, resource.MustParse("90Mi"), resource.MustParse("11Mi"), resource.MustParse("0Mi")), // 1 relative to request + } + statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { + result, found := stats[pod] + return result, found + } + pods := []*v1.Pod{below, exceeds} + orderedBy(exceedDiskRequests(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods) + + expected := []*v1.Pod{exceeds, below} + for i := range expected { + if pods[i] != expected[i] { + t.Errorf("Expected pod: %s, but got: %s", expected[i].Name, pods[i].Name) + } + } +} + func TestOrderedByPriority(t *testing.T) { - enablePodPriority(true) + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) low := newPod("low-priority", -134, []v1.Container{ - newContainer("low-priority", newResourceList("", ""), newResourceList("", "")), + newContainer("low-priority", newResourceList("", "", ""), newResourceList("", "", "")), }, nil) medium := newPod("medium-priority", 1, []v1.Container{ - newContainer("medium-priority", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi")), + newContainer("medium-priority", newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", "")), }, nil) high := newPod("high-priority", 12534, []v1.Container{ - newContainer("high-priority", newResourceList("200m", "200Mi"), newResourceList("200m", "200Mi")), + newContainer("high-priority", newResourceList("200m", "200Mi", ""), newResourceList("200m", "200Mi", "")), }, nil) pods := []*v1.Pod{high, medium, low} @@ -433,17 +487,16 @@ func TestOrderedByPriority(t *testing.T) { } } -// TestOrderedByPriority ensures we order BestEffort < Burstable < Guaranteed func TestOrderedByPriorityDisabled(t *testing.T) { - enablePodPriority(false) + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=false", features.PodPriority)) low := newPod("low-priority", lowPriority, []v1.Container{ - newContainer("low-priority", newResourceList("", ""), newResourceList("", "")), + newContainer("low-priority", newResourceList("", "", ""), newResourceList("", "", "")), }, nil) medium := newPod("medium-priority", defaultPriority, []v1.Container{ - newContainer("medium-priority", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi")), + newContainer("medium-priority", newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", "")), }, nil) high := newPod("high-priority", highPriority, []v1.Container{ - newContainer("high-priority", newResourceList("200m", "200Mi"), newResourceList("200m", "200Mi")), + newContainer("high-priority", newResourceList("200m", "200Mi", ""), newResourceList("200m", "200Mi", "")), }, nil) pods := []*v1.Pod{high, medium, low} @@ -459,74 +512,64 @@ func TestOrderedByPriorityDisabled(t *testing.T) { } func TestOrderedbyDisk(t *testing.T) { - testOrderedByResource(t, resourceDisk, newPodDiskStats) -} - -func TestOrderedbyInodes(t *testing.T) { - testOrderedByResource(t, resourceInodes, newPodInodeStats) -} - -// testOrderedByDisk ensures we order pods by greediest resource consumer -func testOrderedByResource(t *testing.T, orderedByResource v1.ResourceName, - newPodStatsFunc func(pod *v1.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats) { - enablePodPriority(true) + 
utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.LocalStorageCapacityIsolation)) pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ - newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), + newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{ - newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")), + newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) pod3 := newPod("burstable-high", defaultPriority, []v1.Container{ - newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) pod4 := newPod("burstable-low", defaultPriority, []v1.Container{ - newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{ - newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{ - newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) stats := map[*v1.Pod]statsapi.PodStats{ - pod1: newPodStatsFunc(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 200Mi - pod2: newPodStatsFunc(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi - pod3: newPodStatsFunc(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi - pod4: newPodStatsFunc(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi - pod5: newPodStatsFunc(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi - pod6: newPodStatsFunc(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi + pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi - 0 = 300Mi + pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi - 0 = 100Mi + pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi - 100Mi = 250Mi + pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), 
resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi - 100Mi = 10Mi + pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi - 400Mi = -25Mi + pod6: newPodDiskStats(pod6, resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi - 400Mi = -280Mi } statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { result, found := stats[pod] return result, found } pods := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6} - orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, orderedByResource)).Sort(pods) - expected := []*v1.Pod{pod6, pod5, pod4, pod3, pod2, pod1} + orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods) + expected := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6} for i := range expected { if pods[i] != expected[i] { t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) @@ -534,58 +577,244 @@ func testOrderedByResource(t *testing.T, orderedByResource v1.ResourceName, } } -func TestOrderedbyPriorityDisk(t *testing.T) { - testOrderedByPriorityResource(t, resourceDisk, newPodDiskStats) -} - -func TestOrderedbyPriorityInodes(t *testing.T) { - testOrderedByPriorityResource(t, resourceInodes, newPodInodeStats) -} - -// testOrderedByPriorityDisk ensures we order pods by qos and then greediest resource consumer -func testOrderedByPriorityResource(t *testing.T, orderedByResource v1.ResourceName, - newPodStatsFunc func(pod *v1.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats) { - enablePodPriority(true) - pod1 := newPod("low-priority-high-usage", lowPriority, []v1.Container{ - newContainer("low-priority-high-usage", newResourceList("", ""), newResourceList("", "")), +// Tests that we correctly ignore disk requests when the local storage feature gate is disabled. 
+func TestOrderedbyDiskDisableLocalStorage(t *testing.T) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=false", features.LocalStorageCapacityIsolation)) + pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ + newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod2 := newPod("low-priority-low-usage", lowPriority, []v1.Container{ - newContainer("low-priority-low-usage", newResourceList("", ""), newResourceList("", "")), + pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{ + newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod3 := newPod("high-priority-high-usage", highPriority, []v1.Container{ - newContainer("high-priority-high-usage", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + pod3 := newPod("burstable-high", defaultPriority, []v1.Container{ + newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod4 := newPod("high-priority-low-usage", highPriority, []v1.Container{ - newContainer("high-priority-low-usage", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + pod4 := newPod("burstable-low", defaultPriority, []v1.Container{ + newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{ + newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{ + newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) stats := map[*v1.Pod]statsapi.PodStats{ - pod1: newPodStatsFunc(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("250Mi")), // 400Mi - pod2: newPodStatsFunc(pod2, resource.MustParse("60Mi"), resource.MustParse("30Mi"), resource.MustParse("10Mi")), // 100Mi - pod3: newPodStatsFunc(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi - pod4: newPodStatsFunc(pod4, resource.MustParse("10Mi"), resource.MustParse("40Mi"), resource.MustParse("100Mi")), // 150Mi + pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi + pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi + pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi + pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi + pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi + pod6: newPodDiskStats(pod6, 
resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi + } + statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { + result, found := stats[pod] + return result, found + } + pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6} + orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceDisk)).Sort(pods) + expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2} + for i := range expected { + if pods[i] != expected[i] { + t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) + } + } +} + +func TestOrderedbyInodes(t *testing.T) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.LocalStorageCapacityIsolation)) + low := newPod("low", defaultPriority, []v1.Container{ + newContainer("low", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + medium := newPod("medium", defaultPriority, []v1.Container{ + newContainer("medium", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + high := newPod("high", defaultPriority, []v1.Container{ + newContainer("high", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + stats := map[*v1.Pod]statsapi.PodStats{ + low: newPodInodeStats(low, resource.MustParse("50000"), resource.MustParse("100000"), resource.MustParse("50000")), // 200000 + medium: newPodInodeStats(medium, resource.MustParse("100000"), resource.MustParse("150000"), resource.MustParse("50000")), // 300000 + high: newPodInodeStats(high, resource.MustParse("200000"), resource.MustParse("150000"), resource.MustParse("50000")), // 400000 + } + statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { + result, found := stats[pod] + return result, found + } + pods := []*v1.Pod{low, medium, high} + orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)).Sort(pods) + expected := []*v1.Pod{high, medium, low} + for i := range expected { + if pods[i] != expected[i] { + t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) + } + } +} + +// TestOrderedByPriorityDisk ensures we order pods by priority and then greediest resource consumer +func TestOrderedByPriorityDisk(t *testing.T) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.LocalStorageCapacityIsolation)) + pod1 := newPod("above-requests-low-priority-high-usage", lowPriority, []v1.Container{ + newContainer("above-requests-low-priority-high-usage", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod2 := newPod("above-requests-low-priority-low-usage", lowPriority, []v1.Container{ + newContainer("above-requests-low-priority-low-usage", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod3 := newPod("above-requests-high-priority-high-usage", highPriority, 
[]v1.Container{ + newContainer("above-requests-high-priority-high-usage", newResourceList("", "", "100Mi"), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod4 := newPod("above-requests-high-priority-low-usage", highPriority, []v1.Container{ + newContainer("above-requests-high-priority-low-usage", newResourceList("", "", "100Mi"), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod5 := newPod("below-requests-low-priority-high-usage", lowPriority, []v1.Container{ + newContainer("below-requests-low-priority-high-usage", newResourceList("", "", "1Gi"), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod6 := newPod("below-requests-low-priority-low-usage", lowPriority, []v1.Container{ + newContainer("below-requests-low-priority-low-usage", newResourceList("", "", "1Gi"), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod7 := newPod("below-requests-high-priority-high-usage", highPriority, []v1.Container{ + newContainer("below-requests-high-priority-high-usage", newResourceList("", "", "1Gi"), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod8 := newPod("below-requests-high-priority-low-usage", highPriority, []v1.Container{ + newContainer("below-requests-high-priority-low-usage", newResourceList("", "", "1Gi"), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + stats := map[*v1.Pod]statsapi.PodStats{ + pod1: newPodDiskStats(pod1, resource.MustParse("200Mi"), resource.MustParse("100Mi"), resource.MustParse("200Mi")), // 500 relative to request + pod2: newPodDiskStats(pod2, resource.MustParse("10Mi"), resource.MustParse("10Mi"), resource.MustParse("30Mi")), // 50 relative to request + pod3: newPodDiskStats(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("250Mi")), // 500 relative to request + pod4: newPodDiskStats(pod4, resource.MustParse("90Mi"), resource.MustParse("50Mi"), resource.MustParse("10Mi")), // 50 relative to request + pod5: newPodDiskStats(pod5, resource.MustParse("500Mi"), resource.MustParse("200Mi"), resource.MustParse("100Mi")), // -200 relative to request + pod6: newPodDiskStats(pod6, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // -800 relative to request + pod7: newPodDiskStats(pod7, resource.MustParse("250Mi"), resource.MustParse("500Mi"), resource.MustParse("50Mi")), // -200 relative to request + pod8: newPodDiskStats(pod8, resource.MustParse("100Mi"), resource.MustParse("60Mi"), resource.MustParse("40Mi")), // -800 relative to request + } + statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { + result, found := stats[pod] + return result, found + } + pods := []*v1.Pod{pod8, pod7, pod6, pod5, pod4, pod3, pod2, pod1} + expected := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6, pod7, pod8} + fsStatsToMeasure := []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource} + orderedBy(exceedDiskRequests(statsFn, fsStatsToMeasure, resourceDisk), priority, disk(statsFn, fsStatsToMeasure, resourceDisk)).Sort(pods) + for i := 
range expected { + if pods[i] != expected[i] { + t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) + } + } +} + +// TestOrderedByPriorityInodes ensures we order pods by priority and then greediest resource consumer +func TestOrderedByPriorityInodes(t *testing.T) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority)) + pod1 := newPod("low-priority-high-usage", lowPriority, []v1.Container{ + newContainer("low-priority-high-usage", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod2 := newPod("low-priority-low-usage", lowPriority, []v1.Container{ + newContainer("low-priority-low-usage", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod3 := newPod("high-priority-high-usage", highPriority, []v1.Container{ + newContainer("high-priority-high-usage", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + pod4 := newPod("high-priority-low-usage", highPriority, []v1.Container{ + newContainer("high-priority-low-usage", newResourceList("", "", ""), newResourceList("", "", "")), + }, []v1.Volume{ + newVolume("local-volume", v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }), + }) + stats := map[*v1.Pod]statsapi.PodStats{ + pod1: newPodInodeStats(pod1, resource.MustParse("50000"), resource.MustParse("100000"), resource.MustParse("250000")), // 400000 + pod2: newPodInodeStats(pod2, resource.MustParse("60000"), resource.MustParse("30000"), resource.MustParse("10000")), // 100000 + pod3: newPodInodeStats(pod3, resource.MustParse("150000"), resource.MustParse("150000"), resource.MustParse("50000")), // 350000 + pod4: newPodInodeStats(pod4, resource.MustParse("10000"), resource.MustParse("40000"), resource.MustParse("100000")), // 150000 } statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { result, found := stats[pod] return result, found } pods := []*v1.Pod{pod4, pod3, pod2, pod1} - orderedBy(priority, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, orderedByResource)).Sort(pods) + orderedBy(priority, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, resourceInodes)).Sort(pods) expected := []*v1.Pod{pod1, pod2, pod3, pod4} for i := range expected { if pods[i] != expected[i] { @@ -597,22 +826,22 @@ func testOrderedByPriorityResource(t *testing.T, orderedByResource v1.ResourceNa // TestOrderedByMemory ensures we order pods by greediest memory consumer relative to request. 
func TestOrderedByMemory(t *testing.T) { pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ - newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), + newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")), }, nil) pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{ - newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")), + newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")), }, nil) pod3 := newPod("burstable-high", defaultPriority, []v1.Container{ - newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + newContainer("burstable-high", newResourceList("", "100Mi", ""), newResourceList("", "1Gi", "")), }, nil) pod4 := newPod("burstable-low", defaultPriority, []v1.Container{ - newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + newContainer("burstable-low", newResourceList("", "100Mi", ""), newResourceList("", "1Gi", "")), }, nil) pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{ - newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + newContainer("guaranteed-high", newResourceList("", "1Gi", ""), newResourceList("", "1Gi", "")), }, nil) pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{ - newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + newContainer("guaranteed-low", newResourceList("", "1Gi", ""), newResourceList("", "1Gi", "")), }, nil) stats := map[*v1.Pod]statsapi.PodStats{ pod1: newPodMemoryStats(pod1, resource.MustParse("500Mi")), // 500 relative to request @@ -638,30 +867,30 @@ func TestOrderedByMemory(t *testing.T) { // TestOrderedByPriorityMemory ensures we order by priority and then memory consumption relative to request. 
 func TestOrderedByPriorityMemory(t *testing.T) {
-	enablePodPriority(true)
+	utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=true", features.PodPriority))
 	pod1 := newPod("above-requests-low-priority-high-usage", lowPriority, []v1.Container{
-		newContainer("above-requests-low-priority-high-usage", newResourceList("", ""), newResourceList("", "")),
+		newContainer("above-requests-low-priority-high-usage", newResourceList("", "", ""), newResourceList("", "", "")),
 	}, nil)
 	pod2 := newPod("above-requests-low-priority-low-usage", lowPriority, []v1.Container{
-		newContainer("above-requests-low-priority-low-usage", newResourceList("", ""), newResourceList("", "")),
+		newContainer("above-requests-low-priority-low-usage", newResourceList("", "", ""), newResourceList("", "", "")),
 	}, nil)
 	pod3 := newPod("above-requests-high-priority-high-usage", highPriority, []v1.Container{
-		newContainer("above-requests-high-priority-high-usage", newResourceList("", "100Mi"), newResourceList("", "")),
+		newContainer("above-requests-high-priority-high-usage", newResourceList("", "100Mi", ""), newResourceList("", "", "")),
 	}, nil)
 	pod4 := newPod("above-requests-high-priority-low-usage", highPriority, []v1.Container{
-		newContainer("above-requests-high-priority-low-usage", newResourceList("", "100Mi"), newResourceList("", "")),
+		newContainer("above-requests-high-priority-low-usage", newResourceList("", "100Mi", ""), newResourceList("", "", "")),
 	}, nil)
 	pod5 := newPod("below-requests-low-priority-high-usage", lowPriority, []v1.Container{
-		newContainer("below-requests-low-priority-high-usage", newResourceList("", "1Gi"), newResourceList("", "")),
+		newContainer("below-requests-low-priority-high-usage", newResourceList("", "1Gi", ""), newResourceList("", "", "")),
 	}, nil)
 	pod6 := newPod("below-requests-low-priority-low-usage", lowPriority, []v1.Container{
-		newContainer("below-requests-low-priority-low-usage", newResourceList("", "1Gi"), newResourceList("", "")),
+		newContainer("below-requests-low-priority-low-usage", newResourceList("", "1Gi", ""), newResourceList("", "", "")),
 	}, nil)
 	pod7 := newPod("below-requests-high-priority-high-usage", highPriority, []v1.Container{
-		newContainer("below-requests-high-priority-high-usage", newResourceList("", "1Gi"), newResourceList("", "")),
+		newContainer("below-requests-high-priority-high-usage", newResourceList("", "1Gi", ""), newResourceList("", "", "")),
 	}, nil)
 	pod8 := newPod("below-requests-high-priority-low-usage", highPriority, []v1.Container{
-		newContainer("below-requests-high-priority-low-usage", newResourceList("", "1Gi"), newResourceList("", "")),
+		newContainer("below-requests-high-priority-low-usage", newResourceList("", "1Gi", ""), newResourceList("", "", "")),
 	}, nil)
 	stats := map[*v1.Pod]statsapi.PodStats{
 		pod1: newPodMemoryStats(pod1, resource.MustParse("500Mi")), // 500 relative to request
@@ -678,7 +907,6 @@ func TestOrderedByPriorityMemory(t *testing.T) {
 		return result, found
 	}
 	pods := []*v1.Pod{pod8, pod7, pod6, pod5, pod4, pod3, pod2, pod1}
-	// pods := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6, pod7, pod8}
 	expected := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6, pod7, pod8}
 	orderedBy(exceedMemoryRequests(statsFn), priority, memory(statsFn)).Sort(pods)
 	for i := range expected {
@@ -1589,7 +1817,7 @@ func newPodMemoryStats(pod *v1.Pod, workingSet resource.Quantity) statsapi.PodSt
 	return result
 }

-func newResourceList(cpu, memory string) v1.ResourceList {
+func newResourceList(cpu, memory, disk string) v1.ResourceList {
 	res := v1.ResourceList{}
 	if cpu != "" {
 		res[v1.ResourceCPU] = resource.MustParse(cpu)
@@ -1597,6 +1825,9 @@ func newResourceList(cpu, memory string) v1.ResourceList {
 	if memory != "" {
 		res[v1.ResourceMemory] = resource.MustParse(memory)
 	}
+	if disk != "" {
+		res[v1.ResourceEphemeralStorage] = resource.MustParse(disk)
+	}
 	return res
 }
@@ -1681,7 +1912,3 @@ func (s1 thresholdList) Equal(s2 thresholdList) bool {
 	}
 	return true
 }
-
-func enablePodPriority(enabled bool) {
-	utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=%t", features.PodPriority, enabled))
-}
diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go
index 3e23280081d..d8760db6b45 100644
--- a/test/e2e_node/eviction_test.go
+++ b/test/e2e_node/eviction_test.go
@@ -50,6 +50,7 @@ const (
 	pressureDelay  = 20 * time.Second
 	testContextFmt = "when we run containers that should cause %s"
 	noPressure     = v1.NodeConditionType("NoPressure")
+	lotsOfDisk     = 10240 // 10 Gb in Mb
 )

 // InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
@@ -127,16 +128,16 @@ var _ = framework.KubeDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive
 	expectedNodeCondition := v1.NodeDiskPressure
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
-			diskConsumed := uint64(100000000) // At least 100 Mb for pods to consume
+			diskConsumed := resource.MustParse("100Mi")
 			summary := eventuallyGetSummary()
 			availableBytes := *(summary.Node.Fs.AvailableBytes)
-			initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-diskConsumed)}
+			initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
 		})
 		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
-				pod:              diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
+				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 0,
@@ -155,10 +156,13 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 	expectedNodeCondition := v1.NodeDiskPressure
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
-			diskConsumed := uint64(100000000) // At least 100 Mb for pods to consume
+			diskConsumed := resource.MustParse("100Mi")
 			summary := eventuallyGetSummary()
 			availableBytes := *(summary.Node.Fs.AvailableBytes)
-			initialConfig.EvictionSoft = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-diskConsumed)}
+			if availableBytes <= uint64(diskConsumed.Value()) {
+				framework.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
+			}
+			initialConfig.EvictionSoft = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
 			initialConfig.EvictionSoftGracePeriod = map[string]string{"nodefs.available": "1m"}
 			// Defer to the pod default grace period
 			initialConfig.EvictionMaxPodGracePeriod = 30
@@ -169,7 +173,7 @@ var _ = framework.KubeDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disrup
 		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1,
-				pod:              diskConsumingPod("container-disk-hog", 10000, nil, v1.ResourceRequirements{}),
+				pod:              diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 0,
@@ -189,34 +193,35 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 			initialConfig.EvictionHard = map[string]string{}
 		})
 		sizeLimit := resource.MustParse("100Mi")
-		used := int64(200) // Consume 200 Mb
+		useOverLimit := 101 /* Mb */
+		useUnderLimit := 99 /* Mb */
 		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}

 		runEvictionTest(f, evictionTestTimeout, noPressure, logDiskMetrics, []podEvictSpec{
 			{
 				evictionPriority: 1, // This pod should be evicted because emptyDir (default storage type) usage violation
-				pod: diskConsumingPod("emptydir-disk-sizelimit", used, &v1.VolumeSource{
+				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
 					EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
 				}, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 1, // This pod should be evicted because of memory emptyDir usage violation
-				pod: diskConsumingPod("emptydir-memory-sizelimit", used, &v1.VolumeSource{
+				pod: diskConsumingPod("emptydir-memory-sizelimit", useOverLimit, &v1.VolumeSource{
 					EmptyDir: &v1.EmptyDirVolumeSource{Medium: "Memory", SizeLimit: &sizeLimit},
 				}, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 1, // This pod should cross the container limit by writing to its writable layer.
-				pod:              diskConsumingPod("container-disk-limit", used, nil, v1.ResourceRequirements{Limits: containerLimit}),
+				pod:              diskConsumingPod("container-disk-limit", useOverLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
 			},
 			{
 				evictionPriority: 1, // This pod should hit the container limit by writing to an emptydir
-				pod: diskConsumingPod("container-emptydir-disk-limit", used, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
+				pod: diskConsumingPod("container-emptydir-disk-limit", useOverLimit, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
 					v1.ResourceRequirements{Limits: containerLimit}),
 			},
 			{
 				evictionPriority: 0, // This pod should not be evicted because it uses less than its limit
-				pod: diskConsumingPod("emptydir-disk-below-sizelimit", int64(50), &v1.VolumeSource{
+				pod: diskConsumingPod("emptydir-disk-below-sizelimit", useUnderLimit, &v1.VolumeSource{
 					EmptyDir: &v1.EmptyDirVolumeSource{SizeLimit: &sizeLimit},
 				}, v1.ResourceRequirements{}),
 			},
@@ -224,11 +229,11 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se
 		})
 })

-// PriorityEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
+// PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
 // This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
 // the higher priority pod.
-var _ = framework.KubeDescribe("PriorityEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
-	f := framework.NewDefaultFramework("priority-eviction-ordering-test")
+var _ = framework.KubeDescribe("PriorityMemoryEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
+	f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
 	expectedNodeCondition := v1.NodeMemoryPressure
 	pressureTimeout := 10 * time.Minute
 	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
@@ -237,6 +242,9 @@ var _ = framework.KubeDescribe("PriorityEvictionOrdering [Slow] [Serial] [Disrup
 			memoryConsumed := resource.MustParse("600Mi")
 			summary := eventuallyGetSummary()
 			availableBytes := *(summary.Node.Memory.AvailableBytes)
+			if availableBytes <= uint64(memoryConsumed.Value()) {
+				framework.Skipf("Too little memory free on the host for the PriorityMemoryEvictionOrdering test to run")
+			}
 			initialConfig.EvictionHard = map[string]string{"memory.available": fmt.Sprintf("%d", availableBytes-uint64(memoryConsumed.Value()))}
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
 		})
@@ -267,6 +275,53 @@ var _ = framework.KubeDescribe("PriorityEvictionOrdering [Slow] [Serial] [Disrup
 	})
 })

+// PriorityLocalStorageEvictionOrdering tests that the node responds to node disk pressure by evicting pods.
+// This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before
+// the higher priority pod.
+var _ = framework.KubeDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() {
+	f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
+	expectedNodeCondition := v1.NodeDiskPressure
+	pressureTimeout := 10 * time.Minute
+	Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+		tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
+			initialConfig.FeatureGates[string(features.PodPriority)] = true
+			initialConfig.FeatureGates[string(features.LocalStorageCapacityIsolation)] = true
+			diskConsumed := resource.MustParse("350Mi")
+			summary := eventuallyGetSummary()
+			availableBytes := *(summary.Node.Fs.AvailableBytes)
+			if availableBytes <= uint64(diskConsumed.Value()) {
+				framework.Skipf("Too little disk free on the host for the PriorityLocalStorageEvictionOrdering test to run")
+			}
+			initialConfig.EvictionHard = map[string]string{"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+			initialConfig.EvictionMinimumReclaim = map[string]string{}
+		})
+		specs := []podEvictSpec{
+			{
+				evictionPriority: 2,
+				pod:              diskConsumingPod("best-effort-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
+			},
+			{
+				evictionPriority: 1,
+				pod:              diskConsumingPod("high-priority-disk", lotsOfDisk, nil, v1.ResourceRequirements{}),
+			},
+			{
+				evictionPriority: 0,
+				pod: diskConsumingPod("guaranteed-disk", 299 /* Mb */, nil, v1.ResourceRequirements{
+					Requests: v1.ResourceList{
+						v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
+					},
+					Limits: v1.ResourceList{
+						v1.ResourceEphemeralStorage: resource.MustParse("300Mi"),
+					},
+				}),
+			},
+		}
+		systemPriority := int32(2147483647)
+		specs[1].pod.Spec.Priority = &systemPriority
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, specs)
+	})
+})
+
 // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
 type podEvictSpec struct {
 	// P0 should never be evicted, P1 shouldn't evict before P2, etc.
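Editor's note on the hunk above (not part of the patch): the new disk-pressure test expresses its hard eviction threshold as an absolute byte count, computed as the node's currently available bytes minus the amount the pods are expected to consume, and skips itself when the node does not even have that much free. The following is a minimal standalone Go sketch of that arithmetic; only the "350Mi" quantity and the subtraction come from the diff, while package main, the printed output, and the 2Gi sample free-space figure are illustrative assumptions.

// Editor's sketch: the eviction-threshold arithmetic used by PriorityLocalStorageEvictionOrdering.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// From the hunk above: how much disk the test intends to consume.
	diskConsumed := resource.MustParse("350Mi")

	// Hypothetical value; in the e2e test this comes from the node stats summary
	// (summary.Node.Fs.AvailableBytes).
	availableBytes := uint64(2 * 1024 * 1024 * 1024)

	// The test skips rather than configuring an impossible (underflowed) threshold.
	if availableBytes <= uint64(diskConsumed.Value()) {
		fmt.Println("skip: too little disk free on the host")
		return
	}

	// EvictionHard takes the threshold as a string of absolute bytes, so disk pressure
	// triggers once the pods have written roughly diskConsumed bytes.
	threshold := map[string]string{
		"nodefs.available": fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value())),
	}
	fmt.Println(threshold)
}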
@@ -571,9 +626,9 @@ func inodeConsumingPod(name string, volumeSource *v1.VolumeSource) *v1.Pod {
 	return podWithCommand(volumeSource, v1.ResourceRequirements{}, name, "i=0; while true; do touch %s${i}.txt; sleep 0.001; i=$((i+=1)); done;")
 }

-func diskConsumingPod(name string, diskConsumedMB int64, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
-	// Each iteration writes 1Mb to the file
-	return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB/100)+" do dd if=/dev/urandom of=%s${i} bs=100 count=1000000; i=$(($i+1)); done; while true; do sleep 5; done")
+func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
+	// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
+	return podWithCommand(volumeSource, resources, name, fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB)+" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1; i=$(($i+1)); done; while true; do sleep 5; done")
 }

 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
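Editor's note on the diskConsumingPod hunk above (not part of the patch): each loop iteration of the rewritten command runs dd with bs=1048576 count=1, i.e. exactly 1 MiB, so diskConsumedMB iterations write diskConsumedMB MiB in total, replacing the old scheme of diskConsumedMB/100 iterations at bs=100 count=1000000 (about 100 MB each). The sketch below simply rebuilds the same shell string for a hypothetical value of 10 so the arithmetic is visible; the %s placeholder is deliberately left unexpanded because podWithCommand fills in the target path later, as in the patch.

// Editor's sketch: reproduce the shell command built by the new diskConsumingPod.
package main

import "fmt"

func main() {
	diskConsumedMB := 10 // hypothetical value; the tests pass lotsOfDisk (10240) or 299

	// Mirrors the fmt.Sprintf in the patch; each dd call writes one 1 MiB block.
	cmd := fmt.Sprintf("i=0; while [ $i -lt %d ];", diskConsumedMB) +
		" do dd if=/dev/urandom of=%s${i} bs=1048576 count=1; i=$(($i+1)); done; while true; do sleep 5; done"

	fmt.Println(cmd)
	fmt.Println("total written:", diskConsumedMB, "MiB")
}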
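Editor's note on the newResourceList(cpu, memory, disk) change in pkg/kubelet/eviction/eviction_manager_test.go earlier in this diff (not part of the patch): the helper gains a third argument that, when non-empty, adds an ephemeral-storage entry to the returned v1.ResourceList, which is why every existing call site picks up an extra "" argument. The self-contained sketch below copies the helper body locally so it compiles on its own and shows the intended usage.

// Editor's sketch: intended usage of the widened newResourceList helper.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// Copied from the hunk above so this example is self-contained: empty strings skip a
// resource, and the new third argument maps to ephemeral storage.
func newResourceList(cpu, memory, disk string) v1.ResourceList {
	res := v1.ResourceList{}
	if cpu != "" {
		res[v1.ResourceCPU] = resource.MustParse(cpu)
	}
	if memory != "" {
		res[v1.ResourceMemory] = resource.MustParse(memory)
	}
	if disk != "" {
		res[v1.ResourceEphemeralStorage] = resource.MustParse(disk)
	}
	return res
}

func main() {
	// Requests carrying CPU, memory, and the new ephemeral-storage entry.
	requests := newResourceList("100m", "100Mi", "300Mi")
	// Best-effort style call: all-empty arguments yield an empty list, matching the
	// newResourceList("", "", "") call sites in the tests.
	empty := newResourceList("", "", "")
	fmt.Println(len(requests), len(empty)) // 3 0
}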