diff --git a/pkg/kubelet/eviction/BUILD b/pkg/kubelet/eviction/BUILD index f3d0ae93d79..91e184c1952 100644 --- a/pkg/kubelet/eviction/BUILD +++ b/pkg/kubelet/eviction/BUILD @@ -15,6 +15,7 @@ go_test( library = ":go_default_library", deps = [ "//pkg/api:go_default_library", + "//pkg/features:go_default_library", "//pkg/kubelet/apis/stats/v1alpha1:go_default_library", "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/eviction/api:go_default_library", @@ -58,6 +59,7 @@ go_library( "//pkg/kubelet/server/stats:go_default_library", "//pkg/kubelet/types:go_default_library", "//pkg/kubelet/util/format:go_default_library", + "//plugin/pkg/scheduler/util:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library", diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go index ab9b0d2a20f..d8474814283 100644 --- a/pkg/kubelet/eviction/eviction_manager_test.go +++ b/pkg/kubelet/eviction/eviction_manager_test.go @@ -33,6 +33,12 @@ import ( kubelettypes "k8s.io/kubernetes/pkg/kubelet/types" ) +const ( + lowPriority = -1 + defaultPriority = 0 + highPriority = 1 +) + // mockPodKiller is used to testing which pod is killed type mockPodKiller struct { pod *v1.Pod @@ -99,16 +105,16 @@ func (m *mockDiskGC) DeleteAllUnusedContainers() error { return m.err } -func makePodWithMemoryStats(name string, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) { - pod := newPod(name, []v1.Container{ +func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) { + pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), }, nil) podStats := newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet)) return pod, podStats } -func makePodWithDiskStats(name string, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) { - pod := newPod(name, []v1.Container{ +func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) { + pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), }, nil) podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed)) @@ -164,6 +170,7 @@ func makeDiskStats(rootFsAvailableBytes, imageFsAvailableBytes string, podStats type podToMake struct { name string + priority int32 requests v1.ResourceList limits v1.ResourceList memoryWorkingSet string @@ -177,24 +184,24 @@ type podToMake struct { // TestMemoryPressure func TestMemoryPressure(t *testing.T) { + enablePodPriority(true) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "200Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: 
newResourceList("200m", "1Gi"), memoryWorkingSet: "800Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "300Mi"}, - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "500Mi"}, + {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "900Mi"}, + {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "50Mi"}, + {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "400Mi"}, + {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[4] activePodsFunc := func() []*v1.Pod { return pods } @@ -241,8 +248,8 @@ func TestMemoryPressure(t *testing.T) { } // create a best effort pod to test admission - bestEffortPodToAdmit, _ := podMaker("best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi") - burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") + bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0Gi") + burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -395,24 +402,24 @@ func parseQuantity(value string) resource.Quantity { } func TestDiskPressureNodeFs(t *testing.T) { + enablePodPriority(true) podMaker := makePodWithDiskStats summaryStatsMaker := makeDiskStats podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "200Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "300Mi"}, - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"}, + 
{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "400Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[0] activePodsFunc := func() []*v1.Pod { return pods } @@ -460,7 +467,7 @@ func TestDiskPressureNodeFs(t *testing.T) { } // create a best effort pod to test admission - podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""), "0Gi", "0Gi", "0Gi") + podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0Gi", "0Gi", "0Gi") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -594,24 +601,24 @@ func TestDiskPressureNodeFs(t *testing.T) { // TestMinReclaim verifies that min-reclaim works as desired. 
func TestMinReclaim(t *testing.T) { + enablePodPriority(true) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "200Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "800Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "300Mi"}, - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "500Mi"}, + {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "900Mi"}, + {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "50Mi"}, + {name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "400Mi"}, + {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[4] activePodsFunc := func() []*v1.Pod { return pods } @@ -734,24 +741,24 @@ func TestMinReclaim(t *testing.T) { } func TestNodeReclaimFuncs(t *testing.T) { + enablePodPriority(true) podMaker := makePodWithDiskStats summaryStatsMaker := makeDiskStats podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "200Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "300Mi"}, - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "500Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: 
newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsUsed: "400Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), perLocalVolumeUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[0] activePodsFunc := func() []*v1.Pod { return pods } @@ -908,8 +915,9 @@ func TestNodeReclaimFuncs(t *testing.T) { } func TestInodePressureNodeFsInodes(t *testing.T) { - podMaker := func(name string, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) { - pod := newPod(name, []v1.Container{ + enablePodPriority(true) + podMaker := func(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) { + pod := newPod(name, priority, []v1.Container{ newContainer(name, requests, limits), }, nil) podStats := newPodInodeStats(pod, parseQuantity(rootInodes), parseQuantity(logInodes), parseQuantity(volumeInodes)) @@ -935,21 +943,20 @@ func TestInodePressureNodeFsInodes(t *testing.T) { return result } podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsInodesUsed: "200Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsInodesUsed: "800Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "800Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "300Mi"}, - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "800Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsInodesUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), rootFsInodesUsed: "400Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsInodesUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: 
newResourceList("", ""), rootFsInodesUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[0] activePodsFunc := func() []*v1.Pod { return pods } @@ -997,7 +1004,7 @@ func TestInodePressureNodeFsInodes(t *testing.T) { } // create a best effort pod to test admission - podToAdmit, _ := podMaker("pod-to-admit", newResourceList("", ""), newResourceList("", ""), "0", "0", "0") + podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0", "0", "0") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -1131,15 +1138,16 @@ func TestInodePressureNodeFsInodes(t *testing.T) { // TestCriticalPodsAreNotEvicted func TestCriticalPodsAreNotEvicted(t *testing.T) { + enablePodPriority(true) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats podsToMake := []podToMake{ - {name: "critical", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, + {name: "critical", priority: defaultPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "800Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) pods = append(pods, pod) podStats[pod] = podStat } @@ -1266,25 +1274,25 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) { // TestAllocatableMemoryPressure func TestAllocatableMemoryPressure(t *testing.T) { + enablePodPriority(true) podMaker := makePodWithMemoryStats summaryStatsMaker := makeMemoryStats constantCapacity := "4Gi" podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "200Mi"}, - {name: "guaranteed-high", requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "400Mi"}, - {name: "burstable-low", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "300Mi"}, - {name: "burstable-high", requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "500Mi"}, - {name: "best-effort-low", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, - {name: "best-effort-high", requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "200Mi"}, + {name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), memoryWorkingSet: "900Mi"}, + {name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "50Mi"}, + 
{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), memoryWorkingSet: "400Mi"}, + {name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "400Mi"}, + {name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), memoryWorkingSet: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[4] activePodsFunc := func() []*v1.Pod { return pods } @@ -1324,8 +1332,8 @@ func TestAllocatableMemoryPressure(t *testing.T) { } // create a best effort pod to test admission - bestEffortPodToAdmit, _ := podMaker("best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi") - burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") + bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", ""), newResourceList("", ""), "0Gi") + burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -1345,7 +1353,7 @@ func TestAllocatableMemoryPressure(t *testing.T) { // induce memory pressure! 
fakeClock.Step(1 * time.Minute) - pod, podStat := podMaker("guaranteed-high-2", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi"), "1Gi") + pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi"), "1Gi") podStats[pod] = podStat summaryProvider.result = summaryStatsMaker(constantCapacity, podStats) manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -1432,25 +1440,25 @@ func TestAllocatableMemoryPressure(t *testing.T) { // TestAllocatableNodeFsPressure func TestAllocatableNodeFsPressure(t *testing.T) { utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=True") + enablePodPriority(true) podMaker := makePodWithDiskStats summaryStatsMaker := makeDiskStats podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), rootFsUsed: "200Mi"}, - {name: "guaranteed-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "burstable-low", requests: newEphemeralStorageResourceList("300Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("300Mi", "200m", "1Gi"), logsFsUsed: "300Mi"}, - {name: "burstable-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("800Mi", "200m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "best-effort-low", requests: newEphemeralStorageResourceList("300Mi", "", ""), limits: newEphemeralStorageResourceList("300Mi", "", ""), logsFsUsed: "300Mi"}, - {name: "best-effort-high", requests: newEphemeralStorageResourceList("800Mi", "", ""), limits: newEphemeralStorageResourceList("800Mi", "", ""), rootFsUsed: "800Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "2Gi"), rootFsUsed: "1750Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[0] activePodsFunc := func() []*v1.Pod { return pods } @@ -1490,8 +1498,8 @@ func TestAllocatableNodeFsPressure(t *testing.T) { } // create a best effort pod to test admission - bestEffortPodToAdmit, _ := podMaker("best-admit", newEphemeralStorageResourceList("", "", ""), newEphemeralStorageResourceList("", "", ""), "0Gi", "", "") - burstablePodToAdmit, _ := 
podMaker("burst-admit", newEphemeralStorageResourceList("1Gi", "", ""), newEphemeralStorageResourceList("1Gi", "", ""), "1Gi", "", "") + bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newEphemeralStorageResourceList("", "", ""), newEphemeralStorageResourceList("", "", ""), "0Gi", "", "") + burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newEphemeralStorageResourceList("1Gi", "", ""), newEphemeralStorageResourceList("1Gi", "", ""), "1Gi", "", "") // synchronize manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider) @@ -1511,7 +1519,7 @@ func TestAllocatableNodeFsPressure(t *testing.T) { // induce disk pressure! fakeClock.Step(1 * time.Minute) - pod, podStat := podMaker("guaranteed-high-2", newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "") + pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "") podStats[pod] = podStat pods = append(pods, pod) summaryProvider.result = summaryStatsMaker("6Gi", "6Gi", podStats) @@ -1585,24 +1593,24 @@ func TestAllocatableNodeFsPressure(t *testing.T) { func TestNodeReclaimForAllocatableFuncs(t *testing.T) { utilfeature.DefaultFeatureGate.Set("LocalStorageCapacityIsolation=True") + enablePodPriority(true) podMaker := makePodWithDiskStats summaryStatsMaker := makeDiskStats podsToMake := []podToMake{ - {name: "guaranteed-low", requests: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), rootFsUsed: "200Mi"}, - {name: "guaranteed-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "burstable-low", requests: newEphemeralStorageResourceList("300Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("300Mi", "200m", "1Gi"), logsFsUsed: "300Mi"}, - {name: "burstable-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("800Mi", "200m", "1Gi"), rootFsUsed: "800Mi"}, - {name: "best-effort-low", requests: newEphemeralStorageResourceList("300Mi", "", ""), limits: newEphemeralStorageResourceList("300Mi", "", ""), logsFsUsed: "300Mi"}, - {name: "best-effort-high", requests: newEphemeralStorageResourceList("800Mi", "", ""), limits: newEphemeralStorageResourceList("800Mi", "", ""), rootFsUsed: "800Mi"}, + {name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi"), limits: newResourceList("100m", "1Gi"), rootFsUsed: "900Mi"}, + {name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "1Gi"), logsFsUsed: "50Mi"}, + {name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi"), limits: newResourceList("200m", "2Gi"), rootFsUsed: "1750Mi"}, + {name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "400Mi"}, + {name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", ""), limits: newResourceList("", ""), rootFsUsed: "100Mi"}, } pods := []*v1.Pod{} podStats := map[*v1.Pod]statsapi.PodStats{} for _, podToMake := range podsToMake { - pod, podStat := podMaker(podToMake.name, podToMake.requests, 
podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) + pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed) pods = append(pods, pod) podStats[pod] = podStat } - podToEvict := pods[5] + podToEvict := pods[0] activePodsFunc := func() []*v1.Pod { return pods } @@ -1653,7 +1661,7 @@ func TestNodeReclaimForAllocatableFuncs(t *testing.T) { // induce hard threshold fakeClock.Step(1 * time.Minute) - pod, podStat := podMaker("guaranteed-high-2", newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "") + pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "") podStats[pod] = podStat pods = append(pods, pod) summaryProvider.result = summaryStatsMaker("6Gi", "6Gi", podStats) diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go index 5618e3d0b81..6778ce530db 100644 --- a/pkg/kubelet/eviction/helpers.go +++ b/pkg/kubelet/eviction/helpers.go @@ -28,12 +28,12 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" utilfeature "k8s.io/apiserver/pkg/util/feature" - v1qos "k8s.io/kubernetes/pkg/api/v1/helper/qos" "k8s.io/kubernetes/pkg/features" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/kubelet/cm" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" "k8s.io/kubernetes/pkg/kubelet/server/stats" + schedulerutils "k8s.io/kubernetes/plugin/pkg/scheduler/util" ) const ( @@ -588,27 +588,59 @@ func (ms *multiSorter) Less(i, j int) bool { return ms.cmp[k](p1, p2) < 0 } -// qosComparator compares pods by QoS (BestEffort < Burstable < Guaranteed) -func qosComparator(p1, p2 *v1.Pod) int { - qosP1 := v1qos.GetPodQOS(p1) - qosP2 := v1qos.GetPodQOS(p2) - // its a tie - if qosP1 == qosP2 { +// priority compares pods by Priority, if priority is enabled. +func priority(p1, p2 *v1.Pod) int { + if !utilfeature.DefaultFeatureGate.Enabled(features.PodPriority) { + // If priority is not enabled, all pods are equal. return 0 } - // if p1 is best effort, we know p2 is burstable or guaranteed - if qosP1 == v1.PodQOSBestEffort { - return -1 + priority1 := schedulerutils.GetPodPriority(p1) + priority2 := schedulerutils.GetPodPriority(p2) + if priority1 == priority2 { + return 0 } - // we know p1 and p2 are not besteffort, so if p1 is burstable, p2 must be guaranteed - if qosP1 == v1.PodQOSBurstable { - if qosP2 == v1.PodQOSGuaranteed { + if priority1 > priority2 { + return 1 + } + return -1 +} + +// exceedMemoryRequests compares whether or not pods' memory usage exceeds their requests +func exceedMemoryRequests(stats statsFunc) cmpFunc { + return func(p1, p2 *v1.Pod) int { + p1Stats, found := stats(p1) + // if we have no usage stats for p1, we want p2 first + if !found { + return -1 + } + // if we have no usage stats for p2, but p1 has usage, we want p1 first. 
+ p2Stats, found := stats(p2) + if !found { + return 1 + } + // if we cant get usage for p1 measured, we want p2 first + p1Usage, err := podMemoryUsage(p1Stats) + if err != nil { + return -1 + } + // if we cant get usage for p2 measured, we want p1 first + p2Usage, err := podMemoryUsage(p2Stats) + if err != nil { + return 1 + } + p1Memory := p1Usage[v1.ResourceMemory] + p2Memory := p2Usage[v1.ResourceMemory] + p1ExceedsRequests := p1Memory.Cmp(podMemoryRequest(p1)) == 1 + p2ExceedsRequests := p2Memory.Cmp(podMemoryRequest(p2)) == 1 + if p1ExceedsRequests == p2ExceedsRequests { + return 0 + } + if p1ExceedsRequests && !p2ExceedsRequests { + // if p1 exceeds its requests, but p2 does not, then we want p2 first return -1 } return 1 } - // ok, p1 must be guaranteed. - return 1 } // memory compares pods by largest consumer of memory relative to request. @@ -700,14 +732,16 @@ func disk(stats statsFunc, fsStatsToMeasure []fsStatsType, diskResource v1.Resou } // rankMemoryPressure orders the input pods for eviction in response to memory pressure. +// It ranks by whether or not the pod's usage exceeds its requests, then by priority, and +// finally by memory usage above requests. func rankMemoryPressure(pods []*v1.Pod, stats statsFunc) { - orderedBy(qosComparator, memory(stats)).Sort(pods) + orderedBy(exceedMemoryRequests(stats), priority, memory(stats)).Sort(pods) } // rankDiskPressureFunc returns a rankFunc that measures the specified fs stats. func rankDiskPressureFunc(fsStatsToMeasure []fsStatsType, diskResource v1.ResourceName) rankFunc { return func(pods []*v1.Pod, stats statsFunc) { - orderedBy(qosComparator, disk(stats, fsStatsToMeasure, diskResource)).Sort(pods) + orderedBy(priority, disk(stats, fsStatsToMeasure, diskResource)).Sort(pods) } } diff --git a/pkg/kubelet/eviction/helpers_test.go b/pkg/kubelet/eviction/helpers_test.go index eb0c1b1dcb5..448c89a4ba0 100644 --- a/pkg/kubelet/eviction/helpers_test.go +++ b/pkg/kubelet/eviction/helpers_test.go @@ -26,7 +26,9 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/features" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/kubelet/cm" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" @@ -435,22 +437,48 @@ func thresholdEqual(a evictionapi.Threshold, b evictionapi.Threshold) bool { compareThresholdValue(a.Value, b.Value) } -// TestOrderedByQoS ensures we order BestEffort < Burstable < Guaranteed -func TestOrderedByQoS(t *testing.T) { - bestEffort := newPod("best-effort", []v1.Container{ - newContainer("best-effort", newResourceList("", ""), newResourceList("", "")), +// TestOrderedByPriority ensures we order BestEffort < Burstable < Guaranteed +func TestOrderedByPriority(t *testing.T) { + enablePodPriority(true) + low := newPod("low-priority", -134, []v1.Container{ + newContainer("low-priority", newResourceList("", ""), newResourceList("", "")), }, nil) - burstable := newPod("burstable", []v1.Container{ - newContainer("burstable", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi")), + medium := newPod("medium-priority", 1, []v1.Container{ + newContainer("medium-priority", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi")), }, nil) - guaranteed := newPod("guaranteed", []v1.Container{ - newContainer("guaranteed", newResourceList("200m", "200Mi"), newResourceList("200m", "200Mi")), + high := 
newPod("high-priority", 12534, []v1.Container{ + newContainer("high-priority", newResourceList("200m", "200Mi"), newResourceList("200m", "200Mi")), }, nil) - pods := []*v1.Pod{guaranteed, burstable, bestEffort} - orderedBy(qosComparator).Sort(pods) + pods := []*v1.Pod{high, medium, low} + orderedBy(priority).Sort(pods) - expected := []*v1.Pod{bestEffort, burstable, guaranteed} + expected := []*v1.Pod{low, medium, high} + for i := range expected { + if pods[i] != expected[i] { + t.Errorf("Expected pod: %s, but got: %s", expected[i].Name, pods[i].Name) + } + } +} + +// TestOrderedByPriority ensures we order BestEffort < Burstable < Guaranteed +func TestOrderedByPriorityDisabled(t *testing.T) { + enablePodPriority(false) + low := newPod("low-priority", lowPriority, []v1.Container{ + newContainer("low-priority", newResourceList("", ""), newResourceList("", "")), + }, nil) + medium := newPod("medium-priority", defaultPriority, []v1.Container{ + newContainer("medium-priority", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi")), + }, nil) + high := newPod("high-priority", highPriority, []v1.Container{ + newContainer("high-priority", newResourceList("200m", "200Mi"), newResourceList("200m", "200Mi")), + }, nil) + + pods := []*v1.Pod{high, medium, low} + orderedBy(priority).Sort(pods) + + // orderedBy(priority) should not change the input ordering, since we did not enable the PodPriority feature gate + expected := []*v1.Pod{high, medium, low} for i := range expected { if pods[i] != expected[i] { t.Errorf("Expected pod: %s, but got: %s", expected[i].Name, pods[i].Name) @@ -469,42 +497,43 @@ func TestOrderedbyInodes(t *testing.T) { // testOrderedByDisk ensures we order pods by greediest resource consumer func testOrderedByResource(t *testing.T, orderedByResource v1.ResourceName, newPodStatsFunc func(pod *v1.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats) { - pod1 := newPod("best-effort-high", []v1.Container{ + enablePodPriority(true) + pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod2 := newPod("best-effort-low", []v1.Container{ + pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{ newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod3 := newPod("burstable-high", []v1.Container{ + pod3 := newPod("burstable-high", defaultPriority, []v1.Container{ newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod4 := newPod("burstable-low", []v1.Container{ + pod4 := newPod("burstable-low", defaultPriority, []v1.Container{ newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod5 := newPod("guaranteed-high", []v1.Container{ + pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{ newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod6 := 
newPod("guaranteed-low", []v1.Container{ + pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{ newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ @@ -533,74 +562,59 @@ func testOrderedByResource(t *testing.T, orderedByResource v1.ResourceName, } } -func TestOrderedbyQoSDisk(t *testing.T) { - testOrderedByQoSResource(t, resourceDisk, newPodDiskStats) +func TestOrderedbyPriorityDisk(t *testing.T) { + testOrderedByPriorityResource(t, resourceDisk, newPodDiskStats) } -func TestOrderedbyQoSInodes(t *testing.T) { - testOrderedByQoSResource(t, resourceInodes, newPodInodeStats) +func TestOrderedbyPriorityInodes(t *testing.T) { + testOrderedByPriorityResource(t, resourceInodes, newPodInodeStats) } -// testOrderedByQoSDisk ensures we order pods by qos and then greediest resource consumer -func testOrderedByQoSResource(t *testing.T, orderedByResource v1.ResourceName, +// testOrderedByPriorityDisk ensures we order pods by qos and then greediest resource consumer +func testOrderedByPriorityResource(t *testing.T, orderedByResource v1.ResourceName, newPodStatsFunc func(pod *v1.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resource.Quantity) statsapi.PodStats) { - pod1 := newPod("best-effort-high", []v1.Container{ - newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), + enablePodPriority(true) + pod1 := newPod("low-priority-high-usage", lowPriority, []v1.Container{ + newContainer("low-priority-high-usage", newResourceList("", ""), newResourceList("", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod2 := newPod("best-effort-low", []v1.Container{ - newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")), + pod2 := newPod("low-priority-low-usage", lowPriority, []v1.Container{ + newContainer("low-priority-low-usage", newResourceList("", ""), newResourceList("", "")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod3 := newPod("burstable-high", []v1.Container{ - newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + pod3 := newPod("high-priority-high-usage", highPriority, []v1.Container{ + newContainer("high-priority-high-usage", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) - pod4 := newPod("burstable-low", []v1.Container{ - newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod5 := newPod("guaranteed-high", []v1.Container{ - newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod6 := newPod("guaranteed-low", []v1.Container{ - newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + pod4 := newPod("high-priority-low-usage", highPriority, []v1.Container{ + newContainer("high-priority-low-usage", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), }, []v1.Volume{ newVolume("local-volume", v1.VolumeSource{ EmptyDir: &v1.EmptyDirVolumeSource{}, }), }) stats := 
map[*v1.Pod]statsapi.PodStats{ - pod1: newPodStatsFunc(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 200Mi - pod2: newPodStatsFunc(pod2, resource.MustParse("100Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 300Mi - pod3: newPodStatsFunc(pod3, resource.MustParse("200Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 400Mi - pod4: newPodStatsFunc(pod4, resource.MustParse("300Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 450Mi - pod5: newPodStatsFunc(pod5, resource.MustParse("400Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 550Mi - pod6: newPodStatsFunc(pod6, resource.MustParse("500Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 650Mi + pod1: newPodStatsFunc(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("250Mi")), // 400Mi + pod2: newPodStatsFunc(pod2, resource.MustParse("60Mi"), resource.MustParse("30Mi"), resource.MustParse("10Mi")), // 100Mi + pod3: newPodStatsFunc(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi + pod4: newPodStatsFunc(pod4, resource.MustParse("10Mi"), resource.MustParse("40Mi"), resource.MustParse("100Mi")), // 150Mi } statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { result, found := stats[pod] return result, found } - pods := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6} - orderedBy(qosComparator, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, orderedByResource)).Sort(pods) - expected := []*v1.Pod{pod2, pod1, pod4, pod3, pod6, pod5} + pods := []*v1.Pod{pod4, pod3, pod2, pod1} + orderedBy(priority, disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, orderedByResource)).Sort(pods) + expected := []*v1.Pod{pod1, pod2, pod3, pod4} for i := range expected { if pods[i] != expected[i] { t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) @@ -610,22 +624,22 @@ func testOrderedByQoSResource(t *testing.T, orderedByResource v1.ResourceName, // TestOrderedByMemory ensures we order pods by greediest memory consumer relative to request. 
func TestOrderedByMemory(t *testing.T) { - pod1 := newPod("best-effort-high", []v1.Container{ + pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), }, nil) - pod2 := newPod("best-effort-low", []v1.Container{ + pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{ newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")), }, nil) - pod3 := newPod("burstable-high", []v1.Container{ + pod3 := newPod("burstable-high", defaultPriority, []v1.Container{ newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), }, nil) - pod4 := newPod("burstable-low", []v1.Container{ + pod4 := newPod("burstable-low", defaultPriority, []v1.Container{ newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), }, nil) - pod5 := newPod("guaranteed-high", []v1.Container{ + pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{ newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), }, nil) - pod6 := newPod("guaranteed-low", []v1.Container{ + pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{ newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), }, nil) stats := map[*v1.Pod]statsapi.PodStats{ @@ -650,41 +664,51 @@ func TestOrderedByMemory(t *testing.T) { } } -// TestOrderedByQoSMemory ensures we order by qosComparator and then memory consumption relative to request. -func TestOrderedByQoSMemory(t *testing.T) { - pod1 := newPod("best-effort-high", []v1.Container{ - newContainer("best-effort-high", newResourceList("", ""), newResourceList("", "")), +// TestOrderedByPriorityMemory ensures we order by priority and then memory consumption relative to request. 
+func TestOrderedByPriorityMemory(t *testing.T) { + enablePodPriority(true) + pod1 := newPod("above-requests-low-priority-high-usage", lowPriority, []v1.Container{ + newContainer("above-requests-low-priority-high-usage", newResourceList("", ""), newResourceList("", "")), }, nil) - pod2 := newPod("best-effort-low", []v1.Container{ - newContainer("best-effort-low", newResourceList("", ""), newResourceList("", "")), + pod2 := newPod("above-requests-low-priority-low-usage", lowPriority, []v1.Container{ + newContainer("above-requests-low-priority-low-usage", newResourceList("", ""), newResourceList("", "")), }, nil) - pod3 := newPod("burstable-high", []v1.Container{ - newContainer("burstable-high", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + pod3 := newPod("above-requests-high-priority-high-usage", highPriority, []v1.Container{ + newContainer("above-requests-high-priority-high-usage", newResourceList("", "100Mi"), newResourceList("", "")), }, nil) - pod4 := newPod("burstable-low", []v1.Container{ - newContainer("burstable-low", newResourceList("100m", "100Mi"), newResourceList("200m", "1Gi")), + pod4 := newPod("above-requests-high-priority-low-usage", highPriority, []v1.Container{ + newContainer("above-requests-high-priority-low-usage", newResourceList("", "100Mi"), newResourceList("", "")), }, nil) - pod5 := newPod("guaranteed-high", []v1.Container{ - newContainer("guaranteed-high", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + pod5 := newPod("below-requests-low-priority-high-usage", lowPriority, []v1.Container{ + newContainer("below-requests-low-priority-high-usage", newResourceList("", "1Gi"), newResourceList("", "")), }, nil) - pod6 := newPod("guaranteed-low", []v1.Container{ - newContainer("guaranteed-low", newResourceList("100m", "1Gi"), newResourceList("100m", "1Gi")), + pod6 := newPod("below-requests-low-priority-low-usage", lowPriority, []v1.Container{ + newContainer("below-requests-low-priority-low-usage", newResourceList("", "1Gi"), newResourceList("", "")), + }, nil) + pod7 := newPod("below-requests-high-priority-high-usage", highPriority, []v1.Container{ + newContainer("below-requests-high-priority-high-usage", newResourceList("", "1Gi"), newResourceList("", "")), + }, nil) + pod8 := newPod("below-requests-high-priority-low-usage", highPriority, []v1.Container{ + newContainer("below-requests-high-priority-low-usage", newResourceList("", "1Gi"), newResourceList("", "")), }, nil) stats := map[*v1.Pod]statsapi.PodStats{ pod1: newPodMemoryStats(pod1, resource.MustParse("500Mi")), // 500 relative to request pod2: newPodMemoryStats(pod2, resource.MustParse("50Mi")), // 50 relative to request - pod3: newPodMemoryStats(pod3, resource.MustParse("50Mi")), // -50 relative to request - pod4: newPodMemoryStats(pod4, resource.MustParse("300Mi")), // 200 relative to request + pod3: newPodMemoryStats(pod3, resource.MustParse("600Mi")), // 500 relative to request + pod4: newPodMemoryStats(pod4, resource.MustParse("150Mi")), // 50 relative to request pod5: newPodMemoryStats(pod5, resource.MustParse("800Mi")), // -200 relative to request pod6: newPodMemoryStats(pod6, resource.MustParse("200Mi")), // -800 relative to request + pod7: newPodMemoryStats(pod7, resource.MustParse("800Mi")), // -200 relative to request + pod8: newPodMemoryStats(pod8, resource.MustParse("200Mi")), // -800 relative to request } statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { result, found := stats[pod] return result, found } - pods := []*v1.Pod{pod1, pod2, pod3, pod4, 
pod5, pod6} - expected := []*v1.Pod{pod1, pod2, pod4, pod3, pod5, pod6} - orderedBy(qosComparator, memory(statsFn)).Sort(pods) + pods := []*v1.Pod{pod8, pod7, pod6, pod5, pod4, pod3, pod2, pod1} + // pods := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6, pod7, pod8} + expected := []*v1.Pod{pod1, pod2, pod3, pod4, pod5, pod6, pod7, pod8} + orderedBy(exceedMemoryRequests(statsFn), priority, memory(statsFn)).Sort(pods) for i := range expected { if pods[i] != expected[i] { t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) @@ -1640,7 +1664,7 @@ func newVolume(name string, volumeSource v1.VolumeSource) v1.Volume { } // newPod uses the name as the uid. Make names unique for testing. -func newPod(name string, containers []v1.Container, volumes []v1.Volume) *v1.Pod { +func newPod(name string, priority int32, containers []v1.Container, volumes []v1.Volume) *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -1649,6 +1673,7 @@ func newPod(name string, containers []v1.Container, volumes []v1.Volume) *v1.Pod Spec: v1.PodSpec{ Containers: containers, Volumes: volumes, + Priority: &priority, }, } } @@ -1684,3 +1709,7 @@ func (s1 thresholdList) Equal(s2 thresholdList) bool { } return true } + +func enablePodPriority(enabled bool) { + utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=%t", features.PodPriority, enabled)) +} diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go index 211ecd4fec8..fad4579d3c2 100644 --- a/test/e2e_node/eviction_test.go +++ b/test/e2e_node/eviction_test.go @@ -256,6 +256,49 @@ var _ = framework.KubeDescribe("LocalStorageCapacityIsolationEviction [Slow] [Se }) }) +// PriorityEvictionOrdering tests that the node responds to node memory pressure by evicting pods. +// This test tests that the guaranteed pod is never evicted, and that the lower-priority pod is evicted before +// the higher priority pod. 
+var _ = framework.KubeDescribe("PriorityEvictionOrdering [Slow] [Serial] [Disruptive] [Flaky]", func() { + f := framework.NewDefaultFramework("priority-eviction-ordering-test") + expectedNodeCondition := v1.NodeMemoryPressure + pressureTimeout := 10 * time.Minute + Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() { + tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) { + initialConfig.FeatureGates[string(features.PodPriority)] = true + memoryConsumed := resource.MustParse("600Mi") + summary := eventuallyGetSummary() + availableBytes := *(summary.Node.Memory.AvailableBytes) + initialConfig.EvictionHard = fmt.Sprintf("memory.available<%d", availableBytes-uint64(memoryConsumed.Value())) + initialConfig.EvictionMinimumReclaim = "" + }) + specs := []podEvictSpec{ + { + evictionPriority: 2, + pod: getMemhogPod("memory-hog-pod", "memory-hog", v1.ResourceRequirements{}), + }, + { + evictionPriority: 1, + pod: getMemhogPod("high-priority-memory-hog-pod", "high-priority-memory-hog", v1.ResourceRequirements{}), + }, + { + evictionPriority: 0, + pod: getMemhogPod("guaranteed-pod", "guaranteed-pod", v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("300Mi"), + }, + Limits: v1.ResourceList{ + v1.ResourceMemory: resource.MustParse("300Mi"), + }, + }), + }, + } + systemPriority := int32(2147483647) + specs[1].pod.Spec.Priority = &systemPriority + runEvictionTest(f, pressureTimeout, expectedNodeCondition, logMemoryMetrics, specs) + }) +}) + // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods type podEvictSpec struct { // P0 should never be evicted, P1 shouldn't evict before P2, etc. diff --git a/test/e2e_node/jenkins/jenkins-flaky.properties b/test/e2e_node/jenkins/jenkins-flaky.properties index 90f877d7db2..824c1309dcf 100644 --- a/test/e2e_node/jenkins/jenkins-flaky.properties +++ b/test/e2e_node/jenkins/jenkins-flaky.properties @@ -4,7 +4,7 @@ GCE_ZONE=us-central1-f GCE_PROJECT=k8s-jkns-ci-node-e2e CLEANUP=true GINKGO_FLAGS='--focus="\[Flaky\]"' -TEST_ARGS='--feature-gates=DynamicKubeletConfig=true,LocalStorageCapacityIsolation=true' +TEST_ARGS='--feature-gates=DynamicKubeletConfig=true,LocalStorageCapacityIsolation=true,PodPriority=true' KUBELET_ARGS='--cgroups-per-qos=true --cgroup-root=/' PARALLELISM=1 TIMEOUT=3h
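Note for reviewers on the ranking change in pkg/kubelet/eviction/helpers.go: rankMemoryPressure now sorts eviction candidates by three keys instead of the old qosComparator — first whether memory usage exceeds the pod's request, then pod priority (a no-op while the PodPriority feature gate is off), then usage above request — and rankDiskPressureFunc sorts by priority and then disk usage. The stand-alone Go sketch below illustrates how that multi-key fall-through works. It is a simplified illustration, not the kubelet's code: the pod struct, its fields, podPriorityEnabled, and the comparator names here are stand-ins, while the real comparators operate on *v1.Pod plus statsapi.PodStats and also handle pods with missing stats.

package main

import (
	"fmt"
	"sort"
)

// pod is a stand-in for v1.Pod plus its stats: only the fields the ranking needs.
type pod struct {
	name       string
	priority   int32 // pod.Spec.Priority
	memUsage   int64 // working-set bytes observed for the pod
	memRequest int64 // sum of container memory requests
}

// cmpFunc mirrors the comparator shape used by the eviction package's multiSorter:
// a negative result means p1 should be evicted before p2.
type cmpFunc func(p1, p2 pod) int

// podPriorityEnabled stands in for utilfeature.DefaultFeatureGate.Enabled(features.PodPriority).
var podPriorityEnabled = true

// exceedsMemoryRequests: pods running above their memory request rank first.
func exceedsMemoryRequests(p1, p2 pod) int {
	e1, e2 := p1.memUsage > p1.memRequest, p2.memUsage > p2.memRequest
	switch {
	case e1 == e2:
		return 0
	case e1: // p1 is over its request, p2 is not: evict p1 first
		return -1
	default:
		return 1
	}
}

// byPriority: lower priority ranks first; with the feature gate off every pod
// compares equal, so ordering falls through to the next key.
func byPriority(p1, p2 pod) int {
	if !podPriorityEnabled {
		return 0
	}
	switch {
	case p1.priority == p2.priority:
		return 0
	case p1.priority < p2.priority:
		return -1
	default:
		return 1
	}
}

// byMemoryAboveRequest: the greedier consumer relative to its request ranks first.
func byMemoryAboveRequest(p1, p2 pod) int {
	over1, over2 := p1.memUsage-p1.memRequest, p2.memUsage-p2.memRequest
	switch {
	case over1 == over2:
		return 0
	case over1 > over2:
		return -1
	default:
		return 1
	}
}

// orderedBy applies the comparators in sequence, moving to the next one only on
// ties — the same idea as the multiSorter behind orderedBy(...).Sort(pods).
func orderedBy(pods []pod, cmps ...cmpFunc) {
	sort.SliceStable(pods, func(i, j int) bool {
		for _, cmp := range cmps {
			if r := cmp(pods[i], pods[j]); r != 0 {
				return r < 0
			}
		}
		return false
	})
}

func main() {
	mi := int64(1 << 20)
	pods := []pod{
		{name: "below-requests-high-priority", priority: 1, memUsage: 200 * mi, memRequest: 1024 * mi},
		{name: "above-requests-low-priority", priority: -1, memUsage: 900 * mi, memRequest: 100 * mi},
		{name: "above-requests-high-priority", priority: 1, memUsage: 400 * mi, memRequest: 100 * mi},
		{name: "below-requests-low-priority", priority: -1, memUsage: 100 * mi, memRequest: 1024 * mi},
	}
	// Memory-pressure ranking: exceeds-requests, then priority, then usage above request.
	orderedBy(pods, exceedsMemoryRequests, byPriority, byMemoryAboveRequest)
	for i, p := range pods {
		fmt.Printf("evict #%d: %s\n", i+1, p.name)
	}
}

Run as-is, this prints the two above-request pods first (lowest priority first), then the two below-request pods, which mirrors the expectations encoded in TestOrderedByPriorityMemory in the patch.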
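A second note, on the PriorityEvictionOrdering node e2e test: its hard eviction threshold is derived from whatever memory is free when the test starts, so the memory-hog pods only have to consume about 600Mi before memory.available crosses it. A minimal sketch of that arithmetic, assuming an example free-memory figure (the real test reads the value from eventuallyGetSummary() and parses the 600Mi quantity with resource.MustParse):

package main

import "fmt"

func main() {
	// Assumed example value; the real test reads this from the node summary API.
	availableBytes := uint64(8 << 30)   // 8Gi reported free at test start
	memoryConsumed := uint64(600 << 20) // the 600Mi of headroom the test leaves

	// Hard eviction fires once memory.available drops below (free-at-start - 600Mi),
	// i.e. after the test pods have consumed roughly 600Mi between them.
	fmt.Printf("memory.available<%d\n", availableBytes-memoryConsumed)
}

With these example numbers the configured threshold would be memory.available<7960788992; on a real node the figure depends entirely on what the summary reports at setup time.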