Kubelet admits critical pods even under memory pressure

This commit is contained in:
bprashanth
2016-12-15 12:25:07 -08:00
parent 32946c5bd0
commit afd676d94c
5 changed files with 36 additions and 11 deletions

View File

@@ -28,6 +28,7 @@ go_library(
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
@@ -55,6 +56,7 @@ go_test(
"//pkg/client/record:go_default_library",
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/quota:go_default_library",
"//pkg/types:go_default_library",
"//pkg/util/clock:go_default_library",

View File

@@ -28,6 +28,7 @@ import (
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/kubelet/util/format"
@@ -108,7 +109,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
// the node has memory pressure, admit if not best-effort or if the pod is critical
if hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) {
notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
if notBestEffort {
if notBestEffort || kubepod.IsCriticalPod(attrs.Pod) {
return lifecycle.PodAdmitResult{Admit: true}
}
}

View File

@@ -25,6 +25,7 @@ import (
"k8s.io/kubernetes/pkg/client/record"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/clock"
)
@@ -210,6 +211,8 @@ func TestMemoryPressure(t *testing.T) {
// create a best effort pod to test admission
bestEffortPodToAdmit, _ := podMaker("best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi")
burstablePodToAdmit, _ := podMaker("burst-admit", newResourceList("100m", "100Mi"), newResourceList("200m", "200Mi"), "0Gi")
criticalBestEffortPodToAdmit, _ := podMaker("critical-best-admit", newResourceList("", ""), newResourceList("", ""), "0Gi")
criticalBestEffortPodToAdmit.ObjectMeta.Annotations = map[string]string{kubetypes.CriticalPodAnnotationKey: ""}
// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc)
@@ -220,8 +223,8 @@ func TestMemoryPressure(t *testing.T) {
}
// try to admit our pods (they should succeed)
expected := []bool{true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
expected := []bool{true, true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
@@ -296,9 +299,10 @@ func TestMemoryPressure(t *testing.T) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
}
// the best-effort pod should not admit, burstable should
expected = []bool{false, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
// the best-effort pod without critical annotation should not admit,
// burstable and critical pods should
expected = []bool{false, true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
@@ -320,9 +324,9 @@ func TestMemoryPressure(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
}
// the best-effort pod should not admit, burstable should
expected = []bool{false, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
// the best-effort pod should not admit, burstable and critical pods should
expected = []bool{false, true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
@@ -345,8 +349,8 @@ func TestMemoryPressure(t *testing.T) {
}
// all pods should admit now
expected = []bool{true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
expected = []bool{true, true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit, criticalBestEffortPodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}

View File

@@ -21,6 +21,7 @@ import (
"k8s.io/kubernetes/pkg/api/v1"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/types"
)
@@ -306,3 +307,11 @@ func (pm *basicManager) GetPodByMirrorPod(mirrorPod *v1.Pod) (*v1.Pod, bool) {
pod, ok := pm.podByFullName[kubecontainer.GetPodFullName(mirrorPod)]
return pod, ok
}
// IsCriticalPod reports whether the given pod carries the critical pod
// annotation key (kubetypes.CriticalPodAnnotationKey). Only the presence of
// the key is checked; the annotation's value is ignored. Both the rescheduler
// and the kubelet use this key to make admission and scheduling decisions.
func IsCriticalPod(pod *v1.Pod) bool {
	if _, found := pod.Annotations[kubetypes.CriticalPodAnnotationKey]; found {
		return true
	}
	return false
}

View File

@@ -27,6 +27,15 @@ const ConfigMirrorAnnotationKey = "kubernetes.io/config.mirror"
const ConfigFirstSeenAnnotationKey = "kubernetes.io/config.seen"
const ConfigHashAnnotationKey = "kubernetes.io/config.hash"
// CriticalPodAnnotationKey is the annotation key whose presence on a pod is
// checked by the kubelet. This key needs to stay in sync with the key used by
// the rescheduler, which currently lives in contrib. Its presence indicates 2
// things, as far as the kubelet is concerned:
//  1. Resource related admission checks will prioritize the admission of
//     pods bearing the key, over pods without the key, regardless of QoS.
//  2. The OOM score of pods bearing the key will be <= pods without
//     the key (where the <= part is determined by QoS).
const CriticalPodAnnotationKey = "scheduler.alpha.kubernetes.io/critical-pod"
// PodOperation defines what changes will be made on a pod configuration.
type PodOperation int