Merge pull request #39114 from dchen1107/kube-proxy

Automatic merge from submit-queue (batch tested with PRs 39114, 36004)

assign -998 as the oom_score_adj for critical pods (e.g. kube-proxy)

I also validated this on a test cluster: on a freshly built cluster, I killed the kube-proxy pod and confirmed that both kube-proxy processes (the shell wrapper and kube-proxy itself) carry an oom_score_adj of -998:

```
root      2660  2643  0 Dec21 ?        00:00:00 /bin/sh -c kube-proxy --master=https://104.198.79.64 --kubeconfig=/var/lib/kube-proxy/kubeconfig  --cluster-cidr=10.180.0.0/14 --resource-container="" --v=4   1>>/var/log/kube-proxy.log 2>&1
root      2667  2660  0 Dec21 ?        00:03:14 kube-proxy --master=https://104.198.79.64 --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.180.0.0/14 --resource-container= --v=4
# cat /proc/2660/oom_score_adj 
-998
# cat /proc/2667/oom_score_adj 
-998
```

In this PR, I also include a small fix for an import cycle issue. The right fix would be to remove the dependency on the qos package from pkg/apis/componentconfig/v1alpha1, but since we plan to cherry-pick this PR to 1.5 and possibly 1.4, I want to touch the source as little as possible.
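
For readers unfamiliar with the cycle: Go refuses to compile mutually importing packages, so the helper is moved into the leaf package pkg/kubelet/types, which every other kubelet package can import. A minimal illustration of the pattern (hypothetical package and identifier names, not the real Kubernetes ones):

```
// Package leaf is a dependency-free home for shared helpers; moving
// IsCritical here is the same pattern as moving IsCriticalPod into
// pkg/kubelet/types in this PR. All names below are illustrative.
package leaf

// CriticalAnnotationKey is a stand-in for kubetypes.CriticalPodAnnotationKey.
const CriticalAnnotationKey = "example.io/critical-pod"

// IsCritical reports whether the annotation map carries the critical key.
// Packages that previously imported each other can now both import leaf
// without forming a cycle.
func IsCritical(annotations map[string]string) bool {
	_, ok := annotations[CriticalAnnotationKey]
	return ok
}
```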

Partial fix: #38322
Kubernetes Submit Queue 2016-12-21 18:51:28 -08:00 committed by GitHub
commit 66152b9066
8 changed files with 51 additions and 14 deletions

View File

@@ -28,9 +28,9 @@ go_library(
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/kubelet/server/stats:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/quota/evaluator/core:go_default_library",
"//pkg/util/clock:go_default_library",

View File

@@ -28,9 +28,9 @@ import (
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/util/clock"
"k8s.io/kubernetes/pkg/util/wait"
@@ -109,7 +109,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
// the node has memory pressure, admit if not best-effort
if hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) {
notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
if notBestEffort || kubepod.IsCriticalPod(attrs.Pod) {
if notBestEffort || kubetypes.IsCriticalPod(attrs.Pod) {
return lifecycle.PodAdmitResult{Admit: true}
}
}
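
The net effect of this hunk: under memory pressure the kubelet keeps rejecting best-effort pods, except critical ones. Reduced to a standalone predicate (a sketch; the manager state and the hasNodeCondition check are elided):

```
// A sketch, not kubelet code: condenses the post-change Admit branch.
package sketch

// admitUnderMemoryPressure reports whether a pod is admitted while the
// node reports MemoryPressure: not-best-effort pods and critical pods pass.
func admitUnderMemoryPressure(isBestEffort, isCritical bool) bool {
	return !isBestEffort || isCritical
}
```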

View File

@@ -1915,7 +1915,7 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
var criticalPods []*v1.Pod
var nonCriticalPods []*v1.Pod
for _, p := range pods {
if kubepod.IsCriticalPod(p) {
if kubetypes.IsCriticalPod(p) {
criticalPods = append(criticalPods, p)
} else {
nonCriticalPods = append(nonCriticalPods, p)

View File

@@ -21,7 +21,6 @@ import (
"k8s.io/kubernetes/pkg/api/v1"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/types"
)
@@ -307,11 +306,3 @@ func (pm *basicManager) GetPodByMirrorPod(mirrorPod *v1.Pod) (*v1.Pod, bool) {
pod, ok := pm.podByFullName[kubecontainer.GetPodFullName(mirrorPod)]
return pod, ok
}
// IsCriticalPod returns true if the pod bears the critical pod annotation
// key. Both the rescheduler and the kubelet use this key to make admission
// and scheduling decisions.
func IsCriticalPod(pod *v1.Pod) bool {
_, ok := pod.Annotations[kubetypes.CriticalPodAnnotationKey]
return ok
}

View File

@@ -21,6 +21,7 @@ go_library(
"//pkg/api:go_default_library",
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/util/sets:go_default_library",
],
)
@@ -36,5 +37,6 @@ go_test(
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/types:go_default_library",
],
)

View File

@@ -16,14 +16,20 @@ limitations under the License.
package qos
import "k8s.io/kubernetes/pkg/api/v1"
import (
"k8s.io/kubernetes/pkg/api/v1"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
const (
// PodInfraOOMAdj is very docker specific. For arbitrary runtime, it may not make
// sense to set sandbox level oom score, e.g. a sandbox could only be a namespace
// without a process.
// TODO: Handle infra container oom score adj in a runtime agnostic way.
// TODO: Should handle critical pod oom score adj with a proper preemption priority.
// This is the workaround for https://github.com/kubernetes/kubernetes/issues/38322.
PodInfraOOMAdj int = -998
CriticalPodOOMAdj int = -998
KubeletOOMScoreAdj int = -999
DockerOOMScoreAdj int = -999
KubeProxyOOMScoreAdj int = -999
@@ -38,6 +44,10 @@ const (
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
// See https://lwn.net/Articles/391222/ for more information.
func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int {
if kubetypes.IsCriticalPod(pod) {
return CriticalPodOOMAdj
}
switch GetPodQOS(pod) {
case Guaranteed:
// Guaranteed containers should be the last to get killed.
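
With the early return in place, score computation for a critical pod short-circuits before any QoS inspection. A condensed sketch of the resulting precedence (the guaranteed/best-effort constants mirror the unexported values in the real policy file, assumed to be -998 and 1000; the burstable arithmetic and bounds clamping are replaced by a placeholder):

```
// Sketch of the precedence GetContainerOOMScoreAdjust follows after this
// change; not the real kubelet implementation.
package sketch

func oomScoreAdjust(critical bool, qosClass string) int {
	const (
		criticalPodOOMAdj     = -998 // CriticalPodOOMAdj above
		guaranteedOOMScoreAdj = -998
		besteffortOOMScoreAdj = 1000
	)
	if critical {
		// New in this PR: critical pods (e.g. kube-proxy) win
		// regardless of QoS class.
		return criticalPodOOMAdj
	}
	switch qosClass {
	case "Guaranteed":
		return guaranteedOOMScoreAdj
	case "BestEffort":
		return besteffortOOMScoreAdj
	default: // Burstable
		// Placeholder: the real value scales with the container's
		// memory request relative to node capacity.
		return 2
	}
}
```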

View File

@@ -22,6 +22,7 @@ import (
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
const (
@@ -135,6 +136,25 @@ var (
},
},
}
criticalPodWithNoLimit = v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: map[string]string{
kubetypes.CriticalPodAnnotationKey: "",
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)),
v1.ResourceName(v1.ResourceCPU): resource.MustParse("5m"),
},
},
},
},
},
}
)
type oomTest struct {
@@ -188,6 +208,12 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) {
lowOOMScoreAdj: 2,
highOOMScoreAdj: 2,
},
{
pod: &criticalPodWithNoLimit,
memoryCapacity: standardMemoryAmount,
lowOOMScoreAdj: -998,
highOOMScoreAdj: -998,
},
}
for _, test := range oomTests {
oomScoreAdj := GetContainerOOMScoreAdjust(test.pod, &test.pod.Spec.Containers[0], test.memoryCapacity)

View File

@@ -140,3 +140,11 @@ func (sp SyncPodType) String() string {
return "unknown"
}
}
// IsCriticalPod returns true if the pod bears the critical pod annotation
// key. Both the rescheduler and the kubelet use this key to make admission
// and scheduling decisions.
func IsCriticalPod(pod *v1.Pod) bool {
_, ok := pod.Annotations[CriticalPodAnnotationKey]
return ok
}
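
To make the relocated helper concrete, a minimal usage sketch (import paths as they stand in this commit; as the test fixture above shows, only the presence of the annotation key matters, so an empty value is enough):

```
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

func main() {
	// Any pod bearing the critical-pod annotation key is treated as
	// critical; the annotation value is ignored.
	pod := &v1.Pod{
		ObjectMeta: v1.ObjectMeta{
			Annotations: map[string]string{
				kubetypes.CriticalPodAnnotationKey: "",
			},
		},
	}
	fmt.Println(kubetypes.IsCriticalPod(pod)) // prints: true
}
```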