mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #39114 from dchen1107/kube-proxy
Automatic merge from submit-queue (batch tested with PRs 39114, 36004). Assign -998 as the oom_score_adj for critical pods (e.g. kube-proxy). I also validated this with a testing cluster: a freshly built cluster, killing the kube-proxy pod, etc. ``` root 2660 2643 0 Dec21 ? 00:00:00 /bin/sh -c kube-proxy --master=https://104.198.79.64 --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.180.0.0/14 --resource-container="" --v=4 1>>/var/log/kube-proxy.log 2>&1 root 2667 2660 0 Dec21 ? 00:03:14 kube-proxy --master=https://104.198.79.64 --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.180.0.0/14 --resource-container= --v=4 # cat /proc/2660/oom_score_adj -998 # cat /proc/2667/oom_score_adj -998 ``` In this PR, I also include a small fix for an import cycle issue. The right fix would be to remove the dependency on the qos package from pkg/apis/componentconfig/v1alpha1. But since we plan to cherry-pick this PR to both 1.5 and (possibly) 1.4, I want to touch the source as little as possible. Partial fix: #38322
This commit is contained in:
commit
66152b9066
@ -28,9 +28,9 @@ go_library(
|
||||
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
|
||||
"//pkg/kubelet/cm:go_default_library",
|
||||
"//pkg/kubelet/lifecycle:go_default_library",
|
||||
"//pkg/kubelet/pod:go_default_library",
|
||||
"//pkg/kubelet/qos:go_default_library",
|
||||
"//pkg/kubelet/server/stats:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/kubelet/util/format:go_default_library",
|
||||
"//pkg/quota/evaluator/core:go_default_library",
|
||||
"//pkg/util/clock:go_default_library",
|
||||
|
@ -28,9 +28,9 @@ import (
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/kubelet/server/stats"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
||||
"k8s.io/kubernetes/pkg/util/clock"
|
||||
"k8s.io/kubernetes/pkg/util/wait"
|
||||
@ -109,7 +109,7 @@ func (m *managerImpl) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAd
|
||||
// the node has memory pressure, admit if not best-effort
|
||||
if hasNodeCondition(m.nodeConditions, v1.NodeMemoryPressure) {
|
||||
notBestEffort := qos.BestEffort != qos.GetPodQOS(attrs.Pod)
|
||||
if notBestEffort || kubepod.IsCriticalPod(attrs.Pod) {
|
||||
if notBestEffort || kubetypes.IsCriticalPod(attrs.Pod) {
|
||||
return lifecycle.PodAdmitResult{Admit: true}
|
||||
}
|
||||
}
|
||||
|
@ -1915,7 +1915,7 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
|
||||
var criticalPods []*v1.Pod
|
||||
var nonCriticalPods []*v1.Pod
|
||||
for _, p := range pods {
|
||||
if kubepod.IsCriticalPod(p) {
|
||||
if kubetypes.IsCriticalPod(p) {
|
||||
criticalPods = append(criticalPods, p)
|
||||
} else {
|
||||
nonCriticalPods = append(nonCriticalPods, p)
|
||||
|
@ -21,7 +21,6 @@ import (
|
||||
|
||||
"k8s.io/kubernetes/pkg/api/v1"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
"k8s.io/kubernetes/pkg/types"
|
||||
)
|
||||
|
||||
@ -307,11 +306,3 @@ func (pm *basicManager) GetPodByMirrorPod(mirrorPod *v1.Pod) (*v1.Pod, bool) {
|
||||
pod, ok := pm.podByFullName[kubecontainer.GetPodFullName(mirrorPod)]
|
||||
return pod, ok
|
||||
}
|
||||
|
||||
// IsCriticalPod returns true if the pod bears the critical pod annotation
|
||||
// key. Both the rescheduler and the kubelet use this key to make admission
|
||||
// and scheduling decisions.
|
||||
func IsCriticalPod(pod *v1.Pod) bool {
|
||||
_, ok := pod.Annotations[kubetypes.CriticalPodAnnotationKey]
|
||||
return ok
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ go_library(
|
||||
"//pkg/api:go_default_library",
|
||||
"//pkg/api/resource:go_default_library",
|
||||
"//pkg/api/v1:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/util/sets:go_default_library",
|
||||
],
|
||||
)
|
||||
@ -36,5 +37,6 @@ go_test(
|
||||
deps = [
|
||||
"//pkg/api/resource:go_default_library",
|
||||
"//pkg/api/v1:go_default_library",
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
],
|
||||
)
|
||||
|
@ -16,14 +16,20 @@ limitations under the License.
|
||||
|
||||
package qos
|
||||
|
||||
import "k8s.io/kubernetes/pkg/api/v1"
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/api/v1"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
)
|
||||
|
||||
const (
|
||||
// PodInfraOOMAdj is very docker specific. For arbitrary runtime, it may not make
|
||||
// sense to set sandbox level oom score, e.g. a sandbox could only be a namespace
|
||||
// without a process.
|
||||
// TODO: Handle infra container oom score adj in a runtime agnostic way.
|
||||
// TODO: Should handle critical pod oom score adj with a proper preemption priority.
|
||||
// This is the workaround for https://github.com/kubernetes/kubernetes/issues/38322.
|
||||
PodInfraOOMAdj int = -998
|
||||
CriticalPodOOMAdj int = -998
|
||||
KubeletOOMScoreAdj int = -999
|
||||
DockerOOMScoreAdj int = -999
|
||||
KubeProxyOOMScoreAdj int = -999
|
||||
@ -38,6 +44,10 @@ const (
|
||||
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
|
||||
// See https://lwn.net/Articles/391222/ for more information.
|
||||
func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int {
|
||||
if kubetypes.IsCriticalPod(pod) {
|
||||
return CriticalPodOOMAdj
|
||||
}
|
||||
|
||||
switch GetPodQOS(pod) {
|
||||
case Guaranteed:
|
||||
// Guaranteed containers should be the last to get killed.
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
"k8s.io/kubernetes/pkg/api/v1"
|
||||
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -135,6 +136,25 @@ var (
|
||||
},
|
||||
},
|
||||
}
|
||||
criticalPodWithNoLimit = v1.Pod{
|
||||
ObjectMeta: v1.ObjectMeta{
|
||||
Annotations: map[string]string{
|
||||
kubetypes.CriticalPodAnnotationKey: "",
|
||||
},
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Resources: v1.ResourceRequirements{
|
||||
Requests: v1.ResourceList{
|
||||
v1.ResourceName(v1.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)),
|
||||
v1.ResourceName(v1.ResourceCPU): resource.MustParse("5m"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
type oomTest struct {
|
||||
@ -188,6 +208,12 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) {
|
||||
lowOOMScoreAdj: 2,
|
||||
highOOMScoreAdj: 2,
|
||||
},
|
||||
{
|
||||
pod: &criticalPodWithNoLimit,
|
||||
memoryCapacity: standardMemoryAmount,
|
||||
lowOOMScoreAdj: -998,
|
||||
highOOMScoreAdj: -998,
|
||||
},
|
||||
}
|
||||
for _, test := range oomTests {
|
||||
oomScoreAdj := GetContainerOOMScoreAdjust(test.pod, &test.pod.Spec.Containers[0], test.memoryCapacity)
|
||||
|
@ -140,3 +140,11 @@ func (sp SyncPodType) String() string {
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// IsCriticalPod returns true if the pod bears the critical pod annotation
|
||||
// key. Both the rescheduler and the kubelet use this key to make admission
|
||||
// and scheduling decisions.
|
||||
func IsCriticalPod(pod *v1.Pod) bool {
|
||||
_, ok := pod.Annotations[CriticalPodAnnotationKey]
|
||||
return ok
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user