mirror of
https://github.com/k3s-io/kubernetes.git
synced 2026-01-21 13:49:13 +00:00
Revert "Revert "[kubelet] Fix oom-score-adj policy in kubelet""
This commit is contained in:
@@ -326,6 +326,7 @@ func (cm *containerManagerImpl) setupNode() error {
|
||||
|
||||
systemContainers := []*systemContainer{}
|
||||
if cm.ContainerRuntime == "docker" {
|
||||
dockerVersion := getDockerVersion(cm.cadvisorInterface)
|
||||
if cm.RuntimeCgroupsName != "" {
|
||||
cont := newSystemCgroups(cm.RuntimeCgroupsName)
|
||||
var capacity = api.ResourceList{}
|
||||
@@ -351,13 +352,17 @@ func (cm *containerManagerImpl) setupNode() error {
|
||||
},
|
||||
},
|
||||
}
|
||||
dockerVersion := getDockerVersion(cm.cadvisorInterface)
|
||||
cont.ensureStateFunc = func(manager *fs.Manager) error {
|
||||
return ensureDockerInContainer(dockerVersion, -900, dockerContainer)
|
||||
return ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
|
||||
}
|
||||
systemContainers = append(systemContainers, cont)
|
||||
} else {
|
||||
cm.periodicTasks = append(cm.periodicTasks, func() {
|
||||
glog.V(10).Infof("Adding docker daemon periodic tasks")
|
||||
if err := ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
|
||||
glog.Error(err)
|
||||
return
|
||||
}
|
||||
cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
|
||||
if err != nil {
|
||||
glog.Error(err)
|
||||
@@ -401,11 +406,15 @@ func (cm *containerManagerImpl) setupNode() error {
|
||||
},
|
||||
}
|
||||
cont.ensureStateFunc = func(_ *fs.Manager) error {
|
||||
return manager.Apply(os.Getpid())
|
||||
return ensureProcessInContainerWithOOMScore(os.Getpid(), qos.KubeletOOMScoreAdj, &manager)
|
||||
}
|
||||
systemContainers = append(systemContainers, cont)
|
||||
} else {
|
||||
cm.periodicTasks = append(cm.periodicTasks, func() {
|
||||
if err := ensureProcessInContainerWithOOMScore(os.Getpid(), qos.KubeletOOMScoreAdj, nil); err != nil {
|
||||
glog.Error(err)
|
||||
return
|
||||
}
|
||||
cont, err := getContainer(os.Getpid())
|
||||
if err != nil {
|
||||
glog.Errorf("failed to find cgroups of kubelet - %v", err)
|
||||
@@ -516,16 +525,18 @@ func (cm *containerManagerImpl) SystemCgroupsLimit() api.ResourceList {
|
||||
}
|
||||
|
||||
func isProcessRunningInHost(pid int) (bool, error) {
|
||||
// Get init mount namespace. Mount namespace is unique for all containers.
|
||||
initMntNs, err := os.Readlink("/proc/1/ns/mnt")
|
||||
// Get init pid namespace.
|
||||
initPidNs, err := os.Readlink("/proc/1/ns/pid")
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to find mount namespace of init process")
|
||||
return false, fmt.Errorf("failed to find pid namespace of init process")
|
||||
}
|
||||
processMntNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/mnt", pid))
|
||||
glog.V(10).Infof("init pid ns is %q", initPidNs)
|
||||
processPidNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", pid))
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to find mount namespace of process %q", pid)
|
||||
return false, fmt.Errorf("failed to find pid namespace of process %q", pid)
|
||||
}
|
||||
return initMntNs == processMntNs, nil
|
||||
glog.V(10).Infof("Pid %d pid ns is %q", pid, processPidNs)
|
||||
return initPidNs == processPidNs, nil
|
||||
}
|
||||
|
||||
func getPidFromPidFile(pidFile string) (int, error) {
|
||||
@@ -567,7 +578,6 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
|
||||
if dockerVersion.GTE(containerdVersion) {
|
||||
dockerProcs = append(dockerProcs, process{containerdProcessName, containerdPidFile})
|
||||
}
|
||||
|
||||
var errs []error
|
||||
for _, proc := range dockerProcs {
|
||||
pids, err := getPidsForProcess(proc.name, proc.file)
|
||||
@@ -578,7 +588,7 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
|
||||
|
||||
// Move if the pid is not already in the desired container.
|
||||
for _, pid := range pids {
|
||||
if err := ensureProcessInContainer(pid, oomScoreAdj, manager); err != nil {
|
||||
if err := ensureProcessInContainerWithOOMScore(pid, oomScoreAdj, manager); err != nil {
|
||||
errs = append(errs, fmt.Errorf("errors moving %q pid: %v", proc.name, err))
|
||||
}
|
||||
}
|
||||
@@ -586,32 +596,37 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
|
||||
return utilerrors.NewAggregate(errs)
|
||||
}
|
||||
|
||||
func ensureProcessInContainer(pid int, oomScoreAdj int, manager *fs.Manager) error {
|
||||
func ensureProcessInContainerWithOOMScore(pid int, oomScoreAdj int, manager *fs.Manager) error {
|
||||
if runningInHost, err := isProcessRunningInHost(pid); err != nil {
|
||||
// Err on the side of caution. Avoid moving the docker daemon unless we are able to identify its context.
|
||||
return err
|
||||
} else if !runningInHost {
|
||||
// Process is running inside a container. Don't touch that.
|
||||
glog.V(2).Infof("pid %d is not running in the host namespaces", pid)
|
||||
return nil
|
||||
}
|
||||
|
||||
var errs []error
|
||||
cont, err := getContainer(pid)
|
||||
if err != nil {
|
||||
errs = append(errs, fmt.Errorf("failed to find container of PID %d: %v", pid, err))
|
||||
}
|
||||
|
||||
if cont != manager.Cgroups.Name {
|
||||
err = manager.Apply(pid)
|
||||
if manager != nil {
|
||||
cont, err := getContainer(pid)
|
||||
if err != nil {
|
||||
errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q", pid, cont, manager.Cgroups.Name))
|
||||
errs = append(errs, fmt.Errorf("failed to find container of PID %d: %v", pid, err))
|
||||
}
|
||||
|
||||
if cont != manager.Cgroups.Name {
|
||||
err = manager.Apply(pid)
|
||||
if err != nil {
|
||||
errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q: %v", pid, cont, manager.Cgroups.Name, err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also apply oom-score-adj to processes
|
||||
oomAdjuster := oom.NewOOMAdjuster()
|
||||
glog.V(5).Infof("attempting to apply oom_score_adj of %d to pid %d", oomScoreAdj, pid)
|
||||
if err := oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err != nil {
|
||||
errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid))
|
||||
glog.V(3).Infof("Failed to apply oom_score_adj %d for pid %d: %v", oomScoreAdj, pid, err)
|
||||
errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d: %v", oomScoreAdj, pid, err))
|
||||
}
|
||||
return utilerrors.NewAggregate(errs)
|
||||
}
|
||||
|
||||
@@ -21,8 +21,9 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
PodInfraOOMAdj int = -999
|
||||
PodInfraOOMAdj int = -998
|
||||
KubeletOOMScoreAdj int = -999
|
||||
DockerOOMScoreAdj int = -999
|
||||
KubeProxyOOMScoreAdj int = -999
|
||||
guaranteedOOMScoreAdj int = -998
|
||||
besteffortOOMScoreAdj int = 1000
|
||||
@@ -53,10 +54,10 @@ func GetContainerOOMScoreAdjust(pod *api.Pod, container *api.Container, memoryCa
|
||||
// Note that this is a heuristic, it won't work if a container has many small processes.
|
||||
memoryRequest := container.Resources.Requests.Memory().Value()
|
||||
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
|
||||
// A guaranteed pod using 100% of memory can have an OOM score of 1. Ensure
|
||||
// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
|
||||
// that burstable pods have a higher OOM score adjustment.
|
||||
if oomScoreAdjust < 2 {
|
||||
return 2
|
||||
if int(oomScoreAdjust) < (1000 + guaranteedOOMScoreAdj) {
|
||||
return (1000 + guaranteedOOMScoreAdj)
|
||||
}
|
||||
// Give burstable pods a higher chance of survival over besteffort pods.
|
||||
if int(oomScoreAdjust) == besteffortOOMScoreAdj {
|
||||
|
||||
@@ -20,6 +20,7 @@ package oom
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
@@ -65,28 +66,24 @@ func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
|
||||
maxTries := 2
|
||||
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
|
||||
value := strconv.Itoa(oomScoreAdj)
|
||||
glog.V(4).Infof("attempting to set %q to %q", oomScoreAdjPath, value)
|
||||
var err error
|
||||
for i := 0; i < maxTries; i++ {
|
||||
f, err := os.Open(oomScoreAdjPath)
|
||||
err = ioutil.WriteFile(oomScoreAdjPath, []byte(value), 0700)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
glog.V(2).Infof("%q does not exist", oomScoreAdjPath)
|
||||
return os.ErrNotExist
|
||||
}
|
||||
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||
continue
|
||||
}
|
||||
if _, err := f.Write([]byte(value)); err != nil {
|
||||
// we can ignore the return value of f.Close() here.
|
||||
f.Close()
|
||||
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||
continue
|
||||
}
|
||||
if err = f.Close(); err != nil {
|
||||
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||
|
||||
glog.V(3).Info(err)
|
||||
continue
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
glog.V(2).Infof("failed to set %q to %q: %v", oomScoreAdjPath, value, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user