mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Merge pull request #12182 from AnanyaKumar/qos-node
Add QoS support on node
This commit is contained in:
commit
950ec96db0
@ -28,11 +28,13 @@ import (
|
||||
"k8s.io/kubernetes/pkg/client"
|
||||
"k8s.io/kubernetes/pkg/client/clientcmd"
|
||||
clientcmdapi "k8s.io/kubernetes/pkg/client/clientcmd/api"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/proxy"
|
||||
"k8s.io/kubernetes/pkg/proxy/config"
|
||||
"k8s.io/kubernetes/pkg/util"
|
||||
"k8s.io/kubernetes/pkg/util/exec"
|
||||
"k8s.io/kubernetes/pkg/util/iptables"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/spf13/pflag"
|
||||
@ -56,7 +58,7 @@ func NewProxyServer() *ProxyServer {
|
||||
BindAddress: util.IP(net.ParseIP("0.0.0.0")),
|
||||
HealthzPort: 10249,
|
||||
HealthzBindAddress: util.IP(net.ParseIP("127.0.0.1")),
|
||||
OOMScoreAdj: -899,
|
||||
OOMScoreAdj: qos.KubeProxyOomScoreAdj,
|
||||
ResourceContainer: "/kube-proxy",
|
||||
}
|
||||
}
|
||||
@ -76,7 +78,8 @@ func (s *ProxyServer) AddFlags(fs *pflag.FlagSet) {
|
||||
// Run runs the specified ProxyServer. This should never exit.
|
||||
func (s *ProxyServer) Run(_ []string) error {
|
||||
// TODO(vmarmol): Use container config for this.
|
||||
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
|
||||
oomAdjuster := oom.NewOomAdjuster()
|
||||
if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
|
||||
glog.V(2).Info(err)
|
||||
}
|
||||
|
||||
|
@ -48,10 +48,12 @@ import (
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/master/ports"
|
||||
"k8s.io/kubernetes/pkg/util"
|
||||
"k8s.io/kubernetes/pkg/util/mount"
|
||||
nodeutil "k8s.io/kubernetes/pkg/util/node"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
"k8s.io/kubernetes/pkg/volume"
|
||||
|
||||
"github.com/golang/glog"
|
||||
@ -167,7 +169,7 @@ func NewKubeletServer() *KubeletServer {
|
||||
HealthzPort: 10248,
|
||||
HealthzBindAddress: util.IP(net.ParseIP("127.0.0.1")),
|
||||
RegisterNode: true, // will be ignored if no apiserver is configured
|
||||
OOMScoreAdj: -900,
|
||||
OOMScoreAdj: qos.KubeletOomScoreAdj,
|
||||
MasterServiceNamespace: api.NamespaceDefault,
|
||||
ImageGCHighThresholdPercent: 90,
|
||||
ImageGCLowThresholdPercent: 80,
|
||||
@ -400,7 +402,8 @@ func (s *KubeletServer) Run(kcfg *KubeletConfig) error {
|
||||
glog.V(2).Infof("Using root directory: %v", s.RootDirectory)
|
||||
|
||||
// TODO(vmarmol): Do this through container config.
|
||||
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
|
||||
oomAdjuster := oom.NewOomAdjuster()
|
||||
if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
|
||||
glog.Warning(err)
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
"k8s.io/kubernetes/pkg/util"
|
||||
"k8s.io/kubernetes/pkg/util/mount"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
@ -125,7 +126,8 @@ func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error)
|
||||
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
|
||||
rand.Seed(time.Now().UTC().UnixNano())
|
||||
|
||||
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
|
||||
oomAdjuster := oom.NewOomAdjuster()
|
||||
if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
|
||||
log.Info(err)
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
|
||||
"k8s.io/kubernetes/pkg/util"
|
||||
"k8s.io/kubernetes/pkg/util/errors"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -229,7 +230,8 @@ func ensureDockerInContainer(cadvisor cadvisor.Interface, oomScoreAdj int, manag
|
||||
}
|
||||
|
||||
// Also apply oom_score_adj to processes
|
||||
if err := util.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil {
|
||||
oomAdjuster := oom.NewOomAdjuster()
|
||||
if err := oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil {
|
||||
errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid))
|
||||
}
|
||||
}
|
||||
|
@ -27,6 +27,7 @@ import (
|
||||
|
||||
"github.com/docker/docker/pkg/jsonmessage"
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
"k8s.io/kubernetes/pkg/credentialprovider"
|
||||
@ -655,7 +656,7 @@ func TestFindContainersByPod(t *testing.T) {
|
||||
}
|
||||
fakeClient := &FakeDockerClient{}
|
||||
np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))
|
||||
containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, nil)
|
||||
containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, &cadvisorApi.MachineInfo{}, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, nil)
|
||||
for i, test := range tests {
|
||||
fakeClient.ContainerList = test.containerList
|
||||
fakeClient.ExitedContainerList = test.exitedContainerList
|
||||
|
@ -17,11 +17,14 @@ limitations under the License.
|
||||
package dockertools
|
||||
|
||||
import (
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||
"k8s.io/kubernetes/pkg/kubelet/prober"
|
||||
kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
"k8s.io/kubernetes/pkg/util/procfs"
|
||||
)
|
||||
|
||||
func NewFakeDockerManager(
|
||||
@ -29,6 +32,7 @@ func NewFakeDockerManager(
|
||||
recorder record.EventRecorder,
|
||||
readinessManager *kubecontainer.ReadinessManager,
|
||||
containerRefManager *kubecontainer.RefManager,
|
||||
machineInfo *cadvisorApi.MachineInfo,
|
||||
podInfraContainerImage string,
|
||||
qps float32,
|
||||
burst int,
|
||||
@ -39,8 +43,11 @@ func NewFakeDockerManager(
|
||||
httpClient kubeletTypes.HttpGetter,
|
||||
runtimeHooks kubecontainer.RuntimeHooks) *DockerManager {
|
||||
|
||||
dm := NewDockerManager(client, recorder, readinessManager, containerRefManager, podInfraContainerImage, qps,
|
||||
burst, containerLogsDir, osInterface, networkPlugin, generator, httpClient, runtimeHooks, &NativeExecHandler{})
|
||||
fakeOomAdjuster := oom.NewFakeOomAdjuster()
|
||||
fakeProcFs := procfs.NewFakeProcFs()
|
||||
dm := NewDockerManager(client, recorder, readinessManager, containerRefManager, machineInfo, podInfraContainerImage, qps,
|
||||
burst, containerLogsDir, osInterface, networkPlugin, generator, httpClient, runtimeHooks, &NativeExecHandler{},
|
||||
fakeOomAdjuster, fakeProcFs)
|
||||
dm.puller = &FakeDockerPuller{}
|
||||
dm.prober = prober.New(nil, readinessManager, containerRefManager, recorder)
|
||||
return dm
|
||||
|
@ -34,6 +34,7 @@ import (
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
"github.com/golang/glog"
|
||||
"github.com/golang/groupcache/lru"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/latest"
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
@ -42,20 +43,17 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||
"k8s.io/kubernetes/pkg/kubelet/prober"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types"
|
||||
"k8s.io/kubernetes/pkg/probe"
|
||||
"k8s.io/kubernetes/pkg/securitycontext"
|
||||
"k8s.io/kubernetes/pkg/types"
|
||||
"k8s.io/kubernetes/pkg/util"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
"k8s.io/kubernetes/pkg/util/procfs"
|
||||
)
|
||||
|
||||
const (
|
||||
// The oom_score_adj of the POD infrastructure container. The default is 0 for
|
||||
// any other docker containers, so any value below that makes it *less* likely
|
||||
// to get OOM killed.
|
||||
podOomScoreAdj = -100
|
||||
userContainerOomScoreAdj = 0
|
||||
|
||||
maxReasonCacheEntries = 200
|
||||
|
||||
kubernetesPodLabel = "io.kubernetes.pod.data"
|
||||
@ -75,6 +73,7 @@ type DockerManager struct {
|
||||
readinessManager *kubecontainer.ReadinessManager
|
||||
containerRefManager *kubecontainer.RefManager
|
||||
os kubecontainer.OSInterface
|
||||
machineInfo *cadvisorApi.MachineInfo
|
||||
|
||||
// The image name of the pod infra container.
|
||||
podInfraContainerImage string
|
||||
@ -114,6 +113,12 @@ type DockerManager struct {
|
||||
|
||||
// Handler used to execute commands in containers.
|
||||
execHandler ExecHandler
|
||||
|
||||
// Used to set OOM scores of processes.
|
||||
oomAdjuster *oom.OomAdjuster
|
||||
|
||||
// Get information from /proc mount.
|
||||
procFs procfs.ProcFsInterface
|
||||
}
|
||||
|
||||
func NewDockerManager(
|
||||
@ -121,6 +126,7 @@ func NewDockerManager(
|
||||
recorder record.EventRecorder,
|
||||
readinessManager *kubecontainer.ReadinessManager,
|
||||
containerRefManager *kubecontainer.RefManager,
|
||||
machineInfo *cadvisorApi.MachineInfo,
|
||||
podInfraContainerImage string,
|
||||
qps float32,
|
||||
burst int,
|
||||
@ -130,7 +136,9 @@ func NewDockerManager(
|
||||
generator kubecontainer.RunContainerOptionsGenerator,
|
||||
httpClient kubeletTypes.HttpGetter,
|
||||
runtimeHooks kubecontainer.RuntimeHooks,
|
||||
execHandler ExecHandler) *DockerManager {
|
||||
execHandler ExecHandler,
|
||||
oomAdjuster *oom.OomAdjuster,
|
||||
procFs procfs.ProcFsInterface) *DockerManager {
|
||||
// Work out the location of the Docker runtime, defaulting to /var/lib/docker
|
||||
// if there are any problems.
|
||||
dockerRoot := "/var/lib/docker"
|
||||
@ -169,6 +177,7 @@ func NewDockerManager(
|
||||
readinessManager: readinessManager,
|
||||
containerRefManager: containerRefManager,
|
||||
os: osInterface,
|
||||
machineInfo: machineInfo,
|
||||
podInfraContainerImage: podInfraContainerImage,
|
||||
reasonCache: reasonCache,
|
||||
puller: newDockerPuller(client, qps, burst),
|
||||
@ -179,6 +188,8 @@ func NewDockerManager(
|
||||
generator: generator,
|
||||
runtimeHooks: runtimeHooks,
|
||||
execHandler: execHandler,
|
||||
oomAdjuster: oomAdjuster,
|
||||
procFs: procFs,
|
||||
}
|
||||
dm.runner = lifecycle.NewHandlerRunner(httpClient, dm, dm)
|
||||
dm.prober = prober.New(dm, readinessManager, containerRefManager, recorder)
|
||||
@ -1242,32 +1253,42 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
|
||||
glog.Errorf("Failed to create symbolic link to the log file of pod %q container %q: %v", podFullName, container.Name, err)
|
||||
}
|
||||
|
||||
// Set OOM score of POD container to lower than those of the other containers
|
||||
// which have OOM score 0 by default in the pod. This ensures that it is
|
||||
// killed only as a last resort.
|
||||
// Container information is used in adjusting OOM scores and adding ndots.
|
||||
containerInfo, err := dm.client.InspectContainer(string(id))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Ensure the PID actually exists, else we'll move ourselves.
|
||||
if containerInfo.State.Pid == 0 {
|
||||
return "", fmt.Errorf("failed to get init PID for Docker container %q", string(id))
|
||||
}
|
||||
|
||||
// Set OOM score of the container based on the priority of the container.
|
||||
// Processes in lower-priority pods should be killed first if the system runs out of memory.
|
||||
// The main pod infrastructure container is considered high priority, since if it is killed the
|
||||
// whole pod will die.
|
||||
var oomScoreAdj int
|
||||
if container.Name == PodInfraContainerName {
|
||||
util.ApplyOomScoreAdj(containerInfo.State.Pid, podOomScoreAdj)
|
||||
oomScoreAdj = qos.PodInfraOomAdj
|
||||
} else {
|
||||
oomScoreAdj = qos.GetContainerOomScoreAdjust(container, dm.machineInfo.MemoryCapacity)
|
||||
}
|
||||
cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err = dm.oomAdjuster.ApplyOomScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// currently, Docker does not have a flag by which the ndots option can be passed.
|
||||
// (A seperate issue has been filed with Docker to add a ndots flag)
|
||||
// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
|
||||
// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
|
||||
// we modify it when the pause container is created since it is the first container created in the pod since it holds
|
||||
// the networking namespace.
|
||||
if container.Name == PodInfraContainerName {
|
||||
err = addNDotsOption(containerInfo.ResolvConfPath)
|
||||
} else {
|
||||
// Children processes of docker daemon will inheritant the OOM score from docker
|
||||
// daemon process. We explicitly apply OOM score 0 by default to the user
|
||||
// containers to avoid daemons or POD containers are killed by oom killer.
|
||||
util.ApplyOomScoreAdj(containerInfo.State.Pid, userContainerOomScoreAdj)
|
||||
}
|
||||
|
||||
return kubeletTypes.DockerID(id), err
|
||||
|
@ -31,6 +31,7 @@ import (
|
||||
"time"
|
||||
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/testapi"
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
@ -114,6 +115,7 @@ func newTestDockerManagerWithHTTPClient(fakeHTTPClient *fakeHTTP) (*DockerManage
|
||||
fakeRecorder,
|
||||
readinessManager,
|
||||
containerRefManager,
|
||||
&cadvisorApi.MachineInfo{},
|
||||
PodInfraContainerImage,
|
||||
0, 0, "",
|
||||
kubecontainer.FakeOS{},
|
||||
|
@ -34,6 +34,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
apierrors "k8s.io/kubernetes/pkg/api/errors"
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
@ -60,13 +61,13 @@ import (
|
||||
utilErrors "k8s.io/kubernetes/pkg/util/errors"
|
||||
"k8s.io/kubernetes/pkg/util/mount"
|
||||
nodeutil "k8s.io/kubernetes/pkg/util/node"
|
||||
"k8s.io/kubernetes/pkg/util/oom"
|
||||
"k8s.io/kubernetes/pkg/util/procfs"
|
||||
"k8s.io/kubernetes/pkg/version"
|
||||
"k8s.io/kubernetes/pkg/volume"
|
||||
"k8s.io/kubernetes/pkg/watch"
|
||||
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
|
||||
"k8s.io/kubernetes/third_party/golang/expansion"
|
||||
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -274,6 +275,14 @@ func NewMainKubelet(
|
||||
klet.networkPlugin = plug
|
||||
}
|
||||
|
||||
machineInfo, err := klet.GetCachedMachineInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
oomAdjuster := oom.NewOomAdjuster()
|
||||
procFs := procfs.NewProcFs()
|
||||
|
||||
// Initialize the runtime.
|
||||
switch containerRuntime {
|
||||
case "docker":
|
||||
@ -283,6 +292,7 @@ func NewMainKubelet(
|
||||
recorder,
|
||||
readinessManager,
|
||||
containerRefManager,
|
||||
machineInfo,
|
||||
podInfraContainerImage,
|
||||
pullQPS,
|
||||
pullBurst,
|
||||
@ -292,7 +302,9 @@ func NewMainKubelet(
|
||||
klet,
|
||||
klet.httpClient,
|
||||
newKubeletRuntimeHooks(recorder),
|
||||
dockerExecHandler)
|
||||
dockerExecHandler,
|
||||
oomAdjuster,
|
||||
procFs)
|
||||
case "rkt":
|
||||
conf := &rkt.Config{InsecureSkipVerify: true}
|
||||
rktRuntime, err := rkt.New(
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
"time"
|
||||
|
||||
docker "github.com/fsouza/go-dockerclient"
|
||||
cadvisorApi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/client/record"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
@ -44,7 +45,7 @@ func newPod(uid, name string) *api.Pod {
|
||||
func createFakeRuntimeCache(fakeRecorder *record.FakeRecorder) kubecontainer.RuntimeCache {
|
||||
fakeDocker := &dockertools.FakeDockerClient{}
|
||||
np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))
|
||||
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
|
||||
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
|
||||
return kubecontainer.NewFakeRuntimeCache(dockerManager)
|
||||
}
|
||||
|
||||
@ -224,7 +225,7 @@ func TestFakePodWorkers(t *testing.T) {
|
||||
fakeDocker := &dockertools.FakeDockerClient{}
|
||||
fakeRecorder := &record.FakeRecorder{}
|
||||
np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))
|
||||
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
|
||||
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
|
||||
fakeRuntimeCache := kubecontainer.NewFakeRuntimeCache(dockerManager)
|
||||
|
||||
kubeletForRealWorkers := &simpleFakeKubelet{}
|
||||
|
25
pkg/kubelet/qos/doc.go
Normal file
25
pkg/kubelet/qos/doc.go
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// package qos contains helper functions for quality of service.
|
||||
// For each resource (memory, CPU) Kubelet supports three classes of containers.
|
||||
// Memory guaranteed containers will receive the highest priority and will get all the resources
|
||||
// they need.
|
||||
// Burstable containers will be guaranteed their request and can “burst” and use more resources
|
||||
// when available.
|
||||
// Best-Effort containers, which don’t specify a request, can use resources only if not being used
|
||||
// by other pods.
|
||||
package qos
|
75
pkg/kubelet/qos/memory_policy.go
Normal file
75
pkg/kubelet/qos/memory_policy.go
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package qos
|
||||
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
)
|
||||
|
||||
const (
|
||||
PodInfraOomAdj int = -999
|
||||
KubeletOomScoreAdj int = -999
|
||||
KubeProxyOomScoreAdj int = -999
|
||||
)
|
||||
|
||||
// isMemoryBestEffort returns true if the container's memory requirements are best-effort.
|
||||
func isMemoryBestEffort(container *api.Container) bool {
|
||||
// A container is memory best-effort if its memory request is unspecified or 0.
|
||||
// If a request is specified, then the user expects some kind of resource guarantee.
|
||||
return container.Resources.Requests.Memory().Value() == 0
|
||||
}
|
||||
|
||||
// isMemoryGuaranteed returns true if the container's memory requirements are Guaranteed.
|
||||
func isMemoryGuaranteed(container *api.Container) bool {
|
||||
// A container is memory guaranteed if its memory request == memory limit.
|
||||
// If memory request == memory limit, the user is very confident of resource consumption.
|
||||
memoryRequestValue := container.Resources.Requests.Memory().Value()
|
||||
memoryLimitValue := container.Resources.Limits.Memory().Value()
|
||||
return memoryRequestValue == memoryLimitValue && memoryRequestValue != 0
|
||||
}
|
||||
|
||||
// GetContainerOomAdjust returns the amount by which the OOM score of all processes in the
|
||||
// container should be adjusted. The OOM score of a process is the percentage of memory it consumes
|
||||
// multiplied by 100 (barring exceptional cases) + a configurable quantity which is between -1000
|
||||
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
|
||||
// See https://lwn.net/Articles/391222/ for more information.
|
||||
func GetContainerOomScoreAdjust(container *api.Container, memoryCapacity int64) int {
|
||||
if isMemoryGuaranteed(container) {
|
||||
// Memory guaranteed containers should be the last to get killed.
|
||||
return -999
|
||||
} else if isMemoryBestEffort(container) {
|
||||
// Memory best-effort containers should be the first to be killed.
|
||||
return 1000
|
||||
} else {
|
||||
// Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally,
|
||||
// we want to protect Burstable containers that consume less memory than requested.
|
||||
// The formula below is a heuristic. A container requesting for 10% of a system's
|
||||
// memory will have an oom score adjust of 900. If a process in container Y
|
||||
// uses over 10% of memory, its OOM score will be 1000. The idea is that containers
|
||||
// which use more than their request will have an OOM score of 1000 and will be prime
|
||||
// targets for OOM kills.
|
||||
// Note that this is a heuristic, it won't work if a container has many small processes.
|
||||
memoryRequest := container.Resources.Requests.Memory().Value()
|
||||
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
|
||||
// A memory guaranteed container using 100% of memory can have an OOM score of 1. Ensure
|
||||
// that memory burstable containers have a higher OOM score.
|
||||
if oomScoreAdjust < 2 {
|
||||
return 2
|
||||
}
|
||||
return int(oomScoreAdjust)
|
||||
}
|
||||
}
|
187
pkg/kubelet/qos/memory_policy_test.go
Normal file
187
pkg/kubelet/qos/memory_policy_test.go
Normal file
@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package qos
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/api/resource"
|
||||
)
|
||||
|
||||
const (
|
||||
standardMemoryAmount = 8000000000
|
||||
)
|
||||
|
||||
var (
|
||||
zeroRequestMemoryBestEffort = api.Container{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("0G"),
|
||||
},
|
||||
Limits: api.ResourceList{
|
||||
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
edgeMemoryBestEffort = api.Container{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("0G"),
|
||||
},
|
||||
Limits: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("0G"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
noRequestMemoryBestEffort = api.Container{
|
||||
Resources: api.ResourceRequirements{
|
||||
Limits: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
noLimitMemoryBestEffort = api.Container{}
|
||||
|
||||
memoryGuaranteed = api.Container{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
|
||||
},
|
||||
Limits: api.ResourceList{
|
||||
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
memoryBurstable = api.Container{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount / 2)),
|
||||
},
|
||||
Limits: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
memoryBurstableNoLimit = api.Container{
|
||||
Resources: api.ResourceRequirements{
|
||||
Requests: api.ResourceList{
|
||||
api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)),
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func TestIsMemoryBestEffort(t *testing.T) {
|
||||
validCases := []api.Container{zeroRequestMemoryBestEffort, noRequestMemoryBestEffort, noLimitMemoryBestEffort, edgeMemoryBestEffort}
|
||||
for _, container := range validCases {
|
||||
if !isMemoryBestEffort(&container) {
|
||||
t.Errorf("container %+v is memory best-effort", container)
|
||||
}
|
||||
}
|
||||
invalidCases := []api.Container{memoryGuaranteed, memoryBurstable}
|
||||
for _, container := range invalidCases {
|
||||
if isMemoryBestEffort(&container) {
|
||||
t.Errorf("container %+v is not memory best-effort", container)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsMemoryGuaranteed(t *testing.T) {
|
||||
validCases := []api.Container{memoryGuaranteed}
|
||||
for _, container := range validCases {
|
||||
if !isMemoryGuaranteed(&container) {
|
||||
t.Errorf("container %+v is memory guaranteed", container)
|
||||
}
|
||||
}
|
||||
invalidCases := []api.Container{zeroRequestMemoryBestEffort, noRequestMemoryBestEffort, noLimitMemoryBestEffort, edgeMemoryBestEffort, memoryBurstable}
|
||||
for _, container := range invalidCases {
|
||||
if isMemoryGuaranteed(&container) {
|
||||
t.Errorf("container %+v is not memory guaranteed", container)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type oomTest struct {
|
||||
container *api.Container
|
||||
memoryCapacity int64
|
||||
lowOomScoreAdj int // The max oom_score_adj score the container should be assigned.
|
||||
highOomScoreAdj int // The min oom_score_adj score the container should be assigned.
|
||||
}
|
||||
|
||||
func TestGetContainerOomScoreAdjust(t *testing.T) {
|
||||
|
||||
oomTests := []oomTest{
|
||||
{
|
||||
container: &zeroRequestMemoryBestEffort,
|
||||
memoryCapacity: 4000000000,
|
||||
lowOomScoreAdj: 1000,
|
||||
highOomScoreAdj: 1000,
|
||||
},
|
||||
{
|
||||
container: &edgeMemoryBestEffort,
|
||||
memoryCapacity: 8000000000,
|
||||
lowOomScoreAdj: 1000,
|
||||
highOomScoreAdj: 1000,
|
||||
},
|
||||
{
|
||||
container: &noRequestMemoryBestEffort,
|
||||
memoryCapacity: 7230457451,
|
||||
lowOomScoreAdj: 1000,
|
||||
highOomScoreAdj: 1000,
|
||||
},
|
||||
{
|
||||
container: &noLimitMemoryBestEffort,
|
||||
memoryCapacity: 4000000000,
|
||||
lowOomScoreAdj: 1000,
|
||||
highOomScoreAdj: 1000,
|
||||
},
|
||||
{
|
||||
container: &memoryGuaranteed,
|
||||
memoryCapacity: 123456789,
|
||||
lowOomScoreAdj: -999,
|
||||
highOomScoreAdj: -999,
|
||||
},
|
||||
{
|
||||
container: &memoryBurstable,
|
||||
memoryCapacity: standardMemoryAmount,
|
||||
lowOomScoreAdj: 495,
|
||||
highOomScoreAdj: 505,
|
||||
},
|
||||
{
|
||||
container: &memoryBurstableNoLimit,
|
||||
memoryCapacity: standardMemoryAmount,
|
||||
lowOomScoreAdj: 2,
|
||||
highOomScoreAdj: 2,
|
||||
},
|
||||
}
|
||||
for _, test := range oomTests {
|
||||
oomScoreAdj := GetContainerOomScoreAdjust(test.container, test.memoryCapacity)
|
||||
if oomScoreAdj < test.lowOomScoreAdj || oomScoreAdj > test.highOomScoreAdj {
|
||||
t.Errorf("oom_score_adj should be between %d and %d, but was %d", test.lowOomScoreAdj, test.highOomScoreAdj, oomScoreAdj)
|
||||
}
|
||||
}
|
||||
}
|
@ -154,6 +154,7 @@ func TestRunOnce(t *testing.T) {
|
||||
kb.recorder,
|
||||
kb.readinessManager,
|
||||
kb.containerRefManager,
|
||||
&cadvisorApi.MachineInfo{},
|
||||
dockertools.PodInfraContainerImage,
|
||||
0,
|
||||
0,
|
||||
|
18
pkg/util/oom/doc.go
Normal file
18
pkg/util/oom/doc.go
Normal file
@ -0,0 +1,18 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package oom implements utility functions relating to out of memory management.
|
||||
package oom
|
26
pkg/util/oom/oom.go
Normal file
26
pkg/util/oom/oom.go
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package oom
|
||||
|
||||
// This is a struct instead of an interface to allow injection of process ID listers and
|
||||
// applying OOM score in tests.
|
||||
// TODO: make this an interface, and inject a mock ioutil struct for testing.
|
||||
type OomAdjuster struct {
|
||||
pidLister func(cgroupName string) ([]int, error)
|
||||
ApplyOomScoreAdj func(pid int, oomScoreAdj int) error
|
||||
ApplyOomScoreAdjContainer func(cgroupName string, oomScoreAdj, maxTries int) error
|
||||
}
|
34
pkg/util/oom/oom_fake.go
Normal file
34
pkg/util/oom/oom_fake.go
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package oom
|
||||
|
||||
type FakeOomAdjuster struct{}
|
||||
|
||||
func NewFakeOomAdjuster() *OomAdjuster {
|
||||
return &OomAdjuster{
|
||||
ApplyOomScoreAdj: fakeApplyOomScoreAdj,
|
||||
ApplyOomScoreAdjContainer: fakeApplyOomScoreAdjContainer,
|
||||
}
|
||||
}
|
||||
|
||||
func fakeApplyOomScoreAdj(pid int, oomScoreAdj int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func fakeApplyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error {
|
||||
return nil
|
||||
}
|
112
pkg/util/oom/oom_linux.go
Normal file
112
pkg/util/oom/oom_linux.go
Normal file
@ -0,0 +1,112 @@
|
||||
// +build cgo,linux
|
||||
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package oom
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path"
|
||||
"strconv"
|
||||
|
||||
"github.com/docker/libcontainer/cgroups/fs"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
func NewOomAdjuster() *OomAdjuster {
|
||||
oomAdjuster := &OomAdjuster{
|
||||
pidLister: getPids,
|
||||
ApplyOomScoreAdj: applyOomScoreAdj,
|
||||
}
|
||||
oomAdjuster.ApplyOomScoreAdjContainer = oomAdjuster.applyOomScoreAdjContainer
|
||||
return oomAdjuster
|
||||
}
|
||||
|
||||
func getPids(cgroupName string) ([]int, error) {
|
||||
fsManager := fs.Manager{
|
||||
Cgroups: &configs.Cgroup{
|
||||
Name: cgroupName,
|
||||
},
|
||||
}
|
||||
return fsManager.GetPids()
|
||||
}
|
||||
|
||||
// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
|
||||
func applyOomScoreAdj(pid int, oomScoreAdj int) error {
|
||||
if pid < 0 {
|
||||
return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
|
||||
}
|
||||
|
||||
var pidStr string
|
||||
if pid == 0 {
|
||||
pidStr = "self"
|
||||
} else {
|
||||
pidStr = strconv.Itoa(pid)
|
||||
}
|
||||
|
||||
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
|
||||
maxTries := 2
|
||||
var err error
|
||||
for i := 0; i < maxTries; i++ {
|
||||
_, readErr := ioutil.ReadFile(oomScoreAdjPath)
|
||||
if readErr != nil {
|
||||
err = fmt.Errorf("failed to read oom_score_adj: %v", readErr)
|
||||
} else if writeErr := ioutil.WriteFile(oomScoreAdjPath, []byte(strconv.Itoa(oomScoreAdj)), 0700); writeErr != nil {
|
||||
err = fmt.Errorf("failed to set oom_score_adj to %d: %v", oomScoreAdj, writeErr)
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Writes 'value' to /proc/<pid>/oom_score_adj for all processes in cgroup cgroupName.
|
||||
// Keeps trying to write until the process list of the cgroup stabilizes, or until maxTries tries.
|
||||
func (oomAdjuster *OomAdjuster) applyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error {
|
||||
adjustedProcessSet := make(map[int]bool)
|
||||
for i := 0; i < maxTries; i++ {
|
||||
continueAdjusting := false
|
||||
pidList, err := oomAdjuster.pidLister(cgroupName)
|
||||
if err != nil {
|
||||
continueAdjusting = true
|
||||
glog.Errorf("Error getting process list for cgroup %s: %+v", cgroupName, err)
|
||||
} else if len(pidList) == 0 {
|
||||
continueAdjusting = true
|
||||
} else {
|
||||
for _, pid := range pidList {
|
||||
if !adjustedProcessSet[pid] {
|
||||
continueAdjusting = true
|
||||
if err = oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err == nil {
|
||||
adjustedProcessSet[pid] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !continueAdjusting {
|
||||
return nil
|
||||
}
|
||||
// There's a slight race. A process might have forked just before we write its OOM score adjust.
|
||||
// The fork might copy the parent process's old OOM score, then this function might execute and
|
||||
// update the parent's OOM score, but the forked process id might not be reflected in cgroup.procs
|
||||
// for a short amount of time. So this function might return without changing the forked process's
|
||||
// OOM score. Very unlikely race, so ignoring this for now.
|
||||
}
|
||||
return fmt.Errorf("exceeded maxTries, some processes might not have desired OOM score")
|
||||
}
|
110
pkg/util/oom/oom_linux_test.go
Normal file
110
pkg/util/oom/oom_linux_test.go
Normal file
@ -0,0 +1,110 @@
|
||||
// +build cgo,linux
|
||||
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package oom
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Converts a sequence of PID lists into a PID lister.
|
||||
// The PID lister returns pidListSequence[i] on the ith call. If i >= length of pidListSequence
|
||||
// then return the last element of pidListSequence (the sequence is considered to have) stabilized.
|
||||
func sequenceToPidLister(pidListSequence [][]int) func(string) ([]int, error) {
|
||||
var numCalls int
|
||||
return func(cgroupName string) ([]int, error) {
|
||||
numCalls++
|
||||
if len(pidListSequence) == 0 {
|
||||
return []int{}, nil
|
||||
} else if numCalls > len(pidListSequence) {
|
||||
return pidListSequence[len(pidListSequence)-1], nil
|
||||
}
|
||||
return pidListSequence[numCalls-1], nil
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that applyOomScoreAdjContainer correctly applies OOM scores to relevant processes, or
|
||||
// returns the right error.
|
||||
func applyOomScoreAdjContainerTester(pidListSequence [][]int, maxTries int, appliedPids []int, expectedError bool, t *testing.T) {
|
||||
pidOoms := make(map[int]bool)
|
||||
|
||||
// Mock ApplyOomScoreAdj and pidLister.
|
||||
oomAdjuster := NewOomAdjuster()
|
||||
oomAdjuster.ApplyOomScoreAdj = func(pid int, oomScoreAdj int) error {
|
||||
pidOoms[pid] = true
|
||||
return nil
|
||||
}
|
||||
oomAdjuster.pidLister = sequenceToPidLister(pidListSequence)
|
||||
err := oomAdjuster.ApplyOomScoreAdjContainer("", 100, maxTries)
|
||||
|
||||
// Check error value.
|
||||
if expectedError && err == nil {
|
||||
t.Errorf("Expected error %+v when running ApplyOomScoreAdjContainer but got no error", expectedError)
|
||||
return
|
||||
} else if !expectedError && err != nil {
|
||||
t.Errorf("Expected no error but got error %+v when running ApplyOomScoreAdjContainer", err)
|
||||
return
|
||||
} else if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Check that OOM scores were applied to the right processes.
|
||||
if len(appliedPids) != len(pidOoms) {
|
||||
t.Errorf("Applied OOM scores to incorrect number of processes")
|
||||
return
|
||||
}
|
||||
for _, pid := range appliedPids {
|
||||
if !pidOoms[pid] {
|
||||
t.Errorf("Failed to apply OOM scores to process %d", pid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestOomScoreAdjContainer(t *testing.T) {
|
||||
pidListSequenceEmpty := [][]int{}
|
||||
applyOomScoreAdjContainerTester(pidListSequenceEmpty, 3, nil, true, t)
|
||||
|
||||
pidListSequence1 := [][]int{
|
||||
{1, 2},
|
||||
}
|
||||
applyOomScoreAdjContainerTester(pidListSequence1, 1, nil, true, t)
|
||||
applyOomScoreAdjContainerTester(pidListSequence1, 2, []int{1, 2}, false, t)
|
||||
applyOomScoreAdjContainerTester(pidListSequence1, 3, []int{1, 2}, false, t)
|
||||
|
||||
pidListSequence3 := [][]int{
|
||||
{1, 2},
|
||||
{1, 2, 4, 5},
|
||||
{2, 1, 4, 5, 3},
|
||||
}
|
||||
applyOomScoreAdjContainerTester(pidListSequence3, 1, nil, true, t)
|
||||
applyOomScoreAdjContainerTester(pidListSequence3, 2, nil, true, t)
|
||||
applyOomScoreAdjContainerTester(pidListSequence3, 3, nil, true, t)
|
||||
applyOomScoreAdjContainerTester(pidListSequence3, 4, []int{1, 2, 3, 4, 5}, false, t)
|
||||
|
||||
pidListSequenceLag := [][]int{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{1, 2, 4},
|
||||
{1, 2, 4, 5},
|
||||
}
|
||||
for i := 1; i < 5; i++ {
|
||||
applyOomScoreAdjContainerTester(pidListSequenceLag, i, nil, true, t)
|
||||
}
|
||||
applyOomScoreAdjContainerTester(pidListSequenceLag, 6, []int{1, 2, 4, 5}, false, t)
|
||||
}
|
40
pkg/util/oom/oom_unsupported.go
Normal file
40
pkg/util/oom/oom_unsupported.go
Normal file
@ -0,0 +1,40 @@
|
||||
// +build !cgo !linux
|
||||
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package oom
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var unsupportedErr = errors.New("setting OOM scores is unsupported in this build")
|
||||
|
||||
func NewOomAdjuster() *OomAdjuster {
|
||||
return &OomAdjuster{
|
||||
ApplyOomScoreAdj: unsupportedApplyOomScoreAdj,
|
||||
ApplyOomScoreAdjContainer: unsupportedApplyOomScoreAdjContainer,
|
||||
}
|
||||
}
|
||||
|
||||
func unsupportedApplyOomScoreAdj(pid int, oomScoreAdj int) error {
|
||||
return unsupportedErr
|
||||
}
|
||||
|
||||
func unsupportedApplyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error {
|
||||
return unsupportedErr
|
||||
}
|
18
pkg/util/procfs/doc.go
Normal file
18
pkg/util/procfs/doc.go
Normal file
@ -0,0 +1,18 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package procfs implements utility functions relating to the /proc mount.
|
||||
package procfs
|
10
pkg/util/procfs/example_proc_cgroup
Normal file
10
pkg/util/procfs/example_proc_cgroup
Normal file
@ -0,0 +1,10 @@
|
||||
11:name=systemd:/user/1000.user/c1.session
|
||||
10:hugetlb:/user/1000.user/c1.session
|
||||
9:perf_event:/user/1000.user/c1.session
|
||||
8:blkio:/user/1000.user/c1.session
|
||||
7:freezer:/user/1000.user/c1.session
|
||||
6:devices:/user/1000.user/c1.session
|
||||
5:memory:/user/1000.user/c1.session
|
||||
4:cpuacct:/user/1000.user/c1.session
|
||||
3:cpu:/user/1000.user/c1.session
|
||||
2:cpuset:/
|
54
pkg/util/procfs/procfs.go
Normal file
54
pkg/util/procfs/procfs.go
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package procfs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type ProcFs struct{}
|
||||
|
||||
func NewProcFs() ProcFsInterface {
|
||||
return &ProcFs{}
|
||||
}
|
||||
|
||||
func containerNameFromProcCgroup(content string) (string, error) {
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
entries := strings.SplitN(line, ":", 3)
|
||||
if len(entries) == 3 && entries[1] == "devices" {
|
||||
return strings.TrimSpace(entries[2]), nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("could not find devices cgroup location")
|
||||
}
|
||||
|
||||
// getFullContainerName gets the container name given the root process id of the container.
|
||||
// Eg. If the devices cgroup for the container is stored in /sys/fs/cgroup/devices/docker/nginx,
|
||||
// return docker/nginx. Assumes that the process is part of exactly one cgroup hierarchy.
|
||||
func (pfs *ProcFs) GetFullContainerName(pid int) (string, error) {
|
||||
filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup")
|
||||
content, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return containerNameFromProcCgroup(string(content))
|
||||
}
|
30
pkg/util/procfs/procfs_fake.go
Normal file
30
pkg/util/procfs/procfs_fake.go
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package procfs
|
||||
|
||||
type FakeProcFs struct{}
|
||||
|
||||
func NewFakeProcFs() ProcFsInterface {
|
||||
return &FakeProcFs{}
|
||||
}
|
||||
|
||||
// getFullContainerName gets the container name given the root process id of the container.
|
||||
// Eg. If the devices cgroup for the container is stored in /sys/fs/cgroup/devices/docker/nginx,
|
||||
// return docker/nginx. Assumes that the process is part of exactly one cgroup hierarchy.
|
||||
func (fakePfs *FakeProcFs) GetFullContainerName(pid int) (string, error) {
|
||||
return "", nil
|
||||
}
|
22
pkg/util/procfs/procfs_interface.go
Normal file
22
pkg/util/procfs/procfs_interface.go
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package procfs
|
||||
|
||||
type ProcFsInterface interface {
|
||||
// getFullContainerName gets the container name given the root process id of the container.
|
||||
GetFullContainerName(pid int) (string, error)
|
||||
}
|
58
pkg/util/procfs/procfs_test.go
Normal file
58
pkg/util/procfs/procfs_test.go
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package procfs
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func verifyContainerName(procCgroupText, expectedName string, expectedErr bool, t *testing.T) {
|
||||
name, err := containerNameFromProcCgroup(procCgroupText)
|
||||
if expectedErr && err == nil {
|
||||
t.Errorf("Expected error but did not get error in verifyContainerName")
|
||||
return
|
||||
} else if !expectedErr && err != nil {
|
||||
t.Errorf("Expected no error, but got error %+v in verifyContainerName", err)
|
||||
return
|
||||
} else if expectedErr {
|
||||
return
|
||||
}
|
||||
if name != expectedName {
|
||||
t.Errorf("Expected container name %s but got name %s", expectedName, name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainerNameFromProcCgroup(t *testing.T) {
|
||||
procCgroupValid := "2:devices:docker/kubelet"
|
||||
verifyContainerName(procCgroupValid, "docker/kubelet", false, t)
|
||||
|
||||
procCgroupEmpty := ""
|
||||
verifyContainerName(procCgroupEmpty, "", true, t)
|
||||
|
||||
content, err := ioutil.ReadFile("example_proc_cgroup")
|
||||
if err != nil {
|
||||
t.Errorf("Could not read example /proc cgroup file")
|
||||
}
|
||||
verifyContainerName(string(content), "/user/1000.user/c1.session", false, t)
|
||||
|
||||
procCgroupNoDevice := "2:freezer:docker/kubelet\n5:cpuacct:pkg/kubectl"
|
||||
verifyContainerName(procCgroupNoDevice, "", true, t)
|
||||
|
||||
procCgroupInvalid := "devices:docker/kubelet\ncpuacct:pkg/kubectl"
|
||||
verifyContainerName(procCgroupInvalid, "", true, t)
|
||||
}
|
@ -22,7 +22,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
@ -212,34 +211,6 @@ func UsingSystemdInitSystem() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
|
||||
func ApplyOomScoreAdj(pid int, value int) error {
|
||||
if value < -1000 || value > 1000 {
|
||||
return fmt.Errorf("invalid value(%d) specified for oom_score_adj. Values must be within the range [-1000, 1000]", value)
|
||||
}
|
||||
if pid < 0 {
|
||||
return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
|
||||
}
|
||||
|
||||
var pidStr string
|
||||
if pid == 0 {
|
||||
pidStr = "self"
|
||||
} else {
|
||||
pidStr = strconv.Itoa(pid)
|
||||
}
|
||||
|
||||
oom_value, err := ioutil.ReadFile(path.Join("/proc", pidStr, "oom_score_adj"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read oom_score_adj: %v", err)
|
||||
} else if string(oom_value) != strconv.Itoa(value) {
|
||||
if err := ioutil.WriteFile(path.Join("/proc", pidStr, "oom_score_adj"), []byte(strconv.Itoa(value)), 0700); err != nil {
|
||||
return fmt.Errorf("failed to set oom_score_adj to %d: %v", value, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Tests whether all pointer fields in a struct are nil. This is useful when,
|
||||
// for example, an API struct is handled by plugins which need to distinguish
|
||||
// "no plugin accepted this spec" from "this spec is empty".
|
||||
|
Loading…
Reference in New Issue
Block a user