From 6ef3de1d5f1d4b19c154ba15f2bd4686b6036f0c Mon Sep 17 00:00:00 2001 From: Ananya Kumar Date: Mon, 3 Aug 2015 17:28:33 -0700 Subject: [PATCH] Add QoS support on node --- cmd/kube-proxy/app/server.go | 7 +- cmd/kubelet/app/server.go | 7 +- contrib/mesos/pkg/executor/service/service.go | 4 +- pkg/kubelet/container_manager_linux.go | 4 +- pkg/kubelet/dockertools/docker_test.go | 3 +- pkg/kubelet/dockertools/fake_manager.go | 11 +- pkg/kubelet/dockertools/manager.go | 77 +++++--- pkg/kubelet/dockertools/manager_test.go | 2 + pkg/kubelet/kubelet.go | 18 +- pkg/kubelet/pod_workers_test.go | 5 +- pkg/kubelet/qos/doc.go | 25 +++ pkg/kubelet/qos/memory_policy.go | 75 +++++++ pkg/kubelet/qos/memory_policy_test.go | 187 ++++++++++++++++++ pkg/kubelet/runonce_test.go | 1 + pkg/util/oom/doc.go | 18 ++ pkg/util/oom/oom.go | 26 +++ pkg/util/oom/oom_fake.go | 34 ++++ pkg/util/oom/oom_linux.go | 112 +++++++++++ pkg/util/oom/oom_linux_test.go | 110 +++++++++++ pkg/util/oom/oom_unsupported.go | 40 ++++ pkg/util/procfs/doc.go | 18 ++ pkg/util/procfs/example_proc_cgroup | 10 + pkg/util/procfs/procfs.go | 54 +++++ pkg/util/procfs/procfs_fake.go | 30 +++ pkg/util/procfs/procfs_interface.go | 22 +++ pkg/util/procfs/procfs_test.go | 58 ++++++ pkg/util/util.go | 29 --- 27 files changed, 916 insertions(+), 71 deletions(-) create mode 100644 pkg/kubelet/qos/doc.go create mode 100644 pkg/kubelet/qos/memory_policy.go create mode 100644 pkg/kubelet/qos/memory_policy_test.go create mode 100644 pkg/util/oom/doc.go create mode 100644 pkg/util/oom/oom.go create mode 100644 pkg/util/oom/oom_fake.go create mode 100644 pkg/util/oom/oom_linux.go create mode 100644 pkg/util/oom/oom_linux_test.go create mode 100644 pkg/util/oom/oom_unsupported.go create mode 100644 pkg/util/procfs/doc.go create mode 100644 pkg/util/procfs/example_proc_cgroup create mode 100644 pkg/util/procfs/procfs.go create mode 100644 pkg/util/procfs/procfs_fake.go create mode 100644 pkg/util/procfs/procfs_interface.go create mode 100644 pkg/util/procfs/procfs_test.go diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index b4643620a78..accc4bb1191 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -28,11 +28,13 @@ import ( "k8s.io/kubernetes/pkg/client" "k8s.io/kubernetes/pkg/client/clientcmd" clientcmdapi "k8s.io/kubernetes/pkg/client/clientcmd/api" + "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/proxy" "k8s.io/kubernetes/pkg/proxy/config" "k8s.io/kubernetes/pkg/util" "k8s.io/kubernetes/pkg/util/exec" "k8s.io/kubernetes/pkg/util/iptables" + "k8s.io/kubernetes/pkg/util/oom" "github.com/golang/glog" "github.com/spf13/pflag" @@ -56,7 +58,7 @@ func NewProxyServer() *ProxyServer { BindAddress: util.IP(net.ParseIP("0.0.0.0")), HealthzPort: 10249, HealthzBindAddress: util.IP(net.ParseIP("127.0.0.1")), - OOMScoreAdj: -899, + OOMScoreAdj: qos.KubeProxyOomScoreAdj, ResourceContainer: "/kube-proxy", } } @@ -76,7 +78,8 @@ func (s *ProxyServer) AddFlags(fs *pflag.FlagSet) { // Run runs the specified ProxyServer. This should never exit. func (s *ProxyServer) Run(_ []string) error { // TODO(vmarmol): Use container config for this. 
- if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { + oomAdjuster := oom.NewOomAdjuster() + if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { glog.V(2).Info(err) } diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 8ac5d642db1..a195d3e8fea 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -48,10 +48,12 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/dockertools" "k8s.io/kubernetes/pkg/kubelet/network" + "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/master/ports" "k8s.io/kubernetes/pkg/util" "k8s.io/kubernetes/pkg/util/mount" nodeutil "k8s.io/kubernetes/pkg/util/node" + "k8s.io/kubernetes/pkg/util/oom" "k8s.io/kubernetes/pkg/volume" "github.com/golang/glog" @@ -167,7 +169,7 @@ func NewKubeletServer() *KubeletServer { HealthzPort: 10248, HealthzBindAddress: util.IP(net.ParseIP("127.0.0.1")), RegisterNode: true, // will be ignored if no apiserver is configured - OOMScoreAdj: -900, + OOMScoreAdj: qos.KubeletOomScoreAdj, MasterServiceNamespace: api.NamespaceDefault, ImageGCHighThresholdPercent: 90, ImageGCLowThresholdPercent: 80, @@ -400,7 +402,8 @@ func (s *KubeletServer) Run(kcfg *KubeletConfig) error { glog.V(2).Infof("Using root directory: %v", s.RootDirectory) // TODO(vmarmol): Do this through container config. - if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { + oomAdjuster := oom.NewOomAdjuster() + if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { glog.Warning(err) } diff --git a/contrib/mesos/pkg/executor/service/service.go b/contrib/mesos/pkg/executor/service/service.go index 8e42367c8c7..0833aad9d1f 100644 --- a/contrib/mesos/pkg/executor/service/service.go +++ b/contrib/mesos/pkg/executor/service/service.go @@ -47,6 +47,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/dockertools" "k8s.io/kubernetes/pkg/util" "k8s.io/kubernetes/pkg/util/mount" + "k8s.io/kubernetes/pkg/util/oom" "github.com/spf13/pflag" ) @@ -125,7 +126,8 @@ func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error) func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error { rand.Seed(time.Now().UTC().UnixNano()) - if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { + oomAdjuster := oom.NewOomAdjuster() + if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { log.Info(err) } diff --git a/pkg/kubelet/container_manager_linux.go b/pkg/kubelet/container_manager_linux.go index 79b76bb0497..768598c846d 100644 --- a/pkg/kubelet/container_manager_linux.go +++ b/pkg/kubelet/container_manager_linux.go @@ -35,6 +35,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cadvisor" "k8s.io/kubernetes/pkg/util" "k8s.io/kubernetes/pkg/util/errors" + "k8s.io/kubernetes/pkg/util/oom" ) const ( @@ -229,7 +230,8 @@ func ensureDockerInContainer(cadvisor cadvisor.Interface, oomScoreAdj int, manag } // Also apply oom_score_adj to processes - if err := util.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil { + oomAdjuster := oom.NewOomAdjuster() + if err := oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil { errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid)) } } diff --git a/pkg/kubelet/dockertools/docker_test.go b/pkg/kubelet/dockertools/docker_test.go index 7a2b332ae2b..c5991cb2f22 100644 --- a/pkg/kubelet/dockertools/docker_test.go +++ b/pkg/kubelet/dockertools/docker_test.go @@ -27,6 +27,7 @@ import ( "github.com/docker/docker/pkg/jsonmessage" docker 
"github.com/fsouza/go-dockerclient" + cadvisorApi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/client/record" "k8s.io/kubernetes/pkg/credentialprovider" @@ -655,7 +656,7 @@ func TestFindContainersByPod(t *testing.T) { } fakeClient := &FakeDockerClient{} np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil)) - containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, nil) + containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, &cadvisorApi.MachineInfo{}, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, nil) for i, test := range tests { fakeClient.ContainerList = test.containerList fakeClient.ExitedContainerList = test.exitedContainerList diff --git a/pkg/kubelet/dockertools/fake_manager.go b/pkg/kubelet/dockertools/fake_manager.go index 95c54174bdb..386ae597f03 100644 --- a/pkg/kubelet/dockertools/fake_manager.go +++ b/pkg/kubelet/dockertools/fake_manager.go @@ -17,11 +17,14 @@ limitations under the License. package dockertools import ( + cadvisorApi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/client/record" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/network" "k8s.io/kubernetes/pkg/kubelet/prober" kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types" + "k8s.io/kubernetes/pkg/util/oom" + "k8s.io/kubernetes/pkg/util/procfs" ) func NewFakeDockerManager( @@ -29,6 +32,7 @@ func NewFakeDockerManager( recorder record.EventRecorder, readinessManager *kubecontainer.ReadinessManager, containerRefManager *kubecontainer.RefManager, + machineInfo *cadvisorApi.MachineInfo, podInfraContainerImage string, qps float32, burst int, @@ -39,8 +43,11 @@ func NewFakeDockerManager( httpClient kubeletTypes.HttpGetter, runtimeHooks kubecontainer.RuntimeHooks) *DockerManager { - dm := NewDockerManager(client, recorder, readinessManager, containerRefManager, podInfraContainerImage, qps, - burst, containerLogsDir, osInterface, networkPlugin, generator, httpClient, runtimeHooks, &NativeExecHandler{}) + fakeOomAdjuster := oom.NewFakeOomAdjuster() + fakeProcFs := procfs.NewFakeProcFs() + dm := NewDockerManager(client, recorder, readinessManager, containerRefManager, machineInfo, podInfraContainerImage, qps, + burst, containerLogsDir, osInterface, networkPlugin, generator, httpClient, runtimeHooks, &NativeExecHandler{}, + fakeOomAdjuster, fakeProcFs) dm.puller = &FakeDockerPuller{} dm.prober = prober.New(nil, readinessManager, containerRefManager, recorder) return dm diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index e16711184e0..a2ce208ca12 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -34,6 +34,7 @@ import ( docker "github.com/fsouza/go-dockerclient" "github.com/golang/glog" "github.com/golang/groupcache/lru" + cadvisorApi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/latest" "k8s.io/kubernetes/pkg/client/record" @@ -42,20 +43,17 @@ import ( "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/pkg/kubelet/network" "k8s.io/kubernetes/pkg/kubelet/prober" + "k8s.io/kubernetes/pkg/kubelet/qos" kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/probe" "k8s.io/kubernetes/pkg/securitycontext" "k8s.io/kubernetes/pkg/types" "k8s.io/kubernetes/pkg/util" + 
"k8s.io/kubernetes/pkg/util/oom" + "k8s.io/kubernetes/pkg/util/procfs" ) const ( - // The oom_score_adj of the POD infrastructure container. The default is 0 for - // any other docker containers, so any value below that makes it *less* likely - // to get OOM killed. - podOomScoreAdj = -100 - userContainerOomScoreAdj = 0 - maxReasonCacheEntries = 200 kubernetesPodLabel = "io.kubernetes.pod.data" @@ -75,6 +73,7 @@ type DockerManager struct { readinessManager *kubecontainer.ReadinessManager containerRefManager *kubecontainer.RefManager os kubecontainer.OSInterface + machineInfo *cadvisorApi.MachineInfo // The image name of the pod infra container. podInfraContainerImage string @@ -114,6 +113,12 @@ type DockerManager struct { // Handler used to execute commands in containers. execHandler ExecHandler + + // Used to set OOM scores of processes. + oomAdjuster *oom.OomAdjuster + + // Get information from /proc mount. + procFs procfs.ProcFsInterface } func NewDockerManager( @@ -121,6 +126,7 @@ func NewDockerManager( recorder record.EventRecorder, readinessManager *kubecontainer.ReadinessManager, containerRefManager *kubecontainer.RefManager, + machineInfo *cadvisorApi.MachineInfo, podInfraContainerImage string, qps float32, burst int, @@ -130,7 +136,9 @@ func NewDockerManager( generator kubecontainer.RunContainerOptionsGenerator, httpClient kubeletTypes.HttpGetter, runtimeHooks kubecontainer.RuntimeHooks, - execHandler ExecHandler) *DockerManager { + execHandler ExecHandler, + oomAdjuster *oom.OomAdjuster, + procFs procfs.ProcFsInterface) *DockerManager { // Work out the location of the Docker runtime, defaulting to /var/lib/docker // if there are any problems. dockerRoot := "/var/lib/docker" @@ -164,11 +172,12 @@ func NewDockerManager( reasonCache := stringCache{cache: lru.New(maxReasonCacheEntries)} dm := &DockerManager{ - client: client, - recorder: recorder, - readinessManager: readinessManager, - containerRefManager: containerRefManager, - os: osInterface, + client: client, + recorder: recorder, + readinessManager: readinessManager, + containerRefManager: containerRefManager, + os: osInterface, + machineInfo: machineInfo, podInfraContainerImage: podInfraContainerImage, reasonCache: reasonCache, puller: newDockerPuller(client, qps, burst), @@ -179,6 +188,8 @@ func NewDockerManager( generator: generator, runtimeHooks: runtimeHooks, execHandler: execHandler, + oomAdjuster: oomAdjuster, + procFs: procFs, } dm.runner = lifecycle.NewHandlerRunner(httpClient, dm, dm) dm.prober = prober.New(dm, readinessManager, containerRefManager, recorder) @@ -1231,32 +1242,42 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe glog.Errorf("Failed to create symbolic link to the log file of pod %q container %q: %v", podFullName, container.Name, err) } - // Set OOM score of POD container to lower than those of the other containers - // which have OOM score 0 by default in the pod. This ensures that it is - // killed only as a last resort. + // Container information is used in adjusting OOM scores and adding ndots. containerInfo, err := dm.client.InspectContainer(string(id)) if err != nil { return "", err } - // Ensure the PID actually exists, else we'll move ourselves. if containerInfo.State.Pid == 0 { return "", fmt.Errorf("failed to get init PID for Docker container %q", string(id)) } + + // Set OOM score of the container based on the priority of the container. + // Processes in lower-priority pods should be killed first if the system runs out of memory. 
+ // The main pod infrastructure container is considered high priority, since if it is killed the + // whole pod will die. + var oomScoreAdj int if container.Name == PodInfraContainerName { - util.ApplyOomScoreAdj(containerInfo.State.Pid, podOomScoreAdj) - // currently, Docker does not have a flag by which the ndots option can be passed. - // (A seperate issue has been filed with Docker to add a ndots flag) - // The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker. - // This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod. - // we modify it when the pause container is created since it is the first container created in the pod since it holds - // the networking namespace. - err = addNDotsOption(containerInfo.ResolvConfPath) + oomScoreAdj = qos.PodInfraOomAdj } else { - // Children processes of docker daemon will inheritant the OOM score from docker - // daemon process. We explicitly apply OOM score 0 by default to the user - // containers to avoid daemons or POD containers are killed by oom killer. - util.ApplyOomScoreAdj(containerInfo.State.Pid, userContainerOomScoreAdj) + oomScoreAdj = qos.GetContainerOomScoreAdjust(container, dm.machineInfo.MemoryCapacity) + } + cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid) + if err != nil { + return "", err + } + if err = dm.oomAdjuster.ApplyOomScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil { + return "", err + } + + // currently, Docker does not have a flag by which the ndots option can be passed. + // (A seperate issue has been filed with Docker to add a ndots flag) + // The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker. + // This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod. + // we modify it when the pause container is created since it is the first container created in the pod since it holds + // the networking namespace. 
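[Editor's note: to make the collapsed hunk above easier to follow, here is a condensed, hypothetical sketch of the flow this patch adds to runContainerInPod — pick an OOM score adjustment by QoS class, resolve the container's cgroup, then apply the score to every process in that cgroup. The helper name and parameters are illustrative; ndots handling is omitted; the qos/procfs/oom calls are the ones introduced by this patch.]

```go
package oomexample

import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/kubelet/qos"
	"k8s.io/kubernetes/pkg/util/oom"
	"k8s.io/kubernetes/pkg/util/procfs"
)

// setContainerOomScore is a hypothetical helper (not part of the patch) that
// condenses what runContainerInPod now does after inspecting the container.
func setContainerOomScore(container *api.Container, isInfraContainer bool, pid int,
	memoryCapacity int64, pfs procfs.ProcFsInterface, adjuster *oom.OomAdjuster) error {
	// The pod infra container is protected strongly: if it dies, the whole pod dies.
	oomScoreAdj := qos.PodInfraOomAdj
	if !isInfraContainer {
		// User containers are scored from their memory request vs. node capacity.
		oomScoreAdj = qos.GetContainerOomScoreAdjust(container, memoryCapacity)
	}
	// Find the container's cgroup via /proc/<pid>/cgroup.
	cgroupName, err := pfs.GetFullContainerName(pid)
	if err != nil {
		return err
	}
	// Apply the score to all processes in the cgroup, retrying up to 5 times
	// so processes that appear while the update is in progress are covered.
	return adjuster.ApplyOomScoreAdjContainer(cgroupName, oomScoreAdj, 5)
}
```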
+ if container.Name == PodInfraContainerName { + err = addNDotsOption(containerInfo.ResolvConfPath) } return kubeletTypes.DockerID(id), err diff --git a/pkg/kubelet/dockertools/manager_test.go b/pkg/kubelet/dockertools/manager_test.go index 096d1e4efee..c0cd6f78ba9 100644 --- a/pkg/kubelet/dockertools/manager_test.go +++ b/pkg/kubelet/dockertools/manager_test.go @@ -31,6 +31,7 @@ import ( "time" docker "github.com/fsouza/go-dockerclient" + cadvisorApi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/testapi" "k8s.io/kubernetes/pkg/client/record" @@ -114,6 +115,7 @@ func newTestDockerManagerWithHTTPClient(fakeHTTPClient *fakeHTTP) (*DockerManage fakeRecorder, readinessManager, containerRefManager, + &cadvisorApi.MachineInfo{}, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 179a89200cc..27dcb2343fa 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -34,6 +34,7 @@ import ( "time" "github.com/golang/glog" + cadvisorApi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" apierrors "k8s.io/kubernetes/pkg/api/errors" "k8s.io/kubernetes/pkg/api/resource" @@ -60,13 +61,13 @@ import ( utilErrors "k8s.io/kubernetes/pkg/util/errors" "k8s.io/kubernetes/pkg/util/mount" nodeutil "k8s.io/kubernetes/pkg/util/node" + "k8s.io/kubernetes/pkg/util/oom" + "k8s.io/kubernetes/pkg/util/procfs" "k8s.io/kubernetes/pkg/version" "k8s.io/kubernetes/pkg/volume" "k8s.io/kubernetes/pkg/watch" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates" "k8s.io/kubernetes/third_party/golang/expansion" - - cadvisorApi "github.com/google/cadvisor/info/v1" ) const ( @@ -274,6 +275,14 @@ func NewMainKubelet( klet.networkPlugin = plug } + machineInfo, err := klet.GetCachedMachineInfo() + if err != nil { + return nil, err + } + + oomAdjuster := oom.NewOomAdjuster() + procFs := procfs.NewProcFs() + // Initialize the runtime. 
switch containerRuntime { case "docker": @@ -283,6 +292,7 @@ func NewMainKubelet( recorder, readinessManager, containerRefManager, + machineInfo, podInfraContainerImage, pullQPS, pullBurst, @@ -292,7 +302,9 @@ func NewMainKubelet( klet, klet.httpClient, newKubeletRuntimeHooks(recorder), - dockerExecHandler) + dockerExecHandler, + oomAdjuster, + procFs) case "rkt": conf := &rkt.Config{InsecureSkipVerify: true} rktRuntime, err := rkt.New( diff --git a/pkg/kubelet/pod_workers_test.go b/pkg/kubelet/pod_workers_test.go index cbb86e794ca..8bc1e2ccba6 100644 --- a/pkg/kubelet/pod_workers_test.go +++ b/pkg/kubelet/pod_workers_test.go @@ -24,6 +24,7 @@ import ( "time" docker "github.com/fsouza/go-dockerclient" + cadvisorApi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/client/record" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -44,7 +45,7 @@ func newPod(uid, name string) *api.Pod { func createFakeRuntimeCache(fakeRecorder *record.FakeRecorder) kubecontainer.RuntimeCache { fakeDocker := &dockertools.FakeDockerClient{} np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil)) - dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder)) + dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder)) return kubecontainer.NewFakeRuntimeCache(dockerManager) } @@ -224,7 +225,7 @@ func TestFakePodWorkers(t *testing.T) { fakeDocker := &dockertools.FakeDockerClient{} fakeRecorder := &record.FakeRecorder{} np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil)) - dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder)) + dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder)) fakeRuntimeCache := kubecontainer.NewFakeRuntimeCache(dockerManager) kubeletForRealWorkers := &simpleFakeKubelet{} diff --git a/pkg/kubelet/qos/doc.go b/pkg/kubelet/qos/doc.go new file mode 100644 index 00000000000..04a25c90796 --- /dev/null +++ b/pkg/kubelet/qos/doc.go @@ -0,0 +1,25 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// package qos contains helper functions for quality of service. +// For each resource (memory, CPU) Kubelet supports three classes of containers. +// Memory guaranteed containers will receive the highest priority and will get all the resources +// they need. 
+// Burstable containers will be guaranteed their request and can “burst” and use more resources +// when available. +// Best-Effort containers, which don’t specify a request, can use resources only if not being used +// by other pods. +package qos diff --git a/pkg/kubelet/qos/memory_policy.go b/pkg/kubelet/qos/memory_policy.go new file mode 100644 index 00000000000..ac542407a90 --- /dev/null +++ b/pkg/kubelet/qos/memory_policy.go @@ -0,0 +1,75 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package qos + +import ( + "k8s.io/kubernetes/pkg/api" +) + +const ( + PodInfraOomAdj int = -999 + KubeletOomScoreAdj int = -999 + KubeProxyOomScoreAdj int = -999 +) + +// isMemoryBestEffort returns true if the container's memory requirements are best-effort. +func isMemoryBestEffort(container *api.Container) bool { + // A container is memory best-effort if its memory request is unspecified or 0. + // If a request is specified, then the user expects some kind of resource guarantee. + return container.Resources.Requests.Memory().Value() == 0 +} + +// isMemoryGuaranteed returns true if the container's memory requirements are Guaranteed. +func isMemoryGuaranteed(container *api.Container) bool { + // A container is memory guaranteed if its memory request == memory limit. + // If memory request == memory limit, the user is very confident of resource consumption. + memoryRequestValue := container.Resources.Requests.Memory().Value() + memoryLimitValue := container.Resources.Limits.Memory().Value() + return memoryRequestValue == memoryLimitValue && memoryRequestValue != 0 +} + +// GetContainerOomAdjust returns the amount by which the OOM score of all processes in the +// container should be adjusted. The OOM score of a process is the percentage of memory it consumes +// multiplied by 100 (barring exceptional cases) + a configurable quantity which is between -1000 +// and 1000. Containers with higher OOM scores are killed if the system runs out of memory. +// See https://lwn.net/Articles/391222/ for more information. +func GetContainerOomScoreAdjust(container *api.Container, memoryCapacity int64) int { + if isMemoryGuaranteed(container) { + // Memory guaranteed containers should be the last to get killed. + return -999 + } else if isMemoryBestEffort(container) { + // Memory best-effort containers should be the first to be killed. + return 1000 + } else { + // Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally, + // we want to protect Burstable containers that consume less memory than requested. + // The formula below is a heuristic. A container requesting for 10% of a system's + // memory will have an oom score adjust of 900. If a process in container Y + // uses over 10% of memory, its OOM score will be 1000. The idea is that containers + // which use more than their request will have an OOM score of 1000 and will be prime + // targets for OOM kills. 
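[Editor's note: a small standalone program, illustrative only, showing what the heuristic above yields on an 8 GB node for made-up containers in each memory class. The container specs are invented for this example; the qos call is the one added by this patch.]

```go
package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/kubelet/qos"
)

func main() {
	const nodeMemory int64 = 8000000000 // 8 GB node, as in the tests below

	// No memory request at all => best-effort.
	bestEffort := &api.Container{}

	// Request below limit => burstable.
	burstable := &api.Container{
		Resources: api.ResourceRequirements{
			Requests: api.ResourceList{
				api.ResourceName(api.ResourceMemory): resource.MustParse("4G"),
			},
			Limits: api.ResourceList{
				api.ResourceName(api.ResourceMemory): resource.MustParse("6G"),
			},
		},
	}

	// Non-zero request equal to the limit => guaranteed.
	guaranteed := &api.Container{
		Resources: api.ResourceRequirements{
			Requests: api.ResourceList{
				api.ResourceName(api.ResourceMemory): resource.MustParse("2G"),
			},
			Limits: api.ResourceList{
				api.ResourceName(api.ResourceMemory): resource.MustParse("2G"),
			},
		},
	}

	fmt.Println(qos.GetContainerOomScoreAdjust(bestEffort, nodeMemory)) // 1000
	fmt.Println(qos.GetContainerOomScoreAdjust(burstable, nodeMemory))  // 1000 - 1000*4G/8G = 500
	fmt.Println(qos.GetContainerOomScoreAdjust(guaranteed, nodeMemory)) // -999
}
```

The burstable result of 500 falls inside the 495-505 band that TestGetContainerOomScoreAdjust below expects for a request of half the node's memory.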
+ // Note that this is a heuristic, it won't work if a container has many small processes. + memoryRequest := container.Resources.Requests.Memory().Value() + oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity + // A memory guaranteed container using 100% of memory can have an OOM score of 1. Ensure + // that memory burstable containers have a higher OOM score. + if oomScoreAdjust < 2 { + return 2 + } + return int(oomScoreAdjust) + } +} diff --git a/pkg/kubelet/qos/memory_policy_test.go b/pkg/kubelet/qos/memory_policy_test.go new file mode 100644 index 00000000000..bb0ddea210b --- /dev/null +++ b/pkg/kubelet/qos/memory_policy_test.go @@ -0,0 +1,187 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package qos + +import ( + "strconv" + "testing" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" +) + +const ( + standardMemoryAmount = 8000000000 +) + +var ( + zeroRequestMemoryBestEffort = api.Container{ + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceName(api.ResourceCPU): resource.MustParse("5m"), + api.ResourceName(api.ResourceMemory): resource.MustParse("0G"), + }, + Limits: api.ResourceList{ + api.ResourceName(api.ResourceCPU): resource.MustParse("5m"), + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + }, + } + + edgeMemoryBestEffort = api.Container{ + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse("0G"), + }, + Limits: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse("0G"), + }, + }, + } + + noRequestMemoryBestEffort = api.Container{ + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + }, + } + + noLimitMemoryBestEffort = api.Container{} + + memoryGuaranteed = api.Container{ + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + Limits: api.ResourceList{ + api.ResourceName(api.ResourceCPU): resource.MustParse("5m"), + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + }, + } + + memoryBurstable = api.Container{ + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount / 2)), + }, + Limits: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse("10G"), + }, + }, + } + + memoryBurstableNoLimit = api.Container{ + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)), + }, + }, + } +) + +func TestIsMemoryBestEffort(t *testing.T) { + validCases := []api.Container{zeroRequestMemoryBestEffort, noRequestMemoryBestEffort, noLimitMemoryBestEffort, edgeMemoryBestEffort} + for _, container := range validCases { + if 
!isMemoryBestEffort(&container) { + t.Errorf("container %+v is memory best-effort", container) + } + } + invalidCases := []api.Container{memoryGuaranteed, memoryBurstable} + for _, container := range invalidCases { + if isMemoryBestEffort(&container) { + t.Errorf("container %+v is not memory best-effort", container) + } + } +} + +func TestIsMemoryGuaranteed(t *testing.T) { + validCases := []api.Container{memoryGuaranteed} + for _, container := range validCases { + if !isMemoryGuaranteed(&container) { + t.Errorf("container %+v is memory guaranteed", container) + } + } + invalidCases := []api.Container{zeroRequestMemoryBestEffort, noRequestMemoryBestEffort, noLimitMemoryBestEffort, edgeMemoryBestEffort, memoryBurstable} + for _, container := range invalidCases { + if isMemoryGuaranteed(&container) { + t.Errorf("container %+v is not memory guaranteed", container) + } + } +} + +type oomTest struct { + container *api.Container + memoryCapacity int64 + lowOomScoreAdj int // The max oom_score_adj score the container should be assigned. + highOomScoreAdj int // The min oom_score_adj score the container should be assigned. +} + +func TestGetContainerOomScoreAdjust(t *testing.T) { + + oomTests := []oomTest{ + { + container: &zeroRequestMemoryBestEffort, + memoryCapacity: 4000000000, + lowOomScoreAdj: 1000, + highOomScoreAdj: 1000, + }, + { + container: &edgeMemoryBestEffort, + memoryCapacity: 8000000000, + lowOomScoreAdj: 1000, + highOomScoreAdj: 1000, + }, + { + container: &noRequestMemoryBestEffort, + memoryCapacity: 7230457451, + lowOomScoreAdj: 1000, + highOomScoreAdj: 1000, + }, + { + container: &noLimitMemoryBestEffort, + memoryCapacity: 4000000000, + lowOomScoreAdj: 1000, + highOomScoreAdj: 1000, + }, + { + container: &memoryGuaranteed, + memoryCapacity: 123456789, + lowOomScoreAdj: -999, + highOomScoreAdj: -999, + }, + { + container: &memoryBurstable, + memoryCapacity: standardMemoryAmount, + lowOomScoreAdj: 495, + highOomScoreAdj: 505, + }, + { + container: &memoryBurstableNoLimit, + memoryCapacity: standardMemoryAmount, + lowOomScoreAdj: 2, + highOomScoreAdj: 2, + }, + } + for _, test := range oomTests { + oomScoreAdj := GetContainerOomScoreAdjust(test.container, test.memoryCapacity) + if oomScoreAdj < test.lowOomScoreAdj || oomScoreAdj > test.highOomScoreAdj { + t.Errorf("oom_score_adj should be between %d and %d, but was %d", test.lowOomScoreAdj, test.highOomScoreAdj, oomScoreAdj) + } + } +} diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go index 89b311b85d7..d9fec8fc531 100644 --- a/pkg/kubelet/runonce_test.go +++ b/pkg/kubelet/runonce_test.go @@ -154,6 +154,7 @@ func TestRunOnce(t *testing.T) { kb.recorder, kb.readinessManager, kb.containerRefManager, + &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, diff --git a/pkg/util/oom/doc.go b/pkg/util/oom/doc.go new file mode 100644 index 00000000000..539d6c43b29 --- /dev/null +++ b/pkg/util/oom/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package oom implements utility functions relating to out of memory management. +package oom diff --git a/pkg/util/oom/oom.go b/pkg/util/oom/oom.go new file mode 100644 index 00000000000..b3400bda180 --- /dev/null +++ b/pkg/util/oom/oom.go @@ -0,0 +1,26 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package oom + +// This is a struct instead of an interface to allow injection of process ID listers and +// applying OOM score in tests. +// TODO: make this an interface, and inject a mock ioutil struct for testing. +type OomAdjuster struct { + pidLister func(cgroupName string) ([]int, error) + ApplyOomScoreAdj func(pid int, oomScoreAdj int) error + ApplyOomScoreAdjContainer func(cgroupName string, oomScoreAdj, maxTries int) error +} diff --git a/pkg/util/oom/oom_fake.go b/pkg/util/oom/oom_fake.go new file mode 100644 index 00000000000..e28d3a6d982 --- /dev/null +++ b/pkg/util/oom/oom_fake.go @@ -0,0 +1,34 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package oom + +type FakeOomAdjuster struct{} + +func NewFakeOomAdjuster() *OomAdjuster { + return &OomAdjuster{ + ApplyOomScoreAdj: fakeApplyOomScoreAdj, + ApplyOomScoreAdjContainer: fakeApplyOomScoreAdjContainer, + } +} + +func fakeApplyOomScoreAdj(pid int, oomScoreAdj int) error { + return nil +} + +func fakeApplyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error { + return nil +} diff --git a/pkg/util/oom/oom_linux.go b/pkg/util/oom/oom_linux.go new file mode 100644 index 00000000000..1e765828b69 --- /dev/null +++ b/pkg/util/oom/oom_linux.go @@ -0,0 +1,112 @@ +// +build cgo,linux + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package oom + +import ( + "fmt" + "io/ioutil" + "path" + "strconv" + + "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/configs" + "github.com/golang/glog" +) + +func NewOomAdjuster() *OomAdjuster { + oomAdjuster := &OomAdjuster{ + pidLister: getPids, + ApplyOomScoreAdj: applyOomScoreAdj, + } + oomAdjuster.ApplyOomScoreAdjContainer = oomAdjuster.applyOomScoreAdjContainer + return oomAdjuster +} + +func getPids(cgroupName string) ([]int, error) { + fsManager := fs.Manager{ + Cgroups: &configs.Cgroup{ + Name: cgroupName, + }, + } + return fsManager.GetPids() +} + +// Writes 'value' to /proc//oom_score_adj. PID = 0 means self +func applyOomScoreAdj(pid int, oomScoreAdj int) error { + if pid < 0 { + return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid) + } + + var pidStr string + if pid == 0 { + pidStr = "self" + } else { + pidStr = strconv.Itoa(pid) + } + + oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj") + maxTries := 2 + var err error + for i := 0; i < maxTries; i++ { + _, readErr := ioutil.ReadFile(oomScoreAdjPath) + if readErr != nil { + err = fmt.Errorf("failed to read oom_score_adj: %v", readErr) + } else if writeErr := ioutil.WriteFile(oomScoreAdjPath, []byte(strconv.Itoa(oomScoreAdj)), 0700); writeErr != nil { + err = fmt.Errorf("failed to set oom_score_adj to %d: %v", oomScoreAdj, writeErr) + } else { + return nil + } + } + + return err +} + +// Writes 'value' to /proc//oom_score_adj for all processes in cgroup cgroupName. +// Keeps trying to write until the process list of the cgroup stabilizes, or until maxTries tries. +func (oomAdjuster *OomAdjuster) applyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error { + adjustedProcessSet := make(map[int]bool) + for i := 0; i < maxTries; i++ { + continueAdjusting := false + pidList, err := oomAdjuster.pidLister(cgroupName) + if err != nil { + continueAdjusting = true + glog.Errorf("Error getting process list for cgroup %s: %+v", cgroupName, err) + } else if len(pidList) == 0 { + continueAdjusting = true + } else { + for _, pid := range pidList { + if !adjustedProcessSet[pid] { + continueAdjusting = true + if err = oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err == nil { + adjustedProcessSet[pid] = true + } + } + } + } + if !continueAdjusting { + return nil + } + // There's a slight race. A process might have forked just before we write its OOM score adjust. + // The fork might copy the parent process's old OOM score, then this function might execute and + // update the parent's OOM score, but the forked process id might not be reflected in cgroup.procs + // for a short amount of time. So this function might return without changing the forked process's + // OOM score. Very unlikely race, so ignoring this for now. + } + return fmt.Errorf("exceeded maxTries, some processes might not have desired OOM score") +} diff --git a/pkg/util/oom/oom_linux_test.go b/pkg/util/oom/oom_linux_test.go new file mode 100644 index 00000000000..fbe58bddc4f --- /dev/null +++ b/pkg/util/oom/oom_linux_test.go @@ -0,0 +1,110 @@ +// +build cgo,linux + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package oom + +import ( + "testing" +) + +// Converts a sequence of PID lists into a PID lister. +// The PID lister returns pidListSequence[i] on the ith call. If i >= length of pidListSequence +// then return the last element of pidListSequence (the sequence is considered to have) stabilized. +func sequenceToPidLister(pidListSequence [][]int) func(string) ([]int, error) { + var numCalls int + return func(cgroupName string) ([]int, error) { + numCalls++ + if len(pidListSequence) == 0 { + return []int{}, nil + } else if numCalls > len(pidListSequence) { + return pidListSequence[len(pidListSequence)-1], nil + } + return pidListSequence[numCalls-1], nil + } +} + +// Tests that applyOomScoreAdjContainer correctly applies OOM scores to relevant processes, or +// returns the right error. +func applyOomScoreAdjContainerTester(pidListSequence [][]int, maxTries int, appliedPids []int, expectedError bool, t *testing.T) { + pidOoms := make(map[int]bool) + + // Mock ApplyOomScoreAdj and pidLister. + oomAdjuster := NewOomAdjuster() + oomAdjuster.ApplyOomScoreAdj = func(pid int, oomScoreAdj int) error { + pidOoms[pid] = true + return nil + } + oomAdjuster.pidLister = sequenceToPidLister(pidListSequence) + err := oomAdjuster.ApplyOomScoreAdjContainer("", 100, maxTries) + + // Check error value. + if expectedError && err == nil { + t.Errorf("Expected error %+v when running ApplyOomScoreAdjContainer but got no error", expectedError) + return + } else if !expectedError && err != nil { + t.Errorf("Expected no error but got error %+v when running ApplyOomScoreAdjContainer", err) + return + } else if err != nil { + return + } + + // Check that OOM scores were applied to the right processes. 
+ if len(appliedPids) != len(pidOoms) { + t.Errorf("Applied OOM scores to incorrect number of processes") + return + } + for _, pid := range appliedPids { + if !pidOoms[pid] { + t.Errorf("Failed to apply OOM scores to process %d", pid) + } + } +} + +func TestOomScoreAdjContainer(t *testing.T) { + pidListSequenceEmpty := [][]int{} + applyOomScoreAdjContainerTester(pidListSequenceEmpty, 3, nil, true, t) + + pidListSequence1 := [][]int{ + {1, 2}, + } + applyOomScoreAdjContainerTester(pidListSequence1, 1, nil, true, t) + applyOomScoreAdjContainerTester(pidListSequence1, 2, []int{1, 2}, false, t) + applyOomScoreAdjContainerTester(pidListSequence1, 3, []int{1, 2}, false, t) + + pidListSequence3 := [][]int{ + {1, 2}, + {1, 2, 4, 5}, + {2, 1, 4, 5, 3}, + } + applyOomScoreAdjContainerTester(pidListSequence3, 1, nil, true, t) + applyOomScoreAdjContainerTester(pidListSequence3, 2, nil, true, t) + applyOomScoreAdjContainerTester(pidListSequence3, 3, nil, true, t) + applyOomScoreAdjContainerTester(pidListSequence3, 4, []int{1, 2, 3, 4, 5}, false, t) + + pidListSequenceLag := [][]int{ + {}, + {}, + {}, + {1, 2, 4}, + {1, 2, 4, 5}, + } + for i := 1; i < 5; i++ { + applyOomScoreAdjContainerTester(pidListSequenceLag, i, nil, true, t) + } + applyOomScoreAdjContainerTester(pidListSequenceLag, 6, []int{1, 2, 4, 5}, false, t) +} diff --git a/pkg/util/oom/oom_unsupported.go b/pkg/util/oom/oom_unsupported.go new file mode 100644 index 00000000000..f44d527f88f --- /dev/null +++ b/pkg/util/oom/oom_unsupported.go @@ -0,0 +1,40 @@ +// +build !cgo !linux + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package oom + +import ( + "errors" +) + +var unsupportedErr = errors.New("setting OOM scores is unsupported in this build") + +func NewOomAdjuster() *OomAdjuster { + return &OomAdjuster{ + ApplyOomScoreAdj: unsupportedApplyOomScoreAdj, + ApplyOomScoreAdjContainer: unsupportedApplyOomScoreAdjContainer, + } +} + +func unsupportedApplyOomScoreAdj(pid int, oomScoreAdj int) error { + return unsupportedErr +} + +func unsupportedApplyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error { + return unsupportedErr +} diff --git a/pkg/util/procfs/doc.go b/pkg/util/procfs/doc.go new file mode 100644 index 00000000000..d94e6687c1f --- /dev/null +++ b/pkg/util/procfs/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package procfs implements utility functions relating to the /proc mount. 
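[Editor's note: the procfs package added below exposes a single call, GetFullContainerName. As a hedged usage sketch (an illustrative standalone program, not part of the patch), resolving the cgroup name of the current process might look like this.]

```go
package main

import (
	"fmt"
	"os"

	"k8s.io/kubernetes/pkg/util/procfs"
)

func main() {
	// Reads /proc/<pid>/cgroup for this process and returns the path on the
	// "devices" hierarchy, e.g. "/user/1000.user/c1.session" for the example
	// file shipped with this patch.
	pfs := procfs.NewProcFs()
	name, err := pfs.GetFullContainerName(os.Getpid())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(name)
}
```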
+package procfs diff --git a/pkg/util/procfs/example_proc_cgroup b/pkg/util/procfs/example_proc_cgroup new file mode 100644 index 00000000000..3e42ce92751 --- /dev/null +++ b/pkg/util/procfs/example_proc_cgroup @@ -0,0 +1,10 @@ +11:name=systemd:/user/1000.user/c1.session +10:hugetlb:/user/1000.user/c1.session +9:perf_event:/user/1000.user/c1.session +8:blkio:/user/1000.user/c1.session +7:freezer:/user/1000.user/c1.session +6:devices:/user/1000.user/c1.session +5:memory:/user/1000.user/c1.session +4:cpuacct:/user/1000.user/c1.session +3:cpu:/user/1000.user/c1.session +2:cpuset:/ \ No newline at end of file diff --git a/pkg/util/procfs/procfs.go b/pkg/util/procfs/procfs.go new file mode 100644 index 00000000000..22dd1470ccd --- /dev/null +++ b/pkg/util/procfs/procfs.go @@ -0,0 +1,54 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package procfs + +import ( + "fmt" + "io/ioutil" + "path" + "strconv" + "strings" +) + +type ProcFs struct{} + +func NewProcFs() ProcFsInterface { + return &ProcFs{} +} + +func containerNameFromProcCgroup(content string) (string, error) { + lines := strings.Split(content, "\n") + for _, line := range lines { + entries := strings.SplitN(line, ":", 3) + if len(entries) == 3 && entries[1] == "devices" { + return strings.TrimSpace(entries[2]), nil + } + } + return "", fmt.Errorf("could not find devices cgroup location") +} + +// getFullContainerName gets the container name given the root process id of the container. +// Eg. If the devices cgroup for the container is stored in /sys/fs/cgroup/devices/docker/nginx, +// return docker/nginx. Assumes that the process is part of exactly one cgroup hierarchy. +func (pfs *ProcFs) GetFullContainerName(pid int) (string, error) { + filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup") + content, err := ioutil.ReadFile(filePath) + if err != nil { + return "", err + } + return containerNameFromProcCgroup(string(content)) +} diff --git a/pkg/util/procfs/procfs_fake.go b/pkg/util/procfs/procfs_fake.go new file mode 100644 index 00000000000..b002a9d98c5 --- /dev/null +++ b/pkg/util/procfs/procfs_fake.go @@ -0,0 +1,30 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package procfs + +type FakeProcFs struct{} + +func NewFakeProcFs() ProcFsInterface { + return &FakeProcFs{} +} + +// getFullContainerName gets the container name given the root process id of the container. +// Eg. 
If the devices cgroup for the container is stored in /sys/fs/cgroup/devices/docker/nginx, +// return docker/nginx. Assumes that the process is part of exactly one cgroup hierarchy. +func (fakePfs *FakeProcFs) GetFullContainerName(pid int) (string, error) { + return "", nil +} diff --git a/pkg/util/procfs/procfs_interface.go b/pkg/util/procfs/procfs_interface.go new file mode 100644 index 00000000000..b776443dbfe --- /dev/null +++ b/pkg/util/procfs/procfs_interface.go @@ -0,0 +1,22 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package procfs + +type ProcFsInterface interface { + // getFullContainerName gets the container name given the root process id of the container. + GetFullContainerName(pid int) (string, error) +} diff --git a/pkg/util/procfs/procfs_test.go b/pkg/util/procfs/procfs_test.go new file mode 100644 index 00000000000..609543dca7d --- /dev/null +++ b/pkg/util/procfs/procfs_test.go @@ -0,0 +1,58 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package procfs + +import ( + "io/ioutil" + "testing" +) + +func verifyContainerName(procCgroupText, expectedName string, expectedErr bool, t *testing.T) { + name, err := containerNameFromProcCgroup(procCgroupText) + if expectedErr && err == nil { + t.Errorf("Expected error but did not get error in verifyContainerName") + return + } else if !expectedErr && err != nil { + t.Errorf("Expected no error, but got error %+v in verifyContainerName", err) + return + } else if expectedErr { + return + } + if name != expectedName { + t.Errorf("Expected container name %s but got name %s", expectedName, name) + } +} + +func TestContainerNameFromProcCgroup(t *testing.T) { + procCgroupValid := "2:devices:docker/kubelet" + verifyContainerName(procCgroupValid, "docker/kubelet", false, t) + + procCgroupEmpty := "" + verifyContainerName(procCgroupEmpty, "", true, t) + + content, err := ioutil.ReadFile("example_proc_cgroup") + if err != nil { + t.Errorf("Could not read example /proc cgroup file") + } + verifyContainerName(string(content), "/user/1000.user/c1.session", false, t) + + procCgroupNoDevice := "2:freezer:docker/kubelet\n5:cpuacct:pkg/kubectl" + verifyContainerName(procCgroupNoDevice, "", true, t) + + procCgroupInvalid := "devices:docker/kubelet\ncpuacct:pkg/kubectl" + verifyContainerName(procCgroupInvalid, "", true, t) +} diff --git a/pkg/util/util.go b/pkg/util/util.go index 36aee047e91..07a8762d0b8 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -22,7 +22,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "net" "net/http" "os" @@ -212,34 +211,6 @@ func UsingSystemdInitSystem() bool { return false } -// Writes 'value' to /proc//oom_score_adj. PID = 0 means self -func ApplyOomScoreAdj(pid int, value int) error { - if value < -1000 || value > 1000 { - return fmt.Errorf("invalid value(%d) specified for oom_score_adj. Values must be within the range [-1000, 1000]", value) - } - if pid < 0 { - return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid) - } - - var pidStr string - if pid == 0 { - pidStr = "self" - } else { - pidStr = strconv.Itoa(pid) - } - - oom_value, err := ioutil.ReadFile(path.Join("/proc", pidStr, "oom_score_adj")) - if err != nil { - return fmt.Errorf("failed to read oom_score_adj: %v", err) - } else if string(oom_value) != strconv.Itoa(value) { - if err := ioutil.WriteFile(path.Join("/proc", pidStr, "oom_score_adj"), []byte(strconv.Itoa(value)), 0700); err != nil { - return fmt.Errorf("failed to set oom_score_adj to %d: %v", value, err) - } - } - - return nil -} - // Tests whether all pointer fields in a struct are nil. This is useful when, // for example, an API struct is handled by plugins which need to distinguish // "no plugin accepted this spec" from "this spec is empty".
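[Editor's note: with util.ApplyOomScoreAdj removed above, callers construct an OomAdjuster instead, as the kube-proxy and kubelet hunks in this patch do. A minimal sketch of the migrated call, using only APIs introduced by the patch:]

```go
package main

import (
	"log"

	"k8s.io/kubernetes/pkg/kubelet/qos"
	"k8s.io/kubernetes/pkg/util/oom"
)

func main() {
	// Construct an OomAdjuster (selected at build time: oom_linux.go or
	// oom_unsupported.go) and apply the score to our own process (PID 0),
	// mirroring the kube-proxy usage above.
	oomAdjuster := oom.NewOomAdjuster()
	if err := oomAdjuster.ApplyOomScoreAdj(0, qos.KubeProxyOomScoreAdj); err != nil {
		log.Print(err)
	}
}
```

One behavioral difference worth noting: the removed util helper rejected values outside [-1000, 1000], while the new applyOomScoreAdj in oom_linux.go only validates the PID, so callers are trusted to pass an in-range adjustment.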