Merge pull request #12182 from AnanyaKumar/qos-node

Add QoS support on node
This commit is contained in:
Satnam Singh 2015-08-07 14:27:02 -07:00
commit 950ec96db0
27 changed files with 916 additions and 71 deletions

View File

@@ -28,11 +28,13 @@ import (
"k8s.io/kubernetes/pkg/client"
"k8s.io/kubernetes/pkg/client/clientcmd"
clientcmdapi "k8s.io/kubernetes/pkg/client/clientcmd/api"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/proxy"
"k8s.io/kubernetes/pkg/proxy/config"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/exec"
"k8s.io/kubernetes/pkg/util/iptables"
"k8s.io/kubernetes/pkg/util/oom"
"github.com/golang/glog"
"github.com/spf13/pflag"
@@ -56,7 +58,7 @@ func NewProxyServer() *ProxyServer {
BindAddress: util.IP(net.ParseIP("0.0.0.0")),
HealthzPort: 10249,
HealthzBindAddress: util.IP(net.ParseIP("127.0.0.1")),
OOMScoreAdj: -899,
OOMScoreAdj: qos.KubeProxyOomScoreAdj,
ResourceContainer: "/kube-proxy",
}
}
@@ -76,7 +78,8 @@ func (s *ProxyServer) AddFlags(fs *pflag.FlagSet) {
// Run runs the specified ProxyServer. This should never exit.
func (s *ProxyServer) Run(_ []string) error {
// TODO(vmarmol): Use container config for this.
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
oomAdjuster := oom.NewOomAdjuster()
if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
glog.V(2).Info(err)
}

View File

@@ -48,10 +48,12 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/master/ports"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/mount"
nodeutil "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/volume"
"github.com/golang/glog"
@@ -167,7 +169,7 @@ func NewKubeletServer() *KubeletServer {
HealthzPort: 10248,
HealthzBindAddress: util.IP(net.ParseIP("127.0.0.1")),
RegisterNode: true, // will be ignored if no apiserver is configured
OOMScoreAdj: -900,
OOMScoreAdj: qos.KubeletOomScoreAdj,
MasterServiceNamespace: api.NamespaceDefault,
ImageGCHighThresholdPercent: 90,
ImageGCLowThresholdPercent: 80,
@@ -400,7 +402,8 @@ func (s *KubeletServer) Run(kcfg *KubeletConfig) error {
glog.V(2).Infof("Using root directory: %v", s.RootDirectory)
// TODO(vmarmol): Do this through container config.
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
oomAdjuster := oom.NewOomAdjuster()
if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
glog.Warning(err)
}

View File

@@ -47,6 +47,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/util/oom"
"github.com/spf13/pflag"
)
@@ -125,7 +126,8 @@ func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error)
func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
rand.Seed(time.Now().UTC().UnixNano())
if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
oomAdjuster := oom.NewOomAdjuster()
if err := oomAdjuster.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
log.Info(err)
}

View File

@@ -35,6 +35,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/errors"
"k8s.io/kubernetes/pkg/util/oom"
)
const (
@@ -229,7 +230,8 @@ func ensureDockerInContainer(cadvisor cadvisor.Interface, oomScoreAdj int, manag
}
// Also apply oom_score_adj to processes
if err := util.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil {
oomAdjuster := oom.NewOomAdjuster()
if err := oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil {
errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid))
}
}

View File

@@ -27,6 +27,7 @@ import (
"github.com/docker/docker/pkg/jsonmessage"
docker "github.com/fsouza/go-dockerclient"
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/credentialprovider"
@@ -655,7 +656,7 @@ func TestFindContainersByPod(t *testing.T) {
}
fakeClient := &FakeDockerClient{}
np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))
containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, nil)
containerManager := NewFakeDockerManager(fakeClient, &record.FakeRecorder{}, nil, nil, &cadvisorApi.MachineInfo{}, PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, nil)
for i, test := range tests {
fakeClient.ContainerList = test.containerList
fakeClient.ExitedContainerList = test.exitedContainerList

View File

@@ -17,11 +17,14 @@ limitations under the License.
package dockertools
import (
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/client/record"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/kubelet/prober"
kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/util/procfs"
)
func NewFakeDockerManager(
@@ -29,6 +32,7 @@ func NewFakeDockerManager(
recorder record.EventRecorder,
readinessManager *kubecontainer.ReadinessManager,
containerRefManager *kubecontainer.RefManager,
machineInfo *cadvisorApi.MachineInfo,
podInfraContainerImage string,
qps float32,
burst int,
@@ -39,8 +43,11 @@ func NewFakeDockerManager(
httpClient kubeletTypes.HttpGetter,
runtimeHooks kubecontainer.RuntimeHooks) *DockerManager {
dm := NewDockerManager(client, recorder, readinessManager, containerRefManager, podInfraContainerImage, qps,
burst, containerLogsDir, osInterface, networkPlugin, generator, httpClient, runtimeHooks, &NativeExecHandler{})
fakeOomAdjuster := oom.NewFakeOomAdjuster()
fakeProcFs := procfs.NewFakeProcFs()
dm := NewDockerManager(client, recorder, readinessManager, containerRefManager, machineInfo, podInfraContainerImage, qps,
burst, containerLogsDir, osInterface, networkPlugin, generator, httpClient, runtimeHooks, &NativeExecHandler{},
fakeOomAdjuster, fakeProcFs)
dm.puller = &FakeDockerPuller{}
dm.prober = prober.New(nil, readinessManager, containerRefManager, recorder)
return dm

View File

@@ -34,6 +34,7 @@ import (
docker "github.com/fsouza/go-dockerclient"
"github.com/golang/glog"
"github.com/golang/groupcache/lru"
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/latest"
"k8s.io/kubernetes/pkg/client/record"
@@ -42,20 +43,17 @@ import (
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/kubelet/prober"
"k8s.io/kubernetes/pkg/kubelet/qos"
kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/probe"
"k8s.io/kubernetes/pkg/securitycontext"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/util/procfs"
)
const (
// The oom_score_adj of the POD infrastructure container. The default is 0 for
// any other docker containers, so any value below that makes it *less* likely
// to get OOM killed.
podOomScoreAdj = -100
userContainerOomScoreAdj = 0
maxReasonCacheEntries = 200
kubernetesPodLabel = "io.kubernetes.pod.data"
@@ -75,6 +73,7 @@ type DockerManager struct {
readinessManager *kubecontainer.ReadinessManager
containerRefManager *kubecontainer.RefManager
os kubecontainer.OSInterface
machineInfo *cadvisorApi.MachineInfo
// The image name of the pod infra container.
podInfraContainerImage string
@@ -114,6 +113,12 @@ type DockerManager struct {
// Handler used to execute commands in containers.
execHandler ExecHandler
// Used to set OOM scores of processes.
oomAdjuster *oom.OomAdjuster
// Get information from /proc mount.
procFs procfs.ProcFsInterface
}
func NewDockerManager(
@@ -121,6 +126,7 @@ func NewDockerManager(
recorder record.EventRecorder,
readinessManager *kubecontainer.ReadinessManager,
containerRefManager *kubecontainer.RefManager,
machineInfo *cadvisorApi.MachineInfo,
podInfraContainerImage string,
qps float32,
burst int,
@@ -130,7 +136,9 @@ func NewDockerManager(
generator kubecontainer.RunContainerOptionsGenerator,
httpClient kubeletTypes.HttpGetter,
runtimeHooks kubecontainer.RuntimeHooks,
execHandler ExecHandler) *DockerManager {
execHandler ExecHandler,
oomAdjuster *oom.OomAdjuster,
procFs procfs.ProcFsInterface) *DockerManager {
// Work out the location of the Docker runtime, defaulting to /var/lib/docker
// if there are any problems.
dockerRoot := "/var/lib/docker"
@@ -169,6 +177,7 @@ func NewDockerManager(
readinessManager: readinessManager,
containerRefManager: containerRefManager,
os: osInterface,
machineInfo: machineInfo,
podInfraContainerImage: podInfraContainerImage,
reasonCache: reasonCache,
puller: newDockerPuller(client, qps, burst),
@@ -179,6 +188,8 @@ func NewDockerManager(
generator: generator,
runtimeHooks: runtimeHooks,
execHandler: execHandler,
oomAdjuster: oomAdjuster,
procFs: procFs,
}
dm.runner = lifecycle.NewHandlerRunner(httpClient, dm, dm)
dm.prober = prober.New(dm, readinessManager, containerRefManager, recorder)
@@ -1242,32 +1253,42 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
glog.Errorf("Failed to create symbolic link to the log file of pod %q container %q: %v", podFullName, container.Name, err)
}
// Set OOM score of POD container to lower than those of the other containers
// which have OOM score 0 by default in the pod. This ensures that it is
// killed only as a last resort.
// Container information is used in adjusting OOM scores and adding ndots.
containerInfo, err := dm.client.InspectContainer(string(id))
if err != nil {
return "", err
}
// Ensure the PID actually exists, else we would end up adjusting our own OOM score (PID 0 means self).
if containerInfo.State.Pid == 0 {
return "", fmt.Errorf("failed to get init PID for Docker container %q", string(id))
}
// Set OOM score of the container based on the priority of the container.
// Processes in lower-priority pods should be killed first if the system runs out of memory.
// The main pod infrastructure container is considered high priority, since if it is killed the
// whole pod will die.
var oomScoreAdj int
if container.Name == PodInfraContainerName {
util.ApplyOomScoreAdj(containerInfo.State.Pid, podOomScoreAdj)
oomScoreAdj = qos.PodInfraOomAdj
} else {
oomScoreAdj = qos.GetContainerOomScoreAdjust(container, dm.machineInfo.MemoryCapacity)
}
cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid)
if err != nil {
return "", err
}
if err = dm.oomAdjuster.ApplyOomScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
return "", err
}
// Currently, Docker does not have a flag by which the ndots option can be passed.
// (A separate issue has been filed with Docker to add an ndots flag.)
// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
// We modify it when the pause container is created, since it is the first container created in the pod
// and it holds the networking namespace.
if container.Name == PodInfraContainerName {
err = addNDotsOption(containerInfo.ResolvConfPath)
} else {
// Child processes of the docker daemon inherit the OOM score from the docker
// daemon process. We explicitly apply OOM score 0 by default to user
// containers to avoid daemons or POD containers being killed by the OOM killer.
util.ApplyOomScoreAdj(containerInfo.State.Pid, userContainerOomScoreAdj)
}
return kubeletTypes.DockerID(id), err
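The addNDotsOption helper itself is outside this hunk. As a rough sketch only, assuming the helper simply appends an "options ndots:5" line to the docker-generated resolv.conf — the name addNDotsOptionSketch, the 0644 mode, and the ndots value of 5 are illustrative assumptions, not code from this commit:

import "os"

// addNDotsOptionSketch appends an ndots option to the resolv.conf file that
// docker generated for the pod. Illustrative sketch only; not the helper
// used by this commit.
func addNDotsOptionSketch(resolvConfPath string) error {
    f, err := os.OpenFile(resolvConfPath, os.O_WRONLY|os.O_APPEND, 0644)
    if err != nil {
        return err
    }
    defer f.Close()
    // With ndots:5, names containing fewer than five dots are tried against
    // the DNS search paths before being treated as absolute.
    _, err = f.WriteString("options ndots:5\n")
    return err
}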

View File

@@ -31,6 +31,7 @@ import (
"time"
docker "github.com/fsouza/go-dockerclient"
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/client/record"
@@ -114,6 +115,7 @@ func newTestDockerManagerWithHTTPClient(fakeHTTPClient *fakeHTTP) (*DockerManage
fakeRecorder,
readinessManager,
containerRefManager,
&cadvisorApi.MachineInfo{},
PodInfraContainerImage,
0, 0, "",
kubecontainer.FakeOS{},

View File

@@ -34,6 +34,7 @@ import (
"time"
"github.com/golang/glog"
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
apierrors "k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/api/resource"
@@ -60,13 +61,13 @@ import (
utilErrors "k8s.io/kubernetes/pkg/util/errors"
"k8s.io/kubernetes/pkg/util/mount"
nodeutil "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/util/procfs"
"k8s.io/kubernetes/pkg/version"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/watch"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/third_party/golang/expansion"
cadvisorApi "github.com/google/cadvisor/info/v1"
)
const (
@@ -274,6 +275,14 @@ func NewMainKubelet(
klet.networkPlugin = plug
}
machineInfo, err := klet.GetCachedMachineInfo()
if err != nil {
return nil, err
}
oomAdjuster := oom.NewOomAdjuster()
procFs := procfs.NewProcFs()
// Initialize the runtime.
switch containerRuntime {
case "docker":
@@ -283,6 +292,7 @@ func NewMainKubelet(
recorder,
readinessManager,
containerRefManager,
machineInfo,
podInfraContainerImage,
pullQPS,
pullBurst,
@@ -292,7 +302,9 @@ func NewMainKubelet(
klet,
klet.httpClient,
newKubeletRuntimeHooks(recorder),
dockerExecHandler)
dockerExecHandler,
oomAdjuster,
procFs)
case "rkt":
conf := &rkt.Config{InsecureSkipVerify: true}
rktRuntime, err := rkt.New(

View File

@@ -24,6 +24,7 @@ import (
"time"
docker "github.com/fsouza/go-dockerclient"
cadvisorApi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/record"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
@@ -44,7 +45,7 @@ func newPod(uid, name string) *api.Pod {
func createFakeRuntimeCache(fakeRecorder *record.FakeRecorder) kubecontainer.RuntimeCache {
fakeDocker := &dockertools.FakeDockerClient{}
np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
return kubecontainer.NewFakeRuntimeCache(dockerManager)
}
@@ -224,7 +225,7 @@ func TestFakePodWorkers(t *testing.T) {
fakeDocker := &dockertools.FakeDockerClient{}
fakeRecorder := &record.FakeRecorder{}
np, _ := network.InitNetworkPlugin([]network.NetworkPlugin{}, "", network.NewFakeHost(nil))
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
dockerManager := dockertools.NewFakeDockerManager(fakeDocker, fakeRecorder, nil, nil, &cadvisorApi.MachineInfo{}, dockertools.PodInfraContainerImage, 0, 0, "", kubecontainer.FakeOS{}, np, nil, nil, newKubeletRuntimeHooks(fakeRecorder))
fakeRuntimeCache := kubecontainer.NewFakeRuntimeCache(dockerManager)
kubeletForRealWorkers := &simpleFakeKubelet{}

25 pkg/kubelet/qos/doc.go Normal file
View File

@@ -0,0 +1,25 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package qos contains helper functions for quality of service.
// For each resource (memory, CPU) the Kubelet supports three classes of containers.
// Memory guaranteed containers will receive the highest priority and will get all the resources
// they need.
// Burstable containers will be guaranteed their request and can “burst” and use more resources
// when available.
// Best-Effort containers, which don't specify a request, can use resources only if not being used
// by other pods.
package qos
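To make these classes concrete, here is an illustrative sketch (not part of this commit) of resource specs for each memory class, using the api and resource packages that the tests in this package also use:

import (
    "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
)

var (
    // Guaranteed: memory request == memory limit != 0.
    guaranteed = api.Container{
        Resources: api.ResourceRequirements{
            Requests: api.ResourceList{api.ResourceMemory: resource.MustParse("1G")},
            Limits:   api.ResourceList{api.ResourceMemory: resource.MustParse("1G")},
        },
    }
    // Burstable: memory request specified, but lower than the limit.
    burstable = api.Container{
        Resources: api.ResourceRequirements{
            Requests: api.ResourceList{api.ResourceMemory: resource.MustParse("500M")},
            Limits:   api.ResourceList{api.ResourceMemory: resource.MustParse("1G")},
        },
    }
    // Best-Effort: no memory request at all.
    bestEffort = api.Container{}
)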

View File

@@ -0,0 +1,75 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package qos
import (
"k8s.io/kubernetes/pkg/api"
)
const (
PodInfraOomAdj int = -999
KubeletOomScoreAdj int = -999
KubeProxyOomScoreAdj int = -999
)
// isMemoryBestEffort returns true if the container's memory requirements are best-effort.
func isMemoryBestEffort(container *api.Container) bool {
// A container is memory best-effort if its memory request is unspecified or 0.
// If a request is specified, then the user expects some kind of resource guarantee.
return container.Resources.Requests.Memory().Value() == 0
}
// isMemoryGuaranteed returns true if the container's memory requirements are Guaranteed.
func isMemoryGuaranteed(container *api.Container) bool {
// A container is memory guaranteed if its memory request == memory limit.
// If memory request == memory limit, the user is very confident of resource consumption.
memoryRequestValue := container.Resources.Requests.Memory().Value()
memoryLimitValue := container.Resources.Limits.Memory().Value()
return memoryRequestValue == memoryLimitValue && memoryRequestValue != 0
}
// GetContainerOomScoreAdjust returns the amount by which the OOM score of all processes in the
// container should be adjusted. The OOM score of a process is the percentage of memory it consumes
// multiplied by 10 (barring exceptional cases) + a configurable quantity which is between -1000
// and 1000. Containers with higher OOM scores are killed if the system runs out of memory.
// See https://lwn.net/Articles/391222/ for more information.
func GetContainerOomScoreAdjust(container *api.Container, memoryCapacity int64) int {
if isMemoryGuaranteed(container) {
// Memory guaranteed containers should be the last to get killed.
return -999
} else if isMemoryBestEffort(container) {
// Memory best-effort containers should be the first to be killed.
return 1000
} else {
// Burstable containers are a middle tier, between Guaranteed and Best-Effort. Ideally,
// we want to protect Burstable containers that consume less memory than requested.
// The formula below is a heuristic. A container requesting 10% of a system's
// memory will have an oom_score_adj of 900. If a process in container Y
// uses over 10% of memory, its OOM score will be 1000. The idea is that containers
// which use more than their request will have an OOM score of 1000 and will be prime
// targets for OOM kills.
// Note that this is a heuristic; it won't work if a container has many small processes.
memoryRequest := container.Resources.Requests.Memory().Value()
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
// A memory guaranteed container using 100% of memory can have an OOM score of 1. Ensure
// that memory burstable containers have a higher OOM score.
if oomScoreAdjust < 2 {
return 2
}
return int(oomScoreAdjust)
}
}
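As a worked example of the formula (hypothetical numbers): on a node with 8000000000 bytes of memory, a burstable container requesting 4000000000 bytes gets 1000 - (1000*4000000000)/8000000000 = 500, midway between guaranteed (-999) and best-effort (1000). A sketch of the call, using the memoryBurstable fixture defined in the tests below:

// memoryCapacity would normally come from cadvisor's MachineInfo, as in
// DockerManager.runContainerInPod above.
adj := GetContainerOomScoreAdjust(&memoryBurstable, 8000000000)
// adj == 500: the container requested exactly half the node's memory.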

View File

@@ -0,0 +1,187 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package qos
import (
"strconv"
"testing"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
)
const (
standardMemoryAmount = 8000000000
)
var (
zeroRequestMemoryBestEffort = api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
api.ResourceName(api.ResourceMemory): resource.MustParse("0G"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
}
edgeMemoryBestEffort = api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("0G"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("0G"),
},
},
}
noRequestMemoryBestEffort = api.Container{
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
}
noLimitMemoryBestEffort = api.Container{}
memoryGuaranteed = api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("5m"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
}
memoryBurstable = api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount / 2)),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
}
memoryBurstableNoLimit = api.Container{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceMemory): resource.MustParse(strconv.Itoa(standardMemoryAmount - 1)),
},
},
}
)
func TestIsMemoryBestEffort(t *testing.T) {
validCases := []api.Container{zeroRequestMemoryBestEffort, noRequestMemoryBestEffort, noLimitMemoryBestEffort, edgeMemoryBestEffort}
for _, container := range validCases {
if !isMemoryBestEffort(&container) {
t.Errorf("container %+v is memory best-effort", container)
}
}
invalidCases := []api.Container{memoryGuaranteed, memoryBurstable}
for _, container := range invalidCases {
if isMemoryBestEffort(&container) {
t.Errorf("container %+v is not memory best-effort", container)
}
}
}
func TestIsMemoryGuaranteed(t *testing.T) {
validCases := []api.Container{memoryGuaranteed}
for _, container := range validCases {
if !isMemoryGuaranteed(&container) {
t.Errorf("container %+v is memory guaranteed", container)
}
}
invalidCases := []api.Container{zeroRequestMemoryBestEffort, noRequestMemoryBestEffort, noLimitMemoryBestEffort, edgeMemoryBestEffort, memoryBurstable}
for _, container := range invalidCases {
if isMemoryGuaranteed(&container) {
t.Errorf("container %+v is not memory guaranteed", container)
}
}
}
type oomTest struct {
container *api.Container
memoryCapacity int64
lowOomScoreAdj int // The minimum oom_score_adj the container should be assigned.
highOomScoreAdj int // The maximum oom_score_adj the container should be assigned.
}
func TestGetContainerOomScoreAdjust(t *testing.T) {
oomTests := []oomTest{
{
container: &zeroRequestMemoryBestEffort,
memoryCapacity: 4000000000,
lowOomScoreAdj: 1000,
highOomScoreAdj: 1000,
},
{
container: &edgeMemoryBestEffort,
memoryCapacity: 8000000000,
lowOomScoreAdj: 1000,
highOomScoreAdj: 1000,
},
{
container: &noRequestMemoryBestEffort,
memoryCapacity: 7230457451,
lowOomScoreAdj: 1000,
highOomScoreAdj: 1000,
},
{
container: &noLimitMemoryBestEffort,
memoryCapacity: 4000000000,
lowOomScoreAdj: 1000,
highOomScoreAdj: 1000,
},
{
container: &memoryGuaranteed,
memoryCapacity: 123456789,
lowOomScoreAdj: -999,
highOomScoreAdj: -999,
},
{
container: &memoryBurstable,
memoryCapacity: standardMemoryAmount,
lowOomScoreAdj: 495,
highOomScoreAdj: 505,
},
{
container: &memoryBurstableNoLimit,
memoryCapacity: standardMemoryAmount,
lowOomScoreAdj: 2,
highOomScoreAdj: 2,
},
}
for _, test := range oomTests {
oomScoreAdj := GetContainerOomScoreAdjust(test.container, test.memoryCapacity)
if oomScoreAdj < test.lowOomScoreAdj || oomScoreAdj > test.highOomScoreAdj {
t.Errorf("oom_score_adj should be between %d and %d, but was %d", test.lowOomScoreAdj, test.highOomScoreAdj, oomScoreAdj)
}
}
}

View File

@@ -154,6 +154,7 @@ func TestRunOnce(t *testing.T) {
kb.recorder,
kb.readinessManager,
kb.containerRefManager,
&cadvisorApi.MachineInfo{},
dockertools.PodInfraContainerImage,
0,
0,

18 pkg/util/oom/doc.go Normal file
View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package oom implements utility functions relating to out of memory management.
package oom

26 pkg/util/oom/oom.go Normal file
View File

@@ -0,0 +1,26 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oom
// OomAdjuster is a struct instead of an interface so that tests can inject
// fake PID listers and fake OOM score appliers.
// TODO: make this an interface, and inject a mock ioutil struct for testing.
type OomAdjuster struct {
pidLister func(cgroupName string) ([]int, error)
ApplyOomScoreAdj func(pid int, oomScoreAdj int) error
ApplyOomScoreAdjContainer func(cgroupName string, oomScoreAdj, maxTries int) error
}
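For reference, a condensed sketch of how the call sites updated earlier in this diff use the adjuster (the cgroup name "docker/nginx" and the score 100 are hypothetical):

oomAdjuster := oom.NewOomAdjuster()
// PID 0 means the calling process itself; the kubelet passes
// qos.KubeletOomScoreAdj here.
if err := oomAdjuster.ApplyOomScoreAdj(0, -999); err != nil {
    glog.Warning(err)
}
// Apply a score to every process in a container's cgroup, retrying up to
// 5 times while the cgroup's process list stabilizes.
if err := oomAdjuster.ApplyOomScoreAdjContainer("docker/nginx", 100, 5); err != nil {
    glog.Warning(err)
}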

34 pkg/util/oom/oom_fake.go Normal file
View File

@@ -0,0 +1,34 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oom
type FakeOomAdjuster struct{}
func NewFakeOomAdjuster() *OomAdjuster {
return &OomAdjuster{
ApplyOomScoreAdj: fakeApplyOomScoreAdj,
ApplyOomScoreAdjContainer: fakeApplyOomScoreAdjContainer,
}
}
func fakeApplyOomScoreAdj(pid int, oomScoreAdj int) error {
return nil
}
func fakeApplyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error {
return nil
}

112 pkg/util/oom/oom_linux.go Normal file
View File

@@ -0,0 +1,112 @@
// +build cgo,linux
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oom
import (
"fmt"
"io/ioutil"
"path"
"strconv"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/configs"
"github.com/golang/glog"
)
func NewOomAdjuster() *OomAdjuster {
oomAdjuster := &OomAdjuster{
pidLister: getPids,
ApplyOomScoreAdj: applyOomScoreAdj,
}
oomAdjuster.ApplyOomScoreAdjContainer = oomAdjuster.applyOomScoreAdjContainer
return oomAdjuster
}
func getPids(cgroupName string) ([]int, error) {
fsManager := fs.Manager{
Cgroups: &configs.Cgroup{
Name: cgroupName,
},
}
return fsManager.GetPids()
}
// applyOomScoreAdj writes 'oomScoreAdj' to /proc/<pid>/oom_score_adj. PID = 0 means self.
func applyOomScoreAdj(pid int, oomScoreAdj int) error {
if pid < 0 {
return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
}
var pidStr string
if pid == 0 {
pidStr = "self"
} else {
pidStr = strconv.Itoa(pid)
}
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
maxTries := 2
var err error
for i := 0; i < maxTries; i++ {
_, readErr := ioutil.ReadFile(oomScoreAdjPath)
if readErr != nil {
err = fmt.Errorf("failed to read oom_score_adj: %v", readErr)
} else if writeErr := ioutil.WriteFile(oomScoreAdjPath, []byte(strconv.Itoa(oomScoreAdj)), 0700); writeErr != nil {
err = fmt.Errorf("failed to set oom_score_adj to %d: %v", oomScoreAdj, writeErr)
} else {
return nil
}
}
return err
}
// applyOomScoreAdjContainer writes 'oomScoreAdj' to /proc/<pid>/oom_score_adj for all processes
// in cgroup cgroupName. Keeps retrying until the process list of the cgroup stabilizes, or until
// maxTries attempts have been made.
func (oomAdjuster *OomAdjuster) applyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error {
adjustedProcessSet := make(map[int]bool)
for i := 0; i < maxTries; i++ {
continueAdjusting := false
pidList, err := oomAdjuster.pidLister(cgroupName)
if err != nil {
continueAdjusting = true
glog.Errorf("Error getting process list for cgroup %s: %+v", cgroupName, err)
} else if len(pidList) == 0 {
continueAdjusting = true
} else {
for _, pid := range pidList {
if !adjustedProcessSet[pid] {
continueAdjusting = true
if err = oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err == nil {
adjustedProcessSet[pid] = true
}
}
}
}
if !continueAdjusting {
return nil
}
// There's a slight race. A process might have forked just before we write its OOM score adjust.
// The fork might copy the parent process's old OOM score, then this function might execute and
// update the parent's OOM score, but the forked process id might not be reflected in cgroup.procs
// for a short amount of time. So this function might return without changing the forked process's
// OOM score. Very unlikely race, so ignoring this for now.
}
return fmt.Errorf("exceeded maxTries, some processes might not have desired OOM score")
}

View File

@@ -0,0 +1,110 @@
// +build cgo,linux
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oom
import (
"testing"
)
// sequenceToPidLister converts a sequence of PID lists into a PID lister.
// The PID lister returns pidListSequence[i] on the ith call. If i >= the length of pidListSequence,
// it returns the last element of pidListSequence (the sequence is considered to have stabilized).
func sequenceToPidLister(pidListSequence [][]int) func(string) ([]int, error) {
var numCalls int
return func(cgroupName string) ([]int, error) {
numCalls++
if len(pidListSequence) == 0 {
return []int{}, nil
} else if numCalls > len(pidListSequence) {
return pidListSequence[len(pidListSequence)-1], nil
}
return pidListSequence[numCalls-1], nil
}
}
// Tests that applyOomScoreAdjContainer correctly applies OOM scores to relevant processes, or
// returns the right error.
func applyOomScoreAdjContainerTester(pidListSequence [][]int, maxTries int, appliedPids []int, expectedError bool, t *testing.T) {
pidOoms := make(map[int]bool)
// Mock ApplyOomScoreAdj and pidLister.
oomAdjuster := NewOomAdjuster()
oomAdjuster.ApplyOomScoreAdj = func(pid int, oomScoreAdj int) error {
pidOoms[pid] = true
return nil
}
oomAdjuster.pidLister = sequenceToPidLister(pidListSequence)
err := oomAdjuster.ApplyOomScoreAdjContainer("", 100, maxTries)
// Check error value.
if expectedError && err == nil {
t.Errorf("Expected error %+v when running ApplyOomScoreAdjContainer but got no error", expectedError)
return
} else if !expectedError && err != nil {
t.Errorf("Expected no error but got error %+v when running ApplyOomScoreAdjContainer", err)
return
} else if err != nil {
return
}
// Check that OOM scores were applied to the right processes.
if len(appliedPids) != len(pidOoms) {
t.Errorf("Applied OOM scores to incorrect number of processes")
return
}
for _, pid := range appliedPids {
if !pidOoms[pid] {
t.Errorf("Failed to apply OOM scores to process %d", pid)
}
}
}
func TestOomScoreAdjContainer(t *testing.T) {
pidListSequenceEmpty := [][]int{}
applyOomScoreAdjContainerTester(pidListSequenceEmpty, 3, nil, true, t)
pidListSequence1 := [][]int{
{1, 2},
}
applyOomScoreAdjContainerTester(pidListSequence1, 1, nil, true, t)
applyOomScoreAdjContainerTester(pidListSequence1, 2, []int{1, 2}, false, t)
applyOomScoreAdjContainerTester(pidListSequence1, 3, []int{1, 2}, false, t)
pidListSequence3 := [][]int{
{1, 2},
{1, 2, 4, 5},
{2, 1, 4, 5, 3},
}
applyOomScoreAdjContainerTester(pidListSequence3, 1, nil, true, t)
applyOomScoreAdjContainerTester(pidListSequence3, 2, nil, true, t)
applyOomScoreAdjContainerTester(pidListSequence3, 3, nil, true, t)
applyOomScoreAdjContainerTester(pidListSequence3, 4, []int{1, 2, 3, 4, 5}, false, t)
pidListSequenceLag := [][]int{
{},
{},
{},
{1, 2, 4},
{1, 2, 4, 5},
}
for i := 1; i < 5; i++ {
applyOomScoreAdjContainerTester(pidListSequenceLag, i, nil, true, t)
}
applyOomScoreAdjContainerTester(pidListSequenceLag, 6, []int{1, 2, 4, 5}, false, t)
}

View File

@@ -0,0 +1,40 @@
// +build !cgo !linux
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oom
import (
"errors"
)
var unsupportedErr = errors.New("setting OOM scores is unsupported in this build")
func NewOomAdjuster() *OomAdjuster {
return &OomAdjuster{
ApplyOomScoreAdj: unsupportedApplyOomScoreAdj,
ApplyOomScoreAdjContainer: unsupportedApplyOomScoreAdjContainer,
}
}
func unsupportedApplyOomScoreAdj(pid int, oomScoreAdj int) error {
return unsupportedErr
}
func unsupportedApplyOomScoreAdjContainer(cgroupName string, oomScoreAdj, maxTries int) error {
return unsupportedErr
}

18 pkg/util/procfs/doc.go Normal file
View File

@@ -0,0 +1,18 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package procfs implements utility functions relating to the /proc mount.
package procfs

View File

@@ -0,0 +1,10 @@
11:name=systemd:/user/1000.user/c1.session
10:hugetlb:/user/1000.user/c1.session
9:perf_event:/user/1000.user/c1.session
8:blkio:/user/1000.user/c1.session
7:freezer:/user/1000.user/c1.session
6:devices:/user/1000.user/c1.session
5:memory:/user/1000.user/c1.session
4:cpuacct:/user/1000.user/c1.session
3:cpu:/user/1000.user/c1.session
2:cpuset:/

54 pkg/util/procfs/procfs.go Normal file
View File

@@ -0,0 +1,54 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package procfs
import (
"fmt"
"io/ioutil"
"path"
"strconv"
"strings"
)
type ProcFs struct{}
func NewProcFs() ProcFsInterface {
return &ProcFs{}
}
func containerNameFromProcCgroup(content string) (string, error) {
lines := strings.Split(content, "\n")
for _, line := range lines {
entries := strings.SplitN(line, ":", 3)
if len(entries) == 3 && entries[1] == "devices" {
return strings.TrimSpace(entries[2]), nil
}
}
return "", fmt.Errorf("could not find devices cgroup location")
}
// GetFullContainerName gets the container name given the root process ID of the container.
// E.g., if the devices cgroup for the container is stored in /sys/fs/cgroup/devices/docker/nginx,
// return docker/nginx. Assumes that the process is part of exactly one cgroup hierarchy.
func (pfs *ProcFs) GetFullContainerName(pid int) (string, error) {
filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup")
content, err := ioutil.ReadFile(filePath)
if err != nil {
return "", err
}
return containerNameFromProcCgroup(string(content))
}
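A usage sketch with a hypothetical PID, tying GetFullContainerName back to the cgroup format shown in example_proc_cgroup above:

pfs := procfs.NewProcFs()
// For a process whose /proc/<pid>/cgroup contains the line
// "6:devices:/user/1000.user/c1.session", this returns
// "/user/1000.user/c1.session".
name, err := pfs.GetFullContainerName(1234)
if err != nil {
    // The process may have exited, or its cgroup file may lack a devices entry.
}
_ = name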

View File

@@ -0,0 +1,30 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package procfs
type FakeProcFs struct{}
func NewFakeProcFs() ProcFsInterface {
return &FakeProcFs{}
}
// GetFullContainerName gets the container name given the root process ID of the container.
// E.g., if the devices cgroup for the container is stored in /sys/fs/cgroup/devices/docker/nginx,
// return docker/nginx. Assumes that the process is part of exactly one cgroup hierarchy.
func (fakePfs *FakeProcFs) GetFullContainerName(pid int) (string, error) {
return "", nil
}

View File

@@ -0,0 +1,22 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package procfs
type ProcFsInterface interface {
// GetFullContainerName gets the container name given the root process ID of the container.
GetFullContainerName(pid int) (string, error)
}

View File

@@ -0,0 +1,58 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package procfs
import (
"io/ioutil"
"testing"
)
func verifyContainerName(procCgroupText, expectedName string, expectedErr bool, t *testing.T) {
name, err := containerNameFromProcCgroup(procCgroupText)
if expectedErr && err == nil {
t.Errorf("Expected error but did not get error in verifyContainerName")
return
} else if !expectedErr && err != nil {
t.Errorf("Expected no error, but got error %+v in verifyContainerName", err)
return
} else if expectedErr {
return
}
if name != expectedName {
t.Errorf("Expected container name %s but got name %s", expectedName, name)
}
}
func TestContainerNameFromProcCgroup(t *testing.T) {
procCgroupValid := "2:devices:docker/kubelet"
verifyContainerName(procCgroupValid, "docker/kubelet", false, t)
procCgroupEmpty := ""
verifyContainerName(procCgroupEmpty, "", true, t)
content, err := ioutil.ReadFile("example_proc_cgroup")
if err != nil {
t.Errorf("Could not read example /proc cgroup file")
}
verifyContainerName(string(content), "/user/1000.user/c1.session", false, t)
procCgroupNoDevice := "2:freezer:docker/kubelet\n5:cpuacct:pkg/kubectl"
verifyContainerName(procCgroupNoDevice, "", true, t)
procCgroupInvalid := "devices:docker/kubelet\ncpuacct:pkg/kubectl"
verifyContainerName(procCgroupInvalid, "", true, t)
}

View File

@@ -22,7 +22,6 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"os"
@@ -212,34 +211,6 @@ func UsingSystemdInitSystem() bool {
return false
}
// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
func ApplyOomScoreAdj(pid int, value int) error {
if value < -1000 || value > 1000 {
return fmt.Errorf("invalid value(%d) specified for oom_score_adj. Values must be within the range [-1000, 1000]", value)
}
if pid < 0 {
return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
}
var pidStr string
if pid == 0 {
pidStr = "self"
} else {
pidStr = strconv.Itoa(pid)
}
oom_value, err := ioutil.ReadFile(path.Join("/proc", pidStr, "oom_score_adj"))
if err != nil {
return fmt.Errorf("failed to read oom_score_adj: %v", err)
} else if string(oom_value) != strconv.Itoa(value) {
if err := ioutil.WriteFile(path.Join("/proc", pidStr, "oom_score_adj"), []byte(strconv.Itoa(value)), 0700); err != nil {
return fmt.Errorf("failed to set oom_score_adj to %d: %v", value, err)
}
}
return nil
}
// Tests whether all pointer fields in a struct are nil. This is useful when,
// for example, an API struct is handled by plugins which need to distinguish
// "no plugin accepted this spec" from "this spec is empty".