From 2141e230a65a7bfdf829acdabf7d29a101577e0a Mon Sep 17 00:00:00 2001 From: Random-Liu Date: Sat, 24 Sep 2016 19:07:43 -0700 Subject: [PATCH] Add oom score adj in new CRI implementation. --- pkg/kubelet/dockershim/docker_sandbox.go | 5 +++-- pkg/kubelet/kubelet.go | 2 ++ pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go | 4 +++- pkg/kubelet/kuberuntime/kuberuntime_container.go | 11 +++++++++-- pkg/kubelet/kuberuntime/kuberuntime_manager.go | 6 ++++++ pkg/kubelet/kuberuntime/kuberuntime_manager_test.go | 7 ++++++- pkg/kubelet/qos/policy.go | 4 ++++ 7 files changed, 33 insertions(+), 6 deletions(-) diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index aabcec52bca..c3574a537ca 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -25,6 +25,7 @@ import ( "github.com/golang/glog" runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" + "k8s.io/kubernetes/pkg/kubelet/qos" ) const ( @@ -32,7 +33,6 @@ const ( // Various default sandbox resources requests/limits. defaultSandboxCPUshares int64 = 2 - defaultSandboxOOMScore int = -999 // Termination grace period defaultSandboxGracePeriod int = 10 @@ -263,5 +263,6 @@ func setSandboxResources(hc *dockercontainer.HostConfig) { CPUShares: defaultSandboxCPUshares, // Use docker's default cpu quota/period. } - hc.OomScoreAdj = defaultSandboxOOMScore + // TODO: Get rid of the dependency on kubelet internal package. 
+ hc.OomScoreAdj = qos.PodInfraOOMAdj } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 78dee53b8fc..a52f257b2ef 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -555,6 +555,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub kubecontainer.FilterEventRecorder(kubeDeps.Recorder), klet.livenessManager, containerRefManager, + machineInfo, klet.podManager, kubeDeps.OSInterface, klet.networkPlugin, @@ -649,6 +650,7 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub kubecontainer.FilterEventRecorder(kubeDeps.Recorder), klet.livenessManager, containerRefManager, + machineInfo, klet.podManager, kubeDeps.OSInterface, klet.networkPlugin, diff --git a/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go b/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go index 67cb6103d0d..8d257b4c334 100644 --- a/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go @@ -21,6 +21,7 @@ import ( "net/http" "time" + cadvisorapi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/client/record" "k8s.io/kubernetes/pkg/credentialprovider" @@ -90,13 +91,14 @@ func (f *fakePodGetter) GetPodByUID(uid types.UID) (*api.Pod, bool) { return pod, found } -func NewFakeKubeRuntimeManager(runtimeService internalApi.RuntimeService, imageService internalApi.ImageManagerService, networkPlugin network.NetworkPlugin, osInterface kubecontainer.OSInterface) (*kubeGenericRuntimeManager, error) { +func NewFakeKubeRuntimeManager(runtimeService internalApi.RuntimeService, imageService internalApi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, networkPlugin network.NetworkPlugin, osInterface kubecontainer.OSInterface) (*kubeGenericRuntimeManager, error) { recorder := &record.FakeRecorder{} kubeRuntimeManager := &kubeGenericRuntimeManager{ recorder: recorder, cpuCFSQuota: false, livenessManager: 
proberesults.NewManager(), containerRefManager: kubecontainer.NewRefManager(), + machineInfo: machineInfo, osInterface: osInterface, networkPlugin: networkPlugin, runtimeHelper: &fakeRuntimeHelper{}, diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 1b8e04cc3e9..a9c9a38cb21 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -34,6 +34,7 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/dockershim" "k8s.io/kubernetes/pkg/kubelet/events" + "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/kubelet/util/format" "k8s.io/kubernetes/pkg/types" utilruntime "k8s.io/kubernetes/pkg/util/runtime" @@ -144,7 +145,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta Stdin: &container.Stdin, StdinOnce: &container.StdinOnce, Tty: &container.TTY, - Linux: m.generateLinuxContainerConfig(container), + Linux: m.generateLinuxContainerConfig(container, pod), } // set privileged and readonlyRootfs @@ -173,7 +174,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta } // generateLinuxContainerConfig generates linux container config for kubelet runtime api. -func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.Container) *runtimeApi.LinuxContainerConfig { +func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.Container, pod *api.Pod) *runtimeApi.LinuxContainerConfig { linuxConfig := &runtimeApi.LinuxContainerConfig{ Resources: &runtimeApi.LinuxContainerResources{}, } @@ -183,6 +184,8 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api. 
cpuRequest := container.Resources.Requests.Cpu() cpuLimit := container.Resources.Limits.Cpu() memoryLimit := container.Resources.Limits.Memory().Value() + oomScoreAdj := int64(qos.GetContainerOOMScoreAdjust(pod, container, + int64(m.machineInfo.MemoryCapacity))) // If request is not specified, but limit is, we want request to default to limit. // API server does this for new containers, but we repeat this logic in Kubelet // for containers running on existing Kubernetes clusters. @@ -197,6 +200,10 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api. if memoryLimit != 0 { linuxConfig.Resources.MemoryLimitInBytes = &memoryLimit } + // Set OOM score of the container based on qos policy. Processes in lower-priority pods should + // be killed first if the system runs out of memory. + linuxConfig.Resources.OomScoreAdj = &oomScoreAdj + if m.cpuCFSQuota { // if cpuLimit.Amount is nil, then the appropriate default value is returned // to allow full usage of cpu resource. diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 541e10b763b..2abc488e396 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -24,6 +24,7 @@ import ( "github.com/coreos/go-semver/semver" "github.com/golang/glog" + cadvisorapi "github.com/google/cadvisor/info/v1" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/client/record" @@ -68,6 +69,9 @@ type kubeGenericRuntimeManager struct { osInterface kubecontainer.OSInterface containerRefManager *kubecontainer.RefManager + // machineInfo contains the machine information. 
+ machineInfo *cadvisorapi.MachineInfo + // Container GC manager containerGC *containerGC @@ -102,6 +106,7 @@ func NewKubeGenericRuntimeManager( recorder record.EventRecorder, livenessManager proberesults.Manager, containerRefManager *kubecontainer.RefManager, + machineInfo *cadvisorapi.MachineInfo, podGetter podGetter, osInterface kubecontainer.OSInterface, networkPlugin network.NetworkPlugin, @@ -120,6 +125,7 @@ func NewKubeGenericRuntimeManager( cpuCFSQuota: cpuCFSQuota, livenessManager: livenessManager, containerRefManager: containerRefManager, + machineInfo: machineInfo, osInterface: osInterface, networkPlugin: networkPlugin, runtimeHelper: runtimeHelper, diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index 72be2d06cf3..6233f311909 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + cadvisorapi "github.com/google/cadvisor/info/v1" "github.com/stretchr/testify/assert" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/apis/componentconfig" @@ -42,6 +43,10 @@ var ( func createTestRuntimeManager() (*apitest.FakeRuntimeService, *apitest.FakeImageService, *kubeGenericRuntimeManager, error) { fakeRuntimeService := apitest.NewFakeRuntimeService() fakeImageService := apitest.NewFakeImageService() + // Only an empty machineInfo is needed here: all containers in these unit tests are best-effort, + // so the data in machineInfo is not used. If burstable containers are used in unit tests in the future, + // we may want to set the memory capacity.
+ machineInfo := &cadvisorapi.MachineInfo{} networkPlugin, _ := network.InitNetworkPlugin( []network.NetworkPlugin{}, "", @@ -51,7 +56,7 @@ func createTestRuntimeManager() (*apitest.FakeRuntimeService, *apitest.FakeImage network.UseDefaultMTU, ) osInterface := &containertest.FakeOS{} - manager, err := NewFakeKubeRuntimeManager(fakeRuntimeService, fakeImageService, networkPlugin, osInterface) + manager, err := NewFakeKubeRuntimeManager(fakeRuntimeService, fakeImageService, machineInfo, networkPlugin, osInterface) return fakeRuntimeService, fakeImageService, manager, err } diff --git a/pkg/kubelet/qos/policy.go b/pkg/kubelet/qos/policy.go index 7013f712f87..7c142f5cd10 100644 --- a/pkg/kubelet/qos/policy.go +++ b/pkg/kubelet/qos/policy.go @@ -21,6 +21,10 @@ import ( ) const ( + // PodInfraOOMAdj is very Docker-specific. For an arbitrary runtime, it may not make + // sense to set a sandbox-level OOM score, e.g. a sandbox could be just a namespace + // without a process. + // TODO: Handle infra container oom score adj in a runtime-agnostic way. PodInfraOOMAdj int = -998 KubeletOOMScoreAdj int = -999 DockerOOMScoreAdj int = -999