From 2d1a8d0da00bced1ef0e564c2378b393e48279a2 Mon Sep 17 00:00:00 2001
From: Victor Marmol <vmarmol@google.com>
Date: Thu, 19 Feb 2015 17:16:31 -0800
Subject: [PATCH 1/2] Allow ApplyOomScoreAdj to specify what PID to adjust for.

---
 pkg/kubelet/server/server.go |  2 +-
 pkg/proxy/server/server.go   |  2 +-
 pkg/util/util.go             | 18 ++++++++++++++----
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go
index 727b003b950..866ddeb14b9 100644
--- a/pkg/kubelet/server/server.go
+++ b/pkg/kubelet/server/server.go
@@ -170,7 +170,7 @@ func (s *KubeletServer) Run(_ []string) error {
 		s.EtcdServerList = util.StringList{}
 	}
 
-	if err := util.ApplyOomScoreAdj(s.OOMScoreAdj); err != nil {
+	if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
 		glog.Info(err)
 	}
 
diff --git a/pkg/proxy/server/server.go b/pkg/proxy/server/server.go
index 44eff45f390..55c11f69f5e 100644
--- a/pkg/proxy/server/server.go
+++ b/pkg/proxy/server/server.go
@@ -89,7 +89,7 @@ func (s *ProxyServer) AddFlags(fs *pflag.FlagSet) {
 
 // Run runs the specified ProxyServer. This should never exit.
 func (s *ProxyServer) Run(_ []string) error {
-	if err := util.ApplyOomScoreAdj(s.OOMScoreAdj); err != nil {
+	if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil {
 		glog.Info(err)
 	}
 
diff --git a/pkg/util/util.go b/pkg/util/util.go
index d7e6a37dea2..579e9061324 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -175,14 +175,24 @@ func CompileRegexps(regexpStrings []string) ([]*regexp.Regexp, error) {
 	return regexps, nil
 }
 
-// Writes 'value' to /proc/self/oom_score_adj.
-func ApplyOomScoreAdj(value int) error {
+// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self.
+func ApplyOomScoreAdj(pid int, value int) error {
 	if value < -1000 || value > 1000 {
 		return fmt.Errorf("invalid value(%d) specified for oom_score_adj. Values must be within the range [-1000, 1000]", value)
 	}
+	if pid < 0 {
+		return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
+	}
 
-	if err := ioutil.WriteFile("/proc/self/oom_score_adj", []byte(strconv.Itoa(value)), 0700); err != nil {
-		fmt.Errorf("failed to set oom_score_adj to %d - %q", value, err)
+	var pidStr string
+	if pid == 0 {
+		pidStr = "self"
+	} else {
+		pidStr = strconv.Itoa(pid)
+	}
+
+	if err := ioutil.WriteFile(path.Join("/proc", pidStr, "oom_score_adj"), []byte(strconv.Itoa(value)), 0700); err != nil {
+		return fmt.Errorf("failed to set oom_score_adj to %d: %v", value, err)
 	}
 
 	return nil

From 8649628c6cc657d07fbebb9949c38a27ef81a7d6 Mon Sep 17 00:00:00 2001
From: Victor Marmol <vmarmol@google.com>
Date: Thu, 19 Feb 2015 19:17:44 -0800
Subject: [PATCH 2/2] Make POD container last OOM victim.

Setting the oom_score_adj of the PID of the POD container to -100 which
is less than the default of 0. This ensures that this PID is the last
OOM victim chosen by the kernel. Fixes #3067.
---
 pkg/kubelet/kubelet.go      | 19 ++++++++++++++++++-
 pkg/kubelet/kubelet_test.go | 14 +++++++-------
 pkg/kubelet/runonce_test.go | 18 ++++++++++++++++--
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 80184d02e72..2e40384b4b9 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -56,6 +56,10 @@ const minShares = 2
 const sharesPerCPU = 1024
 const milliCPUToCPU = 1000
 
+// The oom_score_adj of the POD infrastructure container. The default is 0, so
+// any value below that makes it *less* likely to get OOM killed.
+const podOomScoreAdj = -100
+
 // SyncHandler is an interface implemented by Kubelet, for testability
 type SyncHandler interface {
 	SyncPods([]api.BoundPod) error
@@ -938,7 +942,20 @@ func (kl *Kubelet) createPodInfraContainer(pod *api.BoundPod) (dockertools.Docke
 	if ref != nil {
 		record.Eventf(ref, "pulled", "Successfully pulled image %q", container.Image)
 	}
-	return kl.runContainer(pod, container, nil, "", "")
+	id, err := kl.runContainer(pod, container, nil, "", "")
+	if err != nil {
+		return "", err
+	}
+
+	// Set OOM score of POD container to lower than those of the other
+	// containers in the pod. This ensures that it is killed only as a last
+	// resort.
+	containerInfo, err := kl.dockerClient.InspectContainer(string(id))
+	if err != nil {
+		return "", err
+	}
+
+	return id, util.ApplyOomScoreAdj(containerInfo.State.Pid, podOomScoreAdj)
 }
 
 func (kl *Kubelet) pullImage(img string, ref *api.ObjectReference) error {
diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index 7c0a2c92cb5..daafb6bcbdf 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -437,7 +437,7 @@ func TestSyncPodsWithTerminationLog(t *testing.T) {
 	}
 	kubelet.drainWorkers()
 	verifyCalls(t, fakeDocker, []string{
-		"list", "create", "start", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "create", "start", "inspect_container", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	fakeDocker.Lock()
 	parts := strings.Split(fakeDocker.Container.HostConfig.Binds[0], ":")
@@ -497,7 +497,7 @@ func TestSyncPodsCreatesNetAndContainer(t *testing.T) {
 	kubelet.drainWorkers()
 
 	verifyCalls(t, fakeDocker, []string{
-		"list", "create", "start", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "create", "start", "inspect_container", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	fakeDocker.Lock()
 
@@ -547,7 +547,7 @@ func TestSyncPodsCreatesNetAndContainerPullsImage(t *testing.T) {
 	kubelet.drainWorkers()
 
 	verifyCalls(t, fakeDocker, []string{
-		"list", "create", "start", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "create", "start", "inspect_container", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	fakeDocker.Lock()
 
@@ -563,7 +563,7 @@ func TestSyncPodsCreatesNetAndContainerPullsImage(t *testing.T) {
 	fakeDocker.Unlock()
 }
 
-func TestSyncPodsWithNetCreatesContainer(t *testing.T) {
+func TestSyncPodsWithPodInfraCreatesContainer(t *testing.T) {
 	kubelet, fakeDocker := newTestKubelet(t)
 	fakeDocker.ContainerList = []docker.APIContainers{
 		{
@@ -604,7 +604,7 @@ func TestSyncPodsWithNetCreatesContainer(t *testing.T) {
 	fakeDocker.Unlock()
 }
 
-func TestSyncPodsWithNetCreatesContainerCallsHandler(t *testing.T) {
+func TestSyncPodsWithPodInfraCreatesContainerCallsHandler(t *testing.T) {
 	kubelet, fakeDocker := newTestKubelet(t)
 	fakeHttp := fakeHTTP{}
 	kubelet.httpClient = &fakeHttp
@@ -661,7 +661,7 @@ func TestSyncPodsWithNetCreatesContainerCallsHandler(t *testing.T) {
 	}
 }
 
-func TestSyncPodsDeletesWithNoNetContainer(t *testing.T) {
+func TestSyncPodsDeletesWithNoPodInfraContainer(t *testing.T) {
 	kubelet, fakeDocker := newTestKubelet(t)
 	fakeDocker.ContainerList = []docker.APIContainers{
 		{
@@ -692,7 +692,7 @@ func TestSyncPodsDeletesWithNoNetContainer(t *testing.T) {
 	kubelet.drainWorkers()
 
 	verifyCalls(t, fakeDocker, []string{
-		"list", "stop", "create", "start", "list", "list", "inspect_container", "inspect_image", "list", "create", "start"})
+		"list", "stop", "create", "start", "inspect_container", "list", "list", "inspect_container", "inspect_image", "list", "create", "start"})
 
 	// A map iteration is used to delete containers, so must not depend on
 	// order here.
diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go
index 8a1f9af9b75..3fde0209a25 100644
--- a/pkg/kubelet/runonce_test.go
+++ b/pkg/kubelet/runonce_test.go
@@ -97,14 +97,28 @@ func TestRunOnce(t *testing.T) {
 			label: "syncPod",
 			container: docker.Container{
 				Config: &docker.Config{Image: "someimage"},
-				State:  docker.State{Running: true},
+				State:  docker.State{Running: true, Pid: 42},
 			},
 		},
 		{
 			label: "syncPod",
 			container: docker.Container{
 				Config: &docker.Config{Image: "someimage"},
-				State:  docker.State{Running: true},
+				State:  docker.State{Running: true, Pid: 42},
 			},
 		},
+		{
+			label: "syncPod",
+			container: docker.Container{
+				Config: &docker.Config{Image: "someimage"},
+				State:  docker.State{Running: true, Pid: 42},
+			},
+		},
+		{
+			label: "syncPod",
+			container: docker.Container{
+				Config: &docker.Config{Image: "someimage"},
+				State:  docker.State{Running: true, Pid: 42},
+			},
+		},
 	},