From 87aaf4c0ac0eafd668fa5bed479c3c33ae8e7422 Mon Sep 17 00:00:00 2001 From: Yu-Ju Hong Date: Mon, 10 Oct 2016 13:56:53 -0700 Subject: [PATCH] dockershim: move docker to the given cgruop This change add a container manager inside the dockershim to move docker daemon and associated processes to a specified cgroup. The original kubelet container manager will continue checking the name of the cgroup, so that kubelet know how to report runtime stats. --- cmd/kubelet/app/server.go | 15 +- pkg/kubelet/cm/container_manager.go | 15 +- pkg/kubelet/cm/container_manager_linux.go | 31 +++- pkg/kubelet/dockershim/BUILD | 1 + pkg/kubelet/dockershim/cm/BUILD | 30 ++++ .../dockershim/cm/container_manager.go | 21 +++ .../dockershim/cm/container_manager_linux.go | 147 ++++++++++++++++++ .../cm/container_manager_unsupported.go | 34 ++++ pkg/kubelet/dockershim/docker_service.go | 15 +- pkg/kubelet/kubelet.go | 9 +- 10 files changed, 297 insertions(+), 21 deletions(-) create mode 100644 pkg/kubelet/dockershim/cm/BUILD create mode 100644 pkg/kubelet/dockershim/cm/container_manager.go create mode 100644 pkg/kubelet/dockershim/cm/container_manager_linux.go create mode 100644 pkg/kubelet/dockershim/cm/container_manager_unsupported.go diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index 874bca6577a..8292b314a01 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -424,13 +424,14 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) { return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified") } kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, kubeDeps.CAdvisorInterface, cm.NodeConfig{ - RuntimeCgroupsName: s.RuntimeCgroups, - SystemCgroupsName: s.SystemCgroups, - KubeletCgroupsName: s.KubeletCgroups, - ContainerRuntime: s.ContainerRuntime, - CgroupsPerQOS: s.CgroupsPerQOS, - CgroupRoot: s.CgroupRoot, - ProtectKernelDefaults: s.ProtectKernelDefaults, + RuntimeCgroupsName: s.RuntimeCgroups, + SystemCgroupsName: s.SystemCgroups, + KubeletCgroupsName: s.KubeletCgroups, + ContainerRuntime: s.ContainerRuntime, + CgroupsPerQOS: s.CgroupsPerQOS, + CgroupRoot: s.CgroupRoot, + ProtectKernelDefaults: s.ProtectKernelDefaults, + RuntimeIntegrationType: s.ExperimentalRuntimeIntegrationType, }) if err != nil { return err diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index 6012f7819eb..a58e547205a 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -49,13 +49,14 @@ type ContainerManager interface { } type NodeConfig struct { - RuntimeCgroupsName string - SystemCgroupsName string - KubeletCgroupsName string - ContainerRuntime string - CgroupsPerQOS bool - CgroupRoot string - ProtectKernelDefaults bool + RuntimeCgroupsName string + SystemCgroupsName string + KubeletCgroupsName string + ContainerRuntime string + CgroupsPerQOS bool + CgroupRoot string + ProtectKernelDefaults bool + RuntimeIntegrationType string } type Status struct { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 7abcfdbf46b..7bfb808bb7f 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -327,7 +327,27 @@ func (cm *containerManagerImpl) setupNode() error { systemContainers := []*systemContainer{} if cm.ContainerRuntime == "docker" { dockerVersion := getDockerVersion(cm.cadvisorInterface) - if cm.RuntimeCgroupsName != "" { + if cm.RuntimeIntegrationType == "cri" { + // If kubelet uses CRI, dockershim will manage the cgroups and oom + // score for the docker processes. + // In the future, NodeSpec should mandate the cgroup that the + // runtime processes need to be in. For now, we still check the + // cgroup for docker periodically, so that kubelet can recognize + // the cgroup for docker and serve stats for the runtime. + // TODO(#27097): Fix this after NodeSpec is clearly defined. + cm.periodicTasks = append(cm.periodicTasks, func() { + glog.V(4).Infof("[ContainerManager]: Adding periodic tasks for docker CRI integration") + cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile) + if err != nil { + glog.Error(err) + return + } + glog.V(2).Infof("[ContainerManager]: Discovered runtime cgroups name: %s", cont) + cm.Lock() + defer cm.Unlock() + cm.RuntimeCgroupsName = cont + }) + } else if cm.RuntimeCgroupsName != "" { cont := newSystemCgroups(cm.RuntimeCgroupsName) var capacity = api.ResourceList{} if info, err := cm.cadvisorInterface.MachineInfo(); err == nil { @@ -353,13 +373,13 @@ func (cm *containerManagerImpl) setupNode() error { }, } cont.ensureStateFunc = func(manager *fs.Manager) error { - return ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer) + return EnsureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer) } systemContainers = append(systemContainers, cont) } else { cm.periodicTasks = append(cm.periodicTasks, func() { glog.V(10).Infof("Adding docker daemon periodic tasks") - if err := ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil { + if err := EnsureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil { glog.Error(err) return } @@ -572,7 +592,10 @@ func getPidsForProcess(name, pidFile string) ([]int, error) { } // Ensures that the Docker daemon is in the desired container. -func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, manager *fs.Manager) error { +// Temporarily export the function to be used by dockershim. +// TODO(yujuhong): Move this function to dockershim once kubelet migrates to +// dockershim as the default. +func EnsureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, manager *fs.Manager) error { type process struct{ name, file string } dockerProcs := []process{{dockerProcessName, dockerPidFile}} if dockerVersion.GTE(containerdVersion) { diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index 672e10c1326..85ca6b203e8 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/kubelet/api:go_default_library", "//pkg/kubelet/api/v1alpha1/runtime:go_default_library", "//pkg/kubelet/container:go_default_library", + "//pkg/kubelet/dockershim/cm:go_default_library", "//pkg/kubelet/dockertools:go_default_library", "//pkg/kubelet/leaky:go_default_library", "//pkg/kubelet/network:go_default_library", diff --git a/pkg/kubelet/dockershim/cm/BUILD b/pkg/kubelet/dockershim/cm/BUILD new file mode 100644 index 00000000000..c12a383c98a --- /dev/null +++ b/pkg/kubelet/dockershim/cm/BUILD @@ -0,0 +1,30 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +load( + "@io_bazel_rules_go//go:def.bzl", + "go_binary", + "go_library", + "go_test", + "cgo_library", +) + +go_library( + name = "go_default_library", + srcs = [ + "container_manager.go", + "container_manager_linux.go", + ], + tags = ["automanaged"], + deps = [ + "//pkg/kubelet/cm:go_default_library", + "//pkg/kubelet/dockertools:go_default_library", + "//pkg/kubelet/qos:go_default_library", + "//pkg/util/wait:go_default_library", + "//vendor:github.com/blang/semver", + "//vendor:github.com/golang/glog", + "//vendor:github.com/opencontainers/runc/libcontainer/cgroups/fs", + "//vendor:github.com/opencontainers/runc/libcontainer/configs", + ], +) diff --git a/pkg/kubelet/dockershim/cm/container_manager.go b/pkg/kubelet/dockershim/cm/container_manager.go new file mode 100644 index 00000000000..91f2ace4d96 --- /dev/null +++ b/pkg/kubelet/dockershim/cm/container_manager.go @@ -0,0 +1,21 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cm + +type ContainerManager interface { + Start() error +} diff --git a/pkg/kubelet/dockershim/cm/container_manager_linux.go b/pkg/kubelet/dockershim/cm/container_manager_linux.go new file mode 100644 index 00000000000..a2d0f95d96e --- /dev/null +++ b/pkg/kubelet/dockershim/cm/container_manager_linux.go @@ -0,0 +1,147 @@ +// +build linux + +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cm + +import ( + "fmt" + "io/ioutil" + "regexp" + "strconv" + "time" + + "github.com/blang/semver" + "github.com/golang/glog" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" + kubecm "k8s.io/kubernetes/pkg/kubelet/cm" + "k8s.io/kubernetes/pkg/kubelet/dockertools" + "k8s.io/kubernetes/pkg/kubelet/qos" + "k8s.io/kubernetes/pkg/util/wait" +) + +const ( + // The percent of the machine memory capacity. + dockerMemoryLimitThresholdPercent = kubecm.DockerMemoryLimitThresholdPercent + + // The minimum memory limit allocated to docker container. + minDockerMemoryLimit = kubecm.MinDockerMemoryLimit + + // The Docker OOM score adjustment. + dockerOOMScoreAdj = qos.DockerOOMScoreAdj +) + +var ( + memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`) +) + +func NewContainerManager(cgroupsName string, client dockertools.DockerInterface) ContainerManager { + return &containerManager{ + cgroupsName: cgroupsName, + client: client, + } +} + +type containerManager struct { + // Docker client. + client dockertools.DockerInterface + // Name of the cgroups. + cgroupsName string + // Manager for the cgroups. + cgroupsManager *fs.Manager +} + +func (m *containerManager) Start() error { + // TODO: check if the required cgroups are mounted. + if len(m.cgroupsName) != 0 { + manager, err := createCgroupManager(m.cgroupsName) + if err != nil { + return err + } + m.cgroupsManager = manager + } + go wait.Until(m.doWork, 5*time.Minute, wait.NeverStop) + return nil +} + +func (m *containerManager) doWork() { + v, err := m.client.Version() + if err != nil { + glog.Errorf("Unable to get docker version: %v", err) + return + } + version, err := semver.Parse(v.Version) + if err != nil { + glog.Errorf("Unable to parse docker version %q: %v", v.Version, err) + return + } + // EnsureDockerInConatiner does two things. + // 1. Ensure processes run in the cgroups if m.cgroupsManager is not nil. + // 2. Ensure processes have the OOM score applied. + if err := kubecm.EnsureDockerInContainer(version, dockerOOMScoreAdj, m.cgroupsManager); err != nil { + glog.Errorf("Unable to ensure the docker processes run in the desired containers") + } +} + +func createCgroupManager(name string) (*fs.Manager, error) { + var memoryLimit uint64 + memoryCapacity, err := getMemoryCapacity() + if err != nil || memoryCapacity*dockerMemoryLimitThresholdPercent/100 < minDockerMemoryLimit { + memoryLimit = minDockerMemoryLimit + } + glog.V(2).Infof("Configure resource-only container %q with memory limit: %d", name, memoryLimit) + + allowAllDevices := true + cm := &fs.Manager{ + Cgroups: &configs.Cgroup{ + Parent: "/", + Name: name, + Resources: &configs.Resources{ + Memory: int64(memoryLimit), + MemorySwap: -1, + AllowAllDevices: &allowAllDevices, + }, + }, + } + return cm, nil +} + +// getMemoryCapacity returns the memory capacity on the machine in bytes. +func getMemoryCapacity() (uint64, error) { + out, err := ioutil.ReadFile("/proc/meminfo") + if err != nil { + return 0, err + } + return parseCapacity(out, memoryCapacityRegexp) +} + +// parseCapacity matches a Regexp in a []byte, returning the resulting value in bytes. +// Assumes that the value matched by the Regexp is in KB. +func parseCapacity(b []byte, r *regexp.Regexp) (uint64, error) { + matches := r.FindSubmatch(b) + if len(matches) != 2 { + return 0, fmt.Errorf("failed to match regexp in output: %q", string(b)) + } + m, err := strconv.ParseUint(string(matches[1]), 10, 64) + if err != nil { + return 0, err + } + + // Convert to bytes. + return m * 1024, err +} diff --git a/pkg/kubelet/dockershim/cm/container_manager_unsupported.go b/pkg/kubelet/dockershim/cm/container_manager_unsupported.go new file mode 100644 index 00000000000..b2a7281ca5c --- /dev/null +++ b/pkg/kubelet/dockershim/cm/container_manager_unsupported.go @@ -0,0 +1,34 @@ +// +build !linux + +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cm + +import ( + "k8s.io/kubernetes/pkg/kubelet/dockertools" +) + +type unsupportedContainerManager struct { +} + +func NewContainerManager(_ string, _ dockertools.DockerInterface) ContainerManager { + return &unsupportedContainerManager{} +} + +func (m *unsupportedContainerManager) Start() error { + return fmt.Errorf("Container Manager is unsupported in this build") +} diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go index f96a3f32695..1e222fcaf62 100644 --- a/pkg/kubelet/dockershim/docker_service.go +++ b/pkg/kubelet/dockershim/docker_service.go @@ -26,6 +26,7 @@ import ( internalApi "k8s.io/kubernetes/pkg/kubelet/api" runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/kubelet/dockershim/cm" "k8s.io/kubernetes/pkg/kubelet/dockertools" "k8s.io/kubernetes/pkg/kubelet/network" "k8s.io/kubernetes/pkg/kubelet/network/cni" @@ -92,10 +93,11 @@ type NetworkPluginSettings struct { var internalLabelKeys []string = []string{containerTypeLabelKey, containerLogPathLabelKey, sandboxIDLabelKey} // NOTE: Anything passed to DockerService should be eventually handled in another way when we switch to running the shim as a different process. -func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings) (DockerService, error) { +func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings, cgroupsName string) (DockerService, error) { + c := dockertools.NewInstrumentedDockerInterface(client) ds := &dockerService{ seccompProfileRoot: seccompProfileRoot, - client: dockertools.NewInstrumentedDockerInterface(client), + client: c, os: kubecontainer.RealOS{}, podSandboxImage: podSandboxImage, streamingRuntime: &streamingRuntime{ @@ -104,6 +106,7 @@ func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot str // TODO(#35747) - Either deprecate nsenter exec handling, or add support for it here. execHandler: &dockertools.NativeExecHandler{}, }, + containerManager: cm.NewContainerManager(cgroupsName, client), } if streamingConfig != nil { var err error @@ -135,6 +138,7 @@ type DockerService interface { internalApi.RuntimeService internalApi.ImageManagerService DockerLegacyService + Start() error } // DockerLegacyService is an interface that embeds all legacy methods for @@ -142,6 +146,7 @@ type DockerService interface { type DockerLegacyService interface { // Supporting legacy methods for docker. GetContainerLogs(pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) (err error) + LegacyExec(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error LegacyAttach(id kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error LegacyPortForward(sandboxID string, port uint16, stream io.ReadWriteCloser) error @@ -155,6 +160,7 @@ type dockerService struct { streamingRuntime *streamingRuntime streamingServer streaming.Server networkPlugin network.NetworkPlugin + containerManager cm.ContainerManager } // Version returns the runtime name, runtime version and runtime API version @@ -214,3 +220,8 @@ type dockerNetworkHost struct { network.LegacyHost *namespaceGetter } + +// Start initializes and starts components in dockerService. +func (ds *dockerService) Start() error { + return ds.containerManager.Start() +} diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 4751a8b8996..e9b72887e72 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -509,7 +509,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub case "cri": // Use the new CRI shim for docker. This is needed for testing the // docker integration through CRI, and may be removed in the future. - dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings) + dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings, kubeCfg.RuntimeCgroups) + if err != nil { + return nil, err + } runtimeService := dockerService.(internalApi.RuntimeService) imageService := dockerService.(internalApi.ImageManagerService) @@ -537,6 +540,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub return nil, err } } + // TODO: Find a better place to start the service. + if err := dockerService.Start(); err != nil { + return nil, err + } // kubelet defers to the runtime shim to setup networking. Setting // this to nil will prevent it from trying to invoke the plugin.