From b36c5cbbec85c46cd099d14c41278fe311af922f Mon Sep 17 00:00:00 2001 From: Harry Zhang Date: Thu, 8 Dec 2016 16:59:41 +0800 Subject: [PATCH] Enable pod qos for systemd in cri Check kubelet config with docker config --- pkg/kubelet/dockershim/BUILD | 1 + pkg/kubelet/dockershim/docker_container.go | 8 +++-- pkg/kubelet/dockershim/docker_sandbox.go | 7 ++-- pkg/kubelet/dockershim/docker_service.go | 41 +++++++++++++++++++++- pkg/kubelet/dockertools/docker_manager.go | 2 +- pkg/kubelet/kubelet.go | 3 +- 6 files changed, 54 insertions(+), 8 deletions(-) diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index 014698a6101..25ab9ed1026 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -28,6 +28,7 @@ go_library( "//pkg/apis/componentconfig:go_default_library", "//pkg/kubelet/api:go_default_library", "//pkg/kubelet/api/v1alpha1/runtime:go_default_library", + "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", "//pkg/kubelet/dockershim/cm:go_default_library", "//pkg/kubelet/dockertools:go_default_library", diff --git a/pkg/kubelet/dockershim/docker_container.go b/pkg/kubelet/dockershim/docker_container.go index 9f2a153d133..0e8f80814f9 100644 --- a/pkg/kubelet/dockershim/docker_container.go +++ b/pkg/kubelet/dockershim/docker_container.go @@ -149,9 +149,11 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeapi // Apply cgroupsParent derived from the sandbox config. if lc := sandboxConfig.GetLinux(); lc != nil { // Apply Cgroup options. - // TODO: Check if this works with per-pod cgroups. - // TODO: we need to pass the cgroup in syntax expected by cgroup driver but shim does not use docker info yet... - hc.CgroupParent = lc.GetCgroupParent() + cgroupParent, err := ds.GenerateExpectedCgroupParent(lc.GetCgroupParent()) + if err != nil { + return "", fmt.Errorf("failed to generate cgroup parent in expected syntax for container %q: %v", config.Metadata.GetName(), err) + } + hc.CgroupParent = cgroupParent } // Set devices for container. diff --git a/pkg/kubelet/dockershim/docker_sandbox.go b/pkg/kubelet/dockershim/docker_sandbox.go index 1c0014da92b..1e4d4ce3af0 100644 --- a/pkg/kubelet/dockershim/docker_sandbox.go +++ b/pkg/kubelet/dockershim/docker_sandbox.go @@ -291,8 +291,11 @@ func (ds *dockerService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([] // applySandboxLinuxOptions applies LinuxPodSandboxConfig to dockercontainer.HostConfig and dockercontainer.ContainerCreateConfig. func (ds *dockerService) applySandboxLinuxOptions(hc *dockercontainer.HostConfig, lc *runtimeapi.LinuxPodSandboxConfig, createConfig *dockertypes.ContainerCreateConfig, image string) error { // Apply Cgroup options. - // TODO: Check if this works with per-pod cgroups. - hc.CgroupParent = lc.GetCgroupParent() + cgroupParent, err := ds.GenerateExpectedCgroupParent(lc.GetCgroupParent()) + if err != nil { + return err + } + hc.CgroupParent = cgroupParent // Apply security context. applySandboxSecurityContext(lc, createConfig.Config, hc, ds.networkPlugin) diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go index 394c30fe059..9635eaa7a12 100644 --- a/pkg/kubelet/dockershim/docker_service.go +++ b/pkg/kubelet/dockershim/docker_service.go @@ -26,6 +26,7 @@ import ( "k8s.io/kubernetes/pkg/apis/componentconfig" internalapi "k8s.io/kubernetes/pkg/kubelet/api" runtimeapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" + kubecm "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/dockershim/cm" "k8s.io/kubernetes/pkg/kubelet/dockertools" @@ -100,7 +101,8 @@ type NetworkPluginSettings struct { var internalLabelKeys []string = []string{containerTypeLabelKey, containerLogPathLabelKey, sandboxIDLabelKey} // NOTE: Anything passed to DockerService should be eventually handled in another way when we switch to running the shim as a different process. -func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings, cgroupsName string) (DockerService, error) { +func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, + pluginSettings *NetworkPluginSettings, cgroupsName string, kubeCgroupDriver string) (DockerService, error) { c := dockertools.NewInstrumentedDockerInterface(client) ds := &dockerService{ seccompProfileRoot: seccompProfileRoot, @@ -135,6 +137,22 @@ func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot str } ds.networkPlugin = plug glog.Infof("Docker cri networking managed by %v", plug.Name()) + + // NOTE: cgroup driver is only detectable in docker 1.11+ + var cgroupDriver string + dockerInfo, err := ds.client.Info() + if err != nil { + glog.Errorf("failed to execute Info() call to the Docker client: %v", err) + glog.Warningf("Using fallback default of cgroupfs as cgroup driver") + } else { + cgroupDriver = dockerInfo.CgroupDriver + if len(kubeCgroupDriver) != 0 && kubeCgroupDriver != cgroupDriver { + return nil, fmt.Errorf("misconfiguration: kubelet cgroup driver: %q is different from docker cgroup driver: %q", kubeCgroupDriver, cgroupDriver) + } + glog.Infof("Setting cgroupDriver to %s", cgroupDriver) + } + ds.cgroupDriver = cgroupDriver + return ds, nil } @@ -157,6 +175,8 @@ type dockerService struct { streamingServer streaming.Server networkPlugin network.NetworkPlugin containerManager cm.ContainerManager + // cgroup driver used by Docker runtime. + cgroupDriver string } // Version returns the runtime name, runtime version and runtime API version @@ -254,3 +274,22 @@ func (ds *dockerService) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.NotFound(w, r) } } + +// GenerateExpectedCgroupParent returns cgroup parent in syntax expected by cgroup driver +func (ds *dockerService) GenerateExpectedCgroupParent(cgroupParent string) (string, error) { + if len(cgroupParent) > 0 { + // if docker uses the systemd cgroup driver, it expects *.slice style names for cgroup parent. + // if we configured kubelet to use --cgroup-driver=cgroupfs, and docker is configured to use systemd driver + // docker will fail to launch the container because the name we provide will not be a valid slice. + // this is a very good thing. + if ds.cgroupDriver == "systemd" { + systemdCgroupParent, err := kubecm.ConvertCgroupFsNameToSystemd(cgroupParent) + if err != nil { + return "", err + } + cgroupParent = systemdCgroupParent + } + } + glog.V(3).Infof("Setting cgroup parent to: %q", cgroupParent) + return cgroupParent, nil +} diff --git a/pkg/kubelet/dockertools/docker_manager.go b/pkg/kubelet/dockertools/docker_manager.go index 9b9abb6076b..f515db814fc 100644 --- a/pkg/kubelet/dockertools/docker_manager.go +++ b/pkg/kubelet/dockertools/docker_manager.go @@ -243,7 +243,7 @@ func NewDockerManager( // if there are any problems. dockerRoot := "/var/lib/docker" - // cgroup driver is only detectable in docker 1.12+ + // cgroup driver is only detectable in docker 1.11+ // when the execution driver is not detectable, we provide the cgroupfs form. // if your docker engine is configured to use the systemd cgroup driver, and you // want to use pod level cgroups, you must be on docker 1.12+ to ensure cgroup-parent diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index ff607a52b46..5317bcc613a 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -538,7 +538,8 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub case "docker": streamingConfig := getStreamingConfig(kubeCfg, kubeDeps) // Use the new CRI shim for docker. - ds, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, streamingConfig, &pluginSettings, kubeCfg.RuntimeCgroups) + ds, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, + streamingConfig, &pluginSettings, kubeCfg.RuntimeCgroups, kubeCfg.CgroupDriver) if err != nil { return nil, err }