diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go index 59da8592831..286b0c7317c 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux.go +++ b/pkg/kubelet/cm/cgroup_manager_linux.go @@ -25,6 +25,17 @@ import ( libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" ) +// CgroupSubsystems holds information about the mounted cgroup subsytems +type CgroupSubsystems struct { + // Cgroup subsystem mounts. + // e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"] + Mounts []libcontainercgroups.Mount + + // Cgroup subsystem to their mount location. + // e.g.: "cpu" -> "/sys/fs/cgroup/cpu" + MountPoints map[string]string +} + // cgroupManagerImpl implements the CgroupManager interface. // Its a stateless object which can be used to // update,create or delete any number of cgroups @@ -32,14 +43,14 @@ import ( type cgroupManagerImpl struct { // subsystems holds information about all the // mounted cgroup subsytems on the node - subsystems *cgroupSubsystems + subsystems *CgroupSubsystems } // Make sure that cgroupManagerImpl implements the CgroupManager interface var _ CgroupManager = &cgroupManagerImpl{} // NewCgroupManager is a factory method that returns a CgroupManager -func NewCgroupManager(cs *cgroupSubsystems) CgroupManager { +func NewCgroupManager(cs *CgroupSubsystems) CgroupManager { return &cgroupManagerImpl{ subsystems: cs, } @@ -48,8 +59,8 @@ func NewCgroupManager(cs *cgroupSubsystems) CgroupManager { // Exists checks if all subsystem cgroups already exist func (m *cgroupManagerImpl) Exists(name string) bool { // Get map of all cgroup paths on the system for the particular cgroup - cgroupPaths := make(map[string]string, len(m.subsystems.mountPoints)) - for key, val := range m.subsystems.mountPoints { + cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) + for key, val := range m.subsystems.MountPoints { cgroupPaths[key] = path.Join(val, name) } @@ -68,8 +79,8 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error { name := cgroupConfig.Name // Get map of all cgroup paths on the system for the particular cgroup - cgroupPaths := make(map[string]string, len(m.subsystems.mountPoints)) - for key, val := range m.subsystems.mountPoints { + cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) + for key, val := range m.subsystems.MountPoints { cgroupPaths[key] = path.Join(val, name) } @@ -98,7 +109,7 @@ type subsystem interface { } // Cgroup subsystems we currently support -var supportedSubsystems []subsystem = []subsystem{ +var supportedSubsystems = []subsystem{ &cgroupfs.MemoryGroup{}, &cgroupfs.CpuGroup{}, } @@ -142,11 +153,18 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { resources.CpuQuota = *resourceConfig.CpuQuota } + // Get map of all cgroup paths on the system for the particular cgroup + cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) + for key, val := range m.subsystems.MountPoints { + cgroupPaths[key] = path.Join(val, name) + } + // Initialize libcontainer's cgroup config libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ Name: path.Base(name), Parent: path.Dir(name), Resources: resources, + Paths: cgroupPaths, } if err := setSupportedSubsytems(libcontainerCgroupConfig); err != nil { @@ -177,7 +195,7 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { // It creates cgroup files for each subsytems and writes the pid // in the tasks file. We use the function to create all the required // cgroup files but not attach any "real" pid to the cgroup. - if err := fsCgroupManager.Apply(0); err != nil { + if err := fsCgroupManager.Apply(-1); err != nil { return fmt.Errorf("Failed to apply cgroup config for %v: %v", name, err) } return nil diff --git a/pkg/kubelet/cm/cgroup_manager_unsupported.go b/pkg/kubelet/cm/cgroup_manager_unsupported.go index 650aeca6c22..609c0155df0 100644 --- a/pkg/kubelet/cm/cgroup_manager_unsupported.go +++ b/pkg/kubelet/cm/cgroup_manager_unsupported.go @@ -25,6 +25,11 @@ type unsupportedCgroupManager struct{} // Make sure that unsupportedCgroupManager implements the CgroupManager interface var _ CgroupManager = &unsupportedCgroupManager{} +type CgroupSubsystems struct { + Mounts []interface{} + MountPoints map[string]string +} + func NewCgroupManager(_ interface{}) CgroupManager { return &unsupportedCgroupManager{} } diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index 408636d24fb..6012f7819eb 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -25,7 +25,7 @@ type ContainerManager interface { // Runs the container manager's housekeeping. // - Ensures that the Docker daemon is in a container. // - Creates the system container where all non-containerized processes run. - Start() error + Start(*api.Node) error // Returns resources allocated to system cgroups in the machine. // These cgroups include the system and Kubernetes services. @@ -36,6 +36,16 @@ type ContainerManager interface { // Returns internal Status. Status() Status + + // NewPodContainerManager is a factory method which returns a podContainerManager object + // Returns a noop implementation if qos cgroup hierarchy is not enabled + NewPodContainerManager() PodContainerManager + + // GetMountedSubsystems returns the mounted cgroup subsytems on the node + GetMountedSubsystems() *CgroupSubsystems + + // GetQOSContainersInfo returns the names of top level QoS containers + GetQOSContainersInfo() QOSContainersInfo } type NodeConfig struct { diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 79accf4b7f8..b5b6205fbdf 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -100,7 +100,8 @@ type containerManagerImpl struct { qosContainers QOSContainersInfo periodicTasks []func() // holds all the mounted cgroup subsystems - subsystems *cgroupSubsystems + subsystems *CgroupSubsystems + nodeInfo *api.Node } type features struct { @@ -173,9 +174,9 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I return nil, fmt.Errorf("invalid configuration: cgroup-root doesn't exist : %v", err) } } - subsystems, err := getCgroupSubsystems() + subsystems, err := GetCgroupSubsystems() if err != nil { - return nil, fmt.Errorf("failed to get mounted subsystems: %v", err) + return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err) } return &containerManagerImpl{ cadvisorInterface: cadvisorInterface, @@ -185,6 +186,23 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I }, nil } +// NewPodContainerManager is a factory method returns a PodContainerManager object +// If qosCgroups are enabled then it returns the general pod container manager +// otherwise it returns a no-op manager which essentially does nothing +func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { + if cm.NodeConfig.CgroupsPerQOS { + return &podContainerManagerImpl{ + qosContainersInfo: cm.qosContainers, + nodeInfo: cm.nodeInfo, + subsystems: cm.subsystems, + cgroupManager: NewCgroupManager(cm.subsystems), + } + } + return &podContainerManagerNoop{ + cgroupRoot: cm.NodeConfig.CgroupRoot, + } +} + // Create a cgroup container manager. func createManager(containerName string) *fs.Manager { allowAllDevices := true @@ -213,7 +231,7 @@ const ( // RootContainer by default. InitQOS is called only once during kubelet bootstrapping. // TODO(@dubstack) Add support for cgroup-root to work on both systemd and cgroupfs // drivers. Currently we only support systems running cgroupfs driver -func InitQOS(rootContainer string, subsystems *cgroupSubsystems) (QOSContainersInfo, error) { +func InitQOS(rootContainer string, subsystems *CgroupSubsystems) (QOSContainersInfo, error) { cm := NewCgroupManager(subsystems) // Top level for Qos containers are created only for Burstable // and Best Effort classes @@ -425,13 +443,24 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig { return cm.NodeConfig } +func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems { + return cm.subsystems +} + +func (cm *containerManagerImpl) GetQOSContainersInfo() QOSContainersInfo { + return cm.qosContainers +} + func (cm *containerManagerImpl) Status() Status { cm.RLock() defer cm.RUnlock() return cm.status } -func (cm *containerManagerImpl) Start() error { +func (cm *containerManagerImpl) Start(node *api.Node) error { + // cache the node Info including resource capacity and + // allocatable of the node + cm.nodeInfo = node // Setup the node if err := cm.setupNode(); err != nil { return err diff --git a/pkg/kubelet/cm/container_manager_stub.go b/pkg/kubelet/cm/container_manager_stub.go index 977b713fa8e..186d773dbe9 100644 --- a/pkg/kubelet/cm/container_manager_stub.go +++ b/pkg/kubelet/cm/container_manager_stub.go @@ -25,7 +25,7 @@ type containerManagerStub struct{} var _ ContainerManager = &containerManagerStub{} -func (cm *containerManagerStub) Start() error { +func (cm *containerManagerStub) Start(_ *api.Node) error { glog.V(2).Infof("Starting stub container manager") return nil } @@ -38,10 +38,22 @@ func (cm *containerManagerStub) GetNodeConfig() NodeConfig { return NodeConfig{} } +func (cm *containerManagerStub) GetMountedSubsystems() *CgroupSubsystems { + return &CgroupSubsystems{} +} + +func (cm *containerManagerStub) GetQOSContainersInfo() QOSContainersInfo { + return QOSContainersInfo{} +} + func (cm *containerManagerStub) Status() Status { return Status{} } +func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager { + return &podContainerManagerStub{} +} + func NewStubContainerManager() ContainerManager { return &containerManagerStub{} } diff --git a/pkg/kubelet/cm/container_manager_unsupported.go b/pkg/kubelet/cm/container_manager_unsupported.go index fe6cb808fa1..4118a998f8c 100644 --- a/pkg/kubelet/cm/container_manager_unsupported.go +++ b/pkg/kubelet/cm/container_manager_unsupported.go @@ -31,7 +31,7 @@ type unsupportedContainerManager struct { var _ ContainerManager = &unsupportedContainerManager{} -func (unsupportedContainerManager) Start() error { +func (unsupportedContainerManager) Start(_ *api.Node) error { return fmt.Errorf("Container Manager is unsupported in this build") } @@ -43,10 +43,22 @@ func (unsupportedContainerManager) GetNodeConfig() NodeConfig { return NodeConfig{} } +func (unsupportedContainerManager) GetMountedSubsystems() *CgroupSubsystems { + return &CgroupSubsystems{} +} + +func (unsupportedContainerManager) GetQOSContainersInfo() QOSContainersInfo { + return QOSContainersInfo{} +} + func (cm *unsupportedContainerManager) Status() Status { return Status{} } +func (cm *unsupportedContainerManager) NewPodContainerManager() PodContainerManager { + return &unsupportedPodContainerManager{} +} + func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig) (ContainerManager, error) { return &unsupportedContainerManager{}, nil } diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go index 92b3c7565c0..561244c2811 100644 --- a/pkg/kubelet/cm/helpers_linux.go +++ b/pkg/kubelet/cm/helpers_linux.go @@ -22,37 +22,25 @@ import ( libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" ) -// cgroupSubsystems holds information about the mounted cgroup subsytems -type cgroupSubsystems struct { - // Cgroup subsystem mounts. - // e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"] - mounts []libcontainercgroups.Mount - - // Cgroup subsystem to their mount location. - // e.g.: "cpu" -> "/sys/fs/cgroup/cpu" - mountPoints map[string]string -} - // GetCgroupSubsystems returns information about the mounted cgroup subsystems -func getCgroupSubsystems() (*cgroupSubsystems, error) { - // Get all cgroup mounts. +func GetCgroupSubsystems() (*CgroupSubsystems, error) { + // get all cgroup mounts. allCgroups, err := libcontainercgroups.GetCgroupMounts() if err != nil { - return &cgroupSubsystems{}, err + return &CgroupSubsystems{}, err } if len(allCgroups) == 0 { - return &cgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts") + return &CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts") } - //TODO(@dubstack) should we trim to only the supported ones mountPoints := make(map[string]string, len(allCgroups)) for _, mount := range allCgroups { for _, subsystem := range mount.Subsystems { mountPoints[subsystem] = mount.Mountpoint } } - return &cgroupSubsystems{ - mounts: allCgroups, - mountPoints: mountPoints, + return &CgroupSubsystems{ + Mounts: allCgroups, + MountPoints: mountPoints, }, nil } diff --git a/pkg/kubelet/cm/pod_container_manager_linux.go b/pkg/kubelet/cm/pod_container_manager_linux.go new file mode 100644 index 00000000000..f02ea085feb --- /dev/null +++ b/pkg/kubelet/cm/pod_container_manager_linux.go @@ -0,0 +1,145 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cm + +import ( + "fmt" + "path" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/kubelet/qos" +) + +const ( + podCgroupNamePrefix = "pod#" +) + +// podContainerManagerImpl implements podContainerManager interface. +// It is the general implementation which allows pod level container +// management if qos Cgroup is enabled. +type podContainerManagerImpl struct { + // nodeInfo stores information about the node resource capacity + nodeInfo *api.Node + // qosContainersInfo hold absolute paths of the top level qos containers + qosContainersInfo QOSContainersInfo + // Stores the mounted cgroup subsystems + subsystems *CgroupSubsystems + // cgroupManager is the cgroup Manager Object responsible for managing all + // pod cgroups. + cgroupManager CgroupManager +} + +// Make sure that podContainerManagerImpl implements the PodContainerManager interface +var _ PodContainerManager = &podContainerManagerImpl{} + +// applyLimits sets pod cgroup resource limits +// It also updates the resource limits on top level qos containers. +func (m *podContainerManagerImpl) applyLimits(pod *api.Pod) error { + // This function will house the logic for setting the resource parameters + // on the pod container config and updating top level qos container configs + return nil +} + +// Exists checks if the pod's cgroup already exists +func (m *podContainerManagerImpl) Exists(pod *api.Pod) bool { + podContainerName := m.GetPodContainerName(pod) + return m.cgroupManager.Exists(podContainerName) +} + +// EnsureExists takes a pod as argument and makes sure that +// pod cgroup exists if qos cgroup hierarchy flag is enabled. +// If the pod level container doesen't already exist it is created. +func (m *podContainerManagerImpl) EnsureExists(pod *api.Pod) error { + podContainerName := m.GetPodContainerName(pod) + // check if container already exist + alreadyExists := m.Exists(pod) + if !alreadyExists { + // Create the pod container + containerConfig := &CgroupConfig{ + Name: podContainerName, + ResourceParameters: &ResourceConfig{}, + } + if err := m.cgroupManager.Create(containerConfig); err != nil { + return fmt.Errorf("failed to create container for %v : %v", podContainerName, err) + } + } + // Apply appropriate resource limits on the pod container + // Top level qos containers limits are not updated + // until we figure how to maintain the desired state in the kubelet. + // Because maintaining the desired state is difficult without checkpointing. + if err := m.applyLimits(pod); err != nil { + return fmt.Errorf("failed to apply resource limits on container for %v : %v", podContainerName, err) + } + return nil +} + +// GetPodContainerName is a util func takes in a pod as an argument +// and returns the pod's cgroup name. We follow a pod cgroup naming format +// which is opaque and deterministic. Given a pod it's cgroup would be named +// "pod-UID" where the UID is the Pod UID +func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) string { + podQOS := qos.GetPodQOS(pod) + // Get the parent QOS container name + var parentContainer string + switch podQOS { + case qos.Guaranteed: + parentContainer = m.qosContainersInfo.Guaranteed + case qos.Burstable: + parentContainer = m.qosContainersInfo.Burstable + case qos.BestEffort: + parentContainer = m.qosContainersInfo.BestEffort + } + podContainer := podCgroupNamePrefix + string(pod.UID) + // Get the absolute path of the cgroup + return path.Join(parentContainer, podContainer) +} + +// Destroy destroys the pod container cgroup paths +func (m *podContainerManagerImpl) Destroy(podCgroup string) error { + // This will house the logic for destroying the pod cgroups. + // Will be handled in the next PR. + return nil +} + +// podContainerManagerNoop implements podContainerManager interface. +// It is a no-op implementation and basically does nothing +// podContainerManagerNoop is used in case the QoS cgroup Hierarchy is not +// enabled, so Exists() returns true always as the cgroupRoot +// is expected to always exist. +type podContainerManagerNoop struct { + cgroupRoot string +} + +// Make sure that podContainerManagerStub implements the PodContainerManager interface +var _ PodContainerManager = &podContainerManagerNoop{} + +func (m *podContainerManagerNoop) Exists(_ *api.Pod) bool { + return true +} + +func (m *podContainerManagerNoop) EnsureExists(_ *api.Pod) error { + return nil +} + +func (m *podContainerManagerNoop) GetPodContainerName(_ *api.Pod) string { + return m.cgroupRoot +} + +// Destroy destroys the pod container cgroup paths +func (m *podContainerManagerNoop) Destroy(_ string) error { + return nil +} diff --git a/pkg/kubelet/cm/pod_container_manager_stub.go b/pkg/kubelet/cm/pod_container_manager_stub.go new file mode 100644 index 00000000000..a6432582b51 --- /dev/null +++ b/pkg/kubelet/cm/pod_container_manager_stub.go @@ -0,0 +1,40 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cm + +import "k8s.io/kubernetes/pkg/api" + +type podContainerManagerStub struct { +} + +var _ PodContainerManager = &podContainerManagerStub{} + +func (m *podContainerManagerStub) Exists(_ *api.Pod) bool { + return true +} + +func (m *podContainerManagerStub) EnsureExists(_ *api.Pod) error { + return nil +} + +func (m *podContainerManagerStub) GetPodContainerName(_ *api.Pod) string { + return "" +} + +func (m *podContainerManagerStub) Destroy(_ string) error { + return nil +} diff --git a/pkg/kubelet/cm/pod_container_manager_unsupported.go b/pkg/kubelet/cm/pod_container_manager_unsupported.go new file mode 100644 index 00000000000..c68ae2cdbce --- /dev/null +++ b/pkg/kubelet/cm/pod_container_manager_unsupported.go @@ -0,0 +1,42 @@ +// +build !linux + +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cm + +import "k8s.io/kubernetes/pkg/api" + +type unsupportedPodContainerManager struct { +} + +var _ PodContainerManager = &unsupportedPodContainerManager{} + +func (m *unsupportedPodContainerManager) Exists(_ *api.Pod) bool { + return true +} + +func (m *unsupportedPodContainerManager) EnsureExists(_ *api.Pod) error { + return nil +} + +func (m *unsupportedPodContainerManager) GetPodContainerName(_ *api.Pod) string { + return "" +} + +func (m *unsupportedPodContainerManager) Destroy(_ string) error { + return nil +} diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index 745870bcdcc..bf4d5743594 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -16,6 +16,10 @@ limitations under the License. package cm +import ( + "k8s.io/kubernetes/pkg/api" +) + // ResourceConfig holds information about all the supported cgroup resource parameters. type ResourceConfig struct { // Memory limit (in bytes). @@ -57,9 +61,27 @@ type CgroupManager interface { Exists(string) bool } -// QOSContainersInfo hold the names of containers per qos +// QOSContainersInfo stores the names of containers per qos type QOSContainersInfo struct { Guaranteed string BestEffort string Burstable string } + +// PodContainerManager stores and manages pod level containers +// The Pod workers interact with the PodContainerManager to create and destroy +// containers for the pod. +type PodContainerManager interface { + // getPodContainerName returns the pod container's absolute name + GetPodContainerName(*api.Pod) string + + // EnsureExists takes a pod as argument and makes sure that + // pod cgroup exists if qos cgroup hierarchy flag is enabled. + // If the pod cgroup doesen't already exist this method creates it. + EnsureExists(*api.Pod) error + + Exists(*api.Pod) bool + + //Destroy takes a pod as argument and destroys the pod's container. + Destroy(string) error +} diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 7e53cfec2fa..f5922169b15 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -934,7 +934,13 @@ func (kl *Kubelet) initializeModules() error { } // Step 5: Start container manager. - if err := kl.containerManager.Start(); err != nil { + node, err := kl.getNodeAnyWay() + if err != nil { + glog.Errorf("Cannot get Node info: %v", err) + return fmt.Errorf("Kubelet failed to get node info.") + } + + if err := kl.containerManager.Start(node); err != nil { return fmt.Errorf("Failed to start ContainerManager %v", err) }