Add support for pod container management

2025-09-13 13:14:05 +00:00 · 2016-07-12 21:39:22 -07:00
parent 0bef4243cd
commit 4ddfe172ce
12 changed files with 366 additions and 37 deletions
--- a/pkg/kubelet/cm/cgroup_manager_linux.go
+++ b/pkg/kubelet/cm/cgroup_manager_linux.go
@@ -25,6 +25,17 @@ import (
 	libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
 )

+// CgroupSubsystems holds information about the mounted cgroup subsytems
+type CgroupSubsystems struct {
+	// Cgroup subsystem mounts.
+	// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
+	Mounts []libcontainercgroups.Mount
+
+	// Cgroup subsystem to their mount location.
+	// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
+	MountPoints map[string]string
+}
+
 // cgroupManagerImpl implements the CgroupManager interface.
 // Its a stateless object which can be used to
 // update,create or delete any number of cgroups
@@ -32,14 +43,14 @@ import (
 type cgroupManagerImpl struct {
 	// subsystems holds information about all the
 	// mounted cgroup subsytems on the node
-	subsystems *cgroupSubsystems
+	subsystems *CgroupSubsystems
 }

 // Make sure that cgroupManagerImpl implements the CgroupManager interface
 var _ CgroupManager = &cgroupManagerImpl{}

 // NewCgroupManager is a factory method that returns a CgroupManager
-func NewCgroupManager(cs *cgroupSubsystems) CgroupManager {
+func NewCgroupManager(cs *CgroupSubsystems) CgroupManager {
 	return &cgroupManagerImpl{
 		subsystems: cs,
 	}
@@ -48,8 +59,8 @@ func NewCgroupManager(cs *cgroupSubsystems) CgroupManager {
 // Exists checks if all subsystem cgroups already exist
 func (m *cgroupManagerImpl) Exists(name string) bool {
 	// Get map of all cgroup paths on the system for the particular cgroup
-	cgroupPaths := make(map[string]string, len(m.subsystems.mountPoints))
-	for key, val := range m.subsystems.mountPoints {
+	cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints))
+	for key, val := range m.subsystems.MountPoints {
 		cgroupPaths[key] = path.Join(val, name)
 	}

@@ -68,8 +79,8 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
 	name := cgroupConfig.Name

 	// Get map of all cgroup paths on the system for the particular cgroup
-	cgroupPaths := make(map[string]string, len(m.subsystems.mountPoints))
-	for key, val := range m.subsystems.mountPoints {
+	cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints))
+	for key, val := range m.subsystems.MountPoints {
 		cgroupPaths[key] = path.Join(val, name)
 	}

@@ -98,7 +109,7 @@ type subsystem interface {
 }

 // Cgroup subsystems we currently support
-var supportedSubsystems []subsystem = []subsystem{
+var supportedSubsystems = []subsystem{
 	&cgroupfs.MemoryGroup{},
 	&cgroupfs.CpuGroup{},
 }
@@ -142,11 +153,18 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 		resources.CpuQuota = *resourceConfig.CpuQuota
 	}

+	// Get map of all cgroup paths on the system for the particular cgroup
+	cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints))
+	for key, val := range m.subsystems.MountPoints {
+		cgroupPaths[key] = path.Join(val, name)
+	}
+
 	// Initialize libcontainer's cgroup config
 	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
 		Name:      path.Base(name),
 		Parent:    path.Dir(name),
 		Resources: resources,
+		Paths:     cgroupPaths,
 	}

 	if err := setSupportedSubsytems(libcontainerCgroupConfig); err != nil {
@@ -177,7 +195,7 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 	// It creates cgroup files for each subsytems and writes the pid
 	// in the tasks file. We use the function to create all the required
 	// cgroup files but not attach any "real" pid to the cgroup.
-	if err := fsCgroupManager.Apply(0); err != nil {
+	if err := fsCgroupManager.Apply(-1); err != nil {
 		return fmt.Errorf("Failed to apply cgroup config for %v: %v", name, err)
 	}
 	return nil
--- a/pkg/kubelet/cm/cgroup_manager_unsupported.go
+++ b/pkg/kubelet/cm/cgroup_manager_unsupported.go
@@ -25,6 +25,11 @@ type unsupportedCgroupManager struct{}
 // Make sure that unsupportedCgroupManager implements the CgroupManager interface
 var _ CgroupManager = &unsupportedCgroupManager{}

+type CgroupSubsystems struct {
+	Mounts      []interface{}
+	MountPoints map[string]string
+}
+
 func NewCgroupManager(_ interface{}) CgroupManager {
 	return &unsupportedCgroupManager{}
 }
--- a/pkg/kubelet/cm/container_manager.go
+++ b/pkg/kubelet/cm/container_manager.go
@@ -25,7 +25,7 @@ type ContainerManager interface {
 	// Runs the container manager's housekeeping.
 	// - Ensures that the Docker daemon is in a container.
 	// - Creates the system container where all non-containerized processes run.
-	Start() error
+	Start(*api.Node) error

 	// Returns resources allocated to system cgroups in the machine.
 	// These cgroups include the system and Kubernetes services.
@@ -36,6 +36,16 @@ type ContainerManager interface {

 	// Returns internal Status.
 	Status() Status
+
+	// NewPodContainerManager is a factory method which returns a podContainerManager object
+	// Returns a noop implementation if qos cgroup hierarchy is not enabled
+	NewPodContainerManager() PodContainerManager
+
+	// GetMountedSubsystems returns the mounted cgroup subsytems on the node
+	GetMountedSubsystems() *CgroupSubsystems
+
+	// GetQOSContainersInfo returns the names of top level QoS containers
+	GetQOSContainersInfo() QOSContainersInfo
 }

 type NodeConfig struct {
--- a/pkg/kubelet/cm/container_manager_linux.go
+++ b/pkg/kubelet/cm/container_manager_linux.go
@@ -100,7 +100,8 @@ type containerManagerImpl struct {
 	qosContainers    QOSContainersInfo
 	periodicTasks    []func()
 	// holds all the mounted cgroup subsystems
-	subsystems *cgroupSubsystems
+	subsystems *CgroupSubsystems
+	nodeInfo   *api.Node
 }

 type features struct {
@@ -173,9 +174,9 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 			return nil, fmt.Errorf("invalid configuration: cgroup-root doesn't exist : %v", err)
 		}
 	}
-	subsystems, err := getCgroupSubsystems()
+	subsystems, err := GetCgroupSubsystems()
 	if err != nil {
-		return nil, fmt.Errorf("failed to get mounted subsystems: %v", err)
+		return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
 	}
 	return &containerManagerImpl{
 		cadvisorInterface: cadvisorInterface,
@@ -185,6 +186,23 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 	}, nil
 }

+// NewPodContainerManager is a factory method returns a PodContainerManager object
+// If qosCgroups are enabled then it returns the general pod container manager
+// otherwise it returns a no-op manager which essentially does nothing
+func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
+	if cm.NodeConfig.CgroupsPerQOS {
+		return &podContainerManagerImpl{
+			qosContainersInfo: cm.qosContainers,
+			nodeInfo:          cm.nodeInfo,
+			subsystems:        cm.subsystems,
+			cgroupManager:     NewCgroupManager(cm.subsystems),
+		}
+	}
+	return &podContainerManagerNoop{
+		cgroupRoot: cm.NodeConfig.CgroupRoot,
+	}
+}
+
 // Create a cgroup container manager.
 func createManager(containerName string) *fs.Manager {
 	allowAllDevices := true
@@ -213,7 +231,7 @@ const (
 // RootContainer by default. InitQOS is called only once during kubelet bootstrapping.
 // TODO(@dubstack) Add support for cgroup-root to work on both systemd and cgroupfs
 // drivers. Currently we only support systems running cgroupfs driver
-func InitQOS(rootContainer string, subsystems *cgroupSubsystems) (QOSContainersInfo, error) {
+func InitQOS(rootContainer string, subsystems *CgroupSubsystems) (QOSContainersInfo, error) {
 	cm := NewCgroupManager(subsystems)
 	// Top level for Qos containers are created only for Burstable
 	// and Best Effort classes
@@ -425,13 +443,24 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
 	return cm.NodeConfig
 }

+func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems {
+	return cm.subsystems
+}
+
+func (cm *containerManagerImpl) GetQOSContainersInfo() QOSContainersInfo {
+	return cm.qosContainers
+}
+
 func (cm *containerManagerImpl) Status() Status {
 	cm.RLock()
 	defer cm.RUnlock()
 	return cm.status
 }

-func (cm *containerManagerImpl) Start() error {
+func (cm *containerManagerImpl) Start(node *api.Node) error {
+	// cache the node Info including resource capacity and
+	// allocatable of the node
+	cm.nodeInfo = node
 	// Setup the node
 	if err := cm.setupNode(); err != nil {
 		return err
--- a/pkg/kubelet/cm/container_manager_stub.go
+++ b/pkg/kubelet/cm/container_manager_stub.go
@@ -25,7 +25,7 @@ type containerManagerStub struct{}

 var _ ContainerManager = &containerManagerStub{}

-func (cm *containerManagerStub) Start() error {
+func (cm *containerManagerStub) Start(_ *api.Node) error {
 	glog.V(2).Infof("Starting stub container manager")
 	return nil
 }
@@ -38,10 +38,22 @@ func (cm *containerManagerStub) GetNodeConfig() NodeConfig {
 	return NodeConfig{}
 }

+func (cm *containerManagerStub) GetMountedSubsystems() *CgroupSubsystems {
+	return &CgroupSubsystems{}
+}
+
+func (cm *containerManagerStub) GetQOSContainersInfo() QOSContainersInfo {
+	return QOSContainersInfo{}
+}
+
 func (cm *containerManagerStub) Status() Status {
 	return Status{}
 }

+func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
+	return &podContainerManagerStub{}
+}
+
 func NewStubContainerManager() ContainerManager {
 	return &containerManagerStub{}
 }
--- a/pkg/kubelet/cm/container_manager_unsupported.go
+++ b/pkg/kubelet/cm/container_manager_unsupported.go
@@ -31,7 +31,7 @@ type unsupportedContainerManager struct {

 var _ ContainerManager = &unsupportedContainerManager{}

-func (unsupportedContainerManager) Start() error {
+func (unsupportedContainerManager) Start(_ *api.Node) error {
 	return fmt.Errorf("Container Manager is unsupported in this build")
 }

@@ -43,10 +43,22 @@ func (unsupportedContainerManager) GetNodeConfig() NodeConfig {
 	return NodeConfig{}
 }

+func (unsupportedContainerManager) GetMountedSubsystems() *CgroupSubsystems {
+	return &CgroupSubsystems{}
+}
+
+func (unsupportedContainerManager) GetQOSContainersInfo() QOSContainersInfo {
+	return QOSContainersInfo{}
+}
+
 func (cm *unsupportedContainerManager) Status() Status {
 	return Status{}
 }

+func (cm *unsupportedContainerManager) NewPodContainerManager() PodContainerManager {
+	return &unsupportedPodContainerManager{}
+}
+
 func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig) (ContainerManager, error) {
 	return &unsupportedContainerManager{}, nil
 }
--- a/pkg/kubelet/cm/helpers_linux.go
+++ b/pkg/kubelet/cm/helpers_linux.go
@@ -22,37 +22,25 @@ import (
 	libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
 )

-// cgroupSubsystems holds information about the mounted cgroup subsytems
-type cgroupSubsystems struct {
-	// Cgroup subsystem mounts.
-	// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
-	mounts []libcontainercgroups.Mount
-
-	// Cgroup subsystem to their mount location.
-	// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
-	mountPoints map[string]string
-}
-
 // GetCgroupSubsystems returns information about the mounted cgroup subsystems
-func getCgroupSubsystems() (*cgroupSubsystems, error) {
-	// Get all cgroup mounts.
+func GetCgroupSubsystems() (*CgroupSubsystems, error) {
+	// get all cgroup mounts.
 	allCgroups, err := libcontainercgroups.GetCgroupMounts()
 	if err != nil {
-		return &cgroupSubsystems{}, err
+		return &CgroupSubsystems{}, err
 	}
 	if len(allCgroups) == 0 {
-		return &cgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
+		return &CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
 	}

-	//TODO(@dubstack) should we trim to only the supported ones
 	mountPoints := make(map[string]string, len(allCgroups))
 	for _, mount := range allCgroups {
 		for _, subsystem := range mount.Subsystems {
 			mountPoints[subsystem] = mount.Mountpoint
 		}
 	}
-	return &cgroupSubsystems{
-		mounts:      allCgroups,
-		mountPoints: mountPoints,
+	return &CgroupSubsystems{
+		Mounts:      allCgroups,
+		MountPoints: mountPoints,
 	}, nil
 }
--- a/pkg/kubelet/cm/pod_container_manager_linux.go
+++ b/pkg/kubelet/cm/pod_container_manager_linux.go
@@ -0,0 +1,145 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cm
+
+import (
+	"fmt"
+	"path"
+
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/kubelet/qos"
+)
+
+const (
+	podCgroupNamePrefix = "pod#"
+)
+
+// podContainerManagerImpl implements podContainerManager interface.
+// It is the general implementation which allows pod level container
+// management if qos Cgroup is enabled.
+type podContainerManagerImpl struct {
+	// nodeInfo stores information about the node resource capacity
+	nodeInfo *api.Node
+	// qosContainersInfo hold absolute paths of the top level qos containers
+	qosContainersInfo QOSContainersInfo
+	// Stores the mounted cgroup subsystems
+	subsystems *CgroupSubsystems
+	// cgroupManager is the cgroup Manager Object responsible for managing all
+	// pod cgroups.
+	cgroupManager CgroupManager
+}
+
+// Make sure that podContainerManagerImpl implements the PodContainerManager interface
+var _ PodContainerManager = &podContainerManagerImpl{}
+
+// applyLimits sets pod cgroup resource limits
+// It also updates the resource limits on top level qos containers.
+func (m *podContainerManagerImpl) applyLimits(pod *api.Pod) error {
+	// This function will house the logic for setting the resource parameters
+	// on the pod container config and updating top level qos container configs
+	return nil
+}
+
+// Exists checks if the pod's cgroup already exists
+func (m *podContainerManagerImpl) Exists(pod *api.Pod) bool {
+	podContainerName := m.GetPodContainerName(pod)
+	return m.cgroupManager.Exists(podContainerName)
+}
+
+// EnsureExists takes a pod as argument and makes sure that
+// pod cgroup exists if qos cgroup hierarchy flag is enabled.
+// If the pod level container doesen't already exist it is created.
+func (m *podContainerManagerImpl) EnsureExists(pod *api.Pod) error {
+	podContainerName := m.GetPodContainerName(pod)
+	// check if container already exist
+	alreadyExists := m.Exists(pod)
+	if !alreadyExists {
+		// Create the pod container
+		containerConfig := &CgroupConfig{
+			Name:               podContainerName,
+			ResourceParameters: &ResourceConfig{},
+		}
+		if err := m.cgroupManager.Create(containerConfig); err != nil {
+			return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
+		}
+	}
+	// Apply appropriate resource limits on the pod container
+	// Top level qos containers limits are not updated
+	// until we figure how to maintain the desired state in the kubelet.
+	// Because maintaining the desired state is difficult without checkpointing.
+	if err := m.applyLimits(pod); err != nil {
+		return fmt.Errorf("failed to apply resource limits on container for %v : %v", podContainerName, err)
+	}
+	return nil
+}
+
+// GetPodContainerName is a util func takes in a pod as an argument
+// and returns the pod's cgroup name. We follow a pod cgroup naming format
+// which is opaque and deterministic. Given a pod it's cgroup would be named
+// "pod-UID" where the UID is the Pod UID
+func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) string {
+	podQOS := qos.GetPodQOS(pod)
+	// Get the parent QOS container name
+	var parentContainer string
+	switch podQOS {
+	case qos.Guaranteed:
+		parentContainer = m.qosContainersInfo.Guaranteed
+	case qos.Burstable:
+		parentContainer = m.qosContainersInfo.Burstable
+	case qos.BestEffort:
+		parentContainer = m.qosContainersInfo.BestEffort
+	}
+	podContainer := podCgroupNamePrefix + string(pod.UID)
+	// Get the absolute path of the cgroup
+	return path.Join(parentContainer, podContainer)
+}
+
+// Destroy destroys the pod container cgroup paths
+func (m *podContainerManagerImpl) Destroy(podCgroup string) error {
+	// This will house the logic for destroying the pod cgroups.
+	// Will be handled in the next PR.
+	return nil
+}
+
+// podContainerManagerNoop implements podContainerManager interface.
+// It is a no-op implementation and basically does nothing
+// podContainerManagerNoop is used in case the QoS cgroup Hierarchy is not
+// enabled, so Exists() returns true always as the cgroupRoot
+// is expected to always exist.
+type podContainerManagerNoop struct {
+	cgroupRoot string
+}
+
+// Make sure that podContainerManagerStub implements the PodContainerManager interface
+var _ PodContainerManager = &podContainerManagerNoop{}
+
+func (m *podContainerManagerNoop) Exists(_ *api.Pod) bool {
+	return true
+}
+
+func (m *podContainerManagerNoop) EnsureExists(_ *api.Pod) error {
+	return nil
+}
+
+func (m *podContainerManagerNoop) GetPodContainerName(_ *api.Pod) string {
+	return m.cgroupRoot
+}
+
+// Destroy destroys the pod container cgroup paths
+func (m *podContainerManagerNoop) Destroy(_ string) error {
+	return nil
+}
--- a/pkg/kubelet/cm/pod_container_manager_stub.go
+++ b/pkg/kubelet/cm/pod_container_manager_stub.go
@@ -0,0 +1,40 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cm
+
+import "k8s.io/kubernetes/pkg/api"
+
+type podContainerManagerStub struct {
+}
+
+var _ PodContainerManager = &podContainerManagerStub{}
+
+func (m *podContainerManagerStub) Exists(_ *api.Pod) bool {
+	return true
+}
+
+func (m *podContainerManagerStub) EnsureExists(_ *api.Pod) error {
+	return nil
+}
+
+func (m *podContainerManagerStub) GetPodContainerName(_ *api.Pod) string {
+	return ""
+}
+
+func (m *podContainerManagerStub) Destroy(_ string) error {
+	return nil
+}
--- a/pkg/kubelet/cm/pod_container_manager_unsupported.go
+++ b/pkg/kubelet/cm/pod_container_manager_unsupported.go
@@ -0,0 +1,42 @@
+// +build !linux
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cm
+
+import "k8s.io/kubernetes/pkg/api"
+
+type unsupportedPodContainerManager struct {
+}
+
+var _ PodContainerManager = &unsupportedPodContainerManager{}
+
+func (m *unsupportedPodContainerManager) Exists(_ *api.Pod) bool {
+	return true
+}
+
+func (m *unsupportedPodContainerManager) EnsureExists(_ *api.Pod) error {
+	return nil
+}
+
+func (m *unsupportedPodContainerManager) GetPodContainerName(_ *api.Pod) string {
+	return ""
+}
+
+func (m *unsupportedPodContainerManager) Destroy(_ string) error {
+	return nil
+}
--- a/pkg/kubelet/cm/types.go
+++ b/pkg/kubelet/cm/types.go
@@ -16,6 +16,10 @@ limitations under the License.

 package cm

+import (
+	"k8s.io/kubernetes/pkg/api"
+)
+
 // ResourceConfig holds information about all the supported cgroup resource parameters.
 type ResourceConfig struct {
 	// Memory limit (in bytes).
@@ -57,9 +61,27 @@ type CgroupManager interface {
 	Exists(string) bool
 }

-// QOSContainersInfo hold the names of containers per qos
+// QOSContainersInfo stores the names of containers per qos
 type QOSContainersInfo struct {
 	Guaranteed string
 	BestEffort string
 	Burstable  string
 }
+
+// PodContainerManager stores and manages pod level containers
+// The Pod workers interact with the PodContainerManager to create and destroy
+// containers for the pod.
+type PodContainerManager interface {
+	// getPodContainerName returns the pod container's absolute name
+	GetPodContainerName(*api.Pod) string
+
+	// EnsureExists takes a pod as argument and makes sure that
+	// pod cgroup exists if qos cgroup hierarchy flag is enabled.
+	// If the pod cgroup doesen't already exist this method creates it.
+	EnsureExists(*api.Pod) error
+
+	Exists(*api.Pod) bool
+
+	//Destroy takes a pod as argument and destroys the pod's container.
+	Destroy(string) error
+}
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -934,7 +934,13 @@ func (kl *Kubelet) initializeModules() error {
 	}

 	// Step 5: Start container manager.
-	if err := kl.containerManager.Start(); err != nil {
+	node, err := kl.getNodeAnyWay()
+	if err != nil {
+		glog.Errorf("Cannot get Node info: %v", err)
+		return fmt.Errorf("Kubelet failed to get node info.")
+	}
+
+	if err := kl.containerManager.Start(node); err != nil {
 		return fmt.Errorf("Failed to start ContainerManager %v", err)
 	}