kubelet: enable qos-level memory request reservation

This commit is contained in:
Seth Jennings
2017-02-28 15:03:06 -06:00
parent 01bfbb5fa0
commit cc50aa9dfb
19 changed files with 353 additions and 17 deletions

View File

@@ -25,7 +25,6 @@ import (
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/qos"
)
@@ -37,19 +36,21 @@ const (
)
type QOSContainerManager interface {
Start(*v1.Node, ActivePodsFunc) error
Start(func() v1.ResourceList, ActivePodsFunc) error
GetQOSContainersInfo() QOSContainersInfo
UpdateCgroups() error
}
type qosContainerManagerImpl struct {
sync.Mutex
nodeInfo *v1.Node
qosContainersInfo QOSContainersInfo
subsystems *CgroupSubsystems
cgroupManager CgroupManager
activePods ActivePodsFunc
cgroupRoot string
nodeInfo *v1.Node
qosContainersInfo QOSContainersInfo
subsystems *CgroupSubsystems
cgroupManager CgroupManager
activePods ActivePodsFunc
getNodeAllocatable func() v1.ResourceList
cgroupRoot string
qosReserved map[v1.ResourceName]int64
}
func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot string, nodeConfig NodeConfig) (QOSContainerManager, error) {
@@ -63,6 +64,7 @@ func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot string, nod
subsystems: subsystems,
cgroupManager: NewCgroupManager(subsystems, nodeConfig.CgroupDriver),
cgroupRoot: cgroupRoot,
qosReserved: nodeConfig.ExperimentalQOSReserved,
}, nil
}
@@ -70,7 +72,7 @@ func (m *qosContainerManagerImpl) GetQOSContainersInfo() QOSContainersInfo {
return m.qosContainersInfo
}
func (m *qosContainerManagerImpl) Start(nodeInfo *v1.Node, activePods ActivePodsFunc) error {
func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceList, activePods ActivePodsFunc) error {
cm := m.cgroupManager
rootContainer := m.cgroupRoot
if !cm.Exists(CgroupName(rootContainer)) {
@@ -115,7 +117,7 @@ func (m *qosContainerManagerImpl) Start(nodeInfo *v1.Node, activePods ActivePods
Burstable: path.Join(rootContainer, string(v1.PodQOSBurstable)),
BestEffort: path.Join(rootContainer, string(v1.PodQOSBestEffort)),
}
m.nodeInfo = nodeInfo
m.getNodeAllocatable = getNodeAllocatable
m.activePods = activePods
// update qos cgroup tiers on startup and in periodic intervals
@@ -162,6 +164,85 @@ func (m *qosContainerManagerImpl) setCPUCgroupConfig(configs map[v1.PodQOSClass]
return nil
}
// setMemoryReserve sums the memory limits of all pods in a QOS class,
// calculates QOS class memory limits, and set those limits in the
// CgroupConfig for each QOS class.
func (m *qosContainerManagerImpl) setMemoryReserve(configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
qosMemoryRequests := map[v1.PodQOSClass]int64{
v1.PodQOSGuaranteed: 0,
v1.PodQOSBurstable: 0,
}
// Sum the pod limits for pods in each QOS class
pods := m.activePods()
for _, pod := range pods {
podMemoryRequest := int64(0)
qosClass := qos.GetPodQOS(pod)
if qosClass == v1.PodQOSBestEffort {
// limits are not set for Best Effort pods
continue
}
req, _, err := v1.PodRequestsAndLimits(pod)
if err != nil {
glog.V(2).Infof("[Container Manager] Pod resource requests/limits could not be determined. Not setting QOS memory limts.")
return
}
if request, found := req[v1.ResourceMemory]; found {
podMemoryRequest += request.Value()
}
qosMemoryRequests[qosClass] += podMemoryRequest
}
resources := m.getNodeAllocatable()
allocatableResource, ok := resources[v1.ResourceMemory]
if !ok {
glog.V(2).Infof("[Container Manager] Allocatable memory value could not be determined. Not setting QOS memory limts.")
return
}
allocatable := allocatableResource.Value()
if allocatable == 0 {
glog.V(2).Infof("[Container Manager] Memory allocatable reported as 0, might be in standalone mode. Not setting QOS memory limts.")
return
}
for qos, limits := range qosMemoryRequests {
glog.V(2).Infof("[Container Manager] %s pod requests total %d bytes (reserve %d%%)", qos, limits, percentReserve)
}
// Calculate QOS memory limits
burstableLimit := allocatable - (qosMemoryRequests[v1.PodQOSGuaranteed] * percentReserve / 100)
bestEffortLimit := burstableLimit - (qosMemoryRequests[v1.PodQOSBurstable] * percentReserve / 100)
configs[v1.PodQOSBurstable].ResourceParameters.Memory = &burstableLimit
configs[v1.PodQOSBestEffort].ResourceParameters.Memory = &bestEffortLimit
}
// retrySetMemoryReserve checks for any QoS cgroups over the limit
// that was attempted to be set in the first Update() and adjusts
// their memory limit to the usage to prevent further growth.
func (m *qosContainerManagerImpl) retrySetMemoryReserve(configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
// Unreclaimable memory usage may already exceeded the desired limit
// Attempt to set the limit near the current usage to put pressure
// on the cgroup and prevent further growth.
for qos, config := range configs {
stats, err := m.cgroupManager.GetResourceStats(config.Name)
if err != nil {
glog.V(2).Infof("[Container Manager] %v", err)
return
}
usage := stats.MemoryStats.Usage
// Because there is no good way to determine of the original Update()
// on the memory resource was successful, we determine failure of the
// first attempt by checking if the usage is above the limit we attempt
// to set. If it is, we assume the first attempt to set the limit failed
// and try again setting the limit to the usage. Otherwise we leave
// the CgroupConfig as is.
if configs[qos].ResourceParameters.Memory != nil && usage > *configs[qos].ResourceParameters.Memory {
configs[qos].ResourceParameters.Memory = &usage
}
}
}
func (m *qosContainerManagerImpl) UpdateCgroups() error {
m.Lock()
defer m.Unlock()
@@ -182,6 +263,34 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
return err
}
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.setMemoryReserve(qosConfigs, percentReserve)
}
}
updateSuccess := true
for _, config := range qosConfigs {
err := m.cgroupManager.Update(config)
if err != nil {
updateSuccess = false
}
}
if updateSuccess {
glog.V(2).Infof("[ContainerManager]: Updated QoS cgroup configuration")
return nil
}
// If the resource can adjust the ResourceConfig to increase likelihood of
// success, call the adjustment function here. Otherwise, the Update() will
// be called again with the same values.
for resource, percentReserve := range m.qosReserved {
switch resource {
case v1.ResourceMemory:
m.retrySetMemoryReserve(qosConfigs, percentReserve)
}
}
for _, config := range qosConfigs {
err := m.cgroupManager.Update(config)
if err != nil {
@@ -189,8 +298,8 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
return err
}
}
glog.V(2).Infof("[ContainerManager]: Updated QoS cgroup configuration")
glog.V(2).Infof("[ContainerManager]: Updated QoS cgroup configuration on retry")
return nil
}
@@ -204,7 +313,7 @@ func (m *qosContainerManagerNoop) GetQOSContainersInfo() QOSContainersInfo {
return QOSContainersInfo{}
}
func (m *qosContainerManagerNoop) Start(_ *v1.Node, _ ActivePodsFunc) error {
func (m *qosContainerManagerNoop) Start(_ func() v1.ResourceList, _ ActivePodsFunc) error {
return nil
}