mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-29 14:37:00 +00:00
Proactively remove init Containers in CPUManager static policy
This patch fixes a bug in the CPUManager, whereby it doesn't honor the "effective requests/limits" of a Pod as defined by: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resources The rule states that a Pod’s "effective request/limit" for a resource should be the larger of: * The highest of any particular resource request or limit defined on all init Containers * The sum of all app Containers request/limit for a resource Moreover, the rule states that: * The effective QoS tier is the same for init Containers and app containers alike This means that the resource requests of init Containers and app Containers should be able to overlap, such that the larger of the two becomes the "effective resource request/limit" for the Pod. Likewise, if a QoS tier of "Guaranteed" is determined for the Pod, then both init Containers and app Containers should run in this tier. In its current implementation, the CPU manager honors the effective QoS tier for both init and app containers, but doesn't honor the "effective request/limit" correctly. Instead, it treats the "effective request/limit" as: * The sum of all init Containers plus the sum of all app Containers request/limit for a resource It does this by not proactively removing the CPUs given to previous init containers when new containers are being created. In the worst case, this causes the CPUManager to give non-overlapping CPUs to all containers (whether init or app) in the "Guaranteed" QoS tier before any of the containers in the Pod actually start. This effectively blocks these Pods from running if the total number of CPUs being requested across init and app Containers goes beyond the limits of the system. This patch fixes this problem by updating the CPUManager static policy so that it proactively removes any guaranteed CPUs it has granted to init Containers before allocating CPUs to app containers. 
Since all init containers are run sequentially, it also makes sure this proactive removal happens for previous init containers when allocating CPUs to later ones.
This commit is contained in:
parent
78915a1094
commit
c6d9bbcb74
@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"container_map.go",
|
||||
"cpu_assignment.go",
|
||||
"cpu_manager.go",
|
||||
"fake_cpu_manager.go",
|
||||
|
68
pkg/kubelet/cm/cpumanager/container_map.go
Normal file
68
pkg/kubelet/cm/cpumanager/container_map.go
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
// containerMap is a two-level lookup table recording, for every tracked
// container, its runtime containerID keyed by (podUID, containerName).
type containerMap map[string]map[string]string

// newContainerMap returns an empty containerMap ready for use.
func newContainerMap() containerMap {
	return containerMap{}
}
|
||||
|
||||
func (cm containerMap) Add(p *v1.Pod, c *v1.Container, containerID string) {
|
||||
podUID := string(p.UID)
|
||||
if _, exists := cm[podUID]; !exists {
|
||||
cm[podUID] = make(map[string]string)
|
||||
}
|
||||
cm[podUID][c.Name] = containerID
|
||||
}
|
||||
|
||||
func (cm containerMap) Remove(containerID string) {
|
||||
found := false
|
||||
for podUID := range cm {
|
||||
for containerName := range cm[podUID] {
|
||||
if containerID == cm[podUID][containerName] {
|
||||
delete(cm[podUID], containerName)
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(cm[podUID]) == 0 {
|
||||
delete(cm, podUID)
|
||||
}
|
||||
if found {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (cm containerMap) Get(p *v1.Pod, c *v1.Container) (string, error) {
|
||||
podUID := string(p.UID)
|
||||
if _, exists := cm[podUID]; !exists {
|
||||
return "", fmt.Errorf("pod %s not in containerMap", podUID)
|
||||
}
|
||||
if _, exists := cm[podUID][c.Name]; !exists {
|
||||
return "", fmt.Errorf("container %s not in containerMap for pod %s", c.Name, podUID)
|
||||
}
|
||||
return cm[podUID][c.Name], nil
|
||||
}
|
@ -73,6 +73,10 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
// containerMap provides a mapping from
|
||||
// (pod, container) -> containerID
|
||||
// for all containers a pod
|
||||
containerMap containerMap
|
||||
}
|
||||
|
||||
// Ensure staticPolicy implements Policy interface
|
||||
@ -97,8 +101,9 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int) Policy
|
||||
klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
|
||||
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
containerMap: newContainerMap(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -172,7 +177,15 @@ func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
|
||||
return s.GetDefaultCPUSet().Difference(p.reserved)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error {
|
||||
func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) (rerr error) {
|
||||
// So long as this function does not return an error,
|
||||
// add (pod, container, containerID) to the containerMap.
|
||||
defer func() {
|
||||
if rerr == nil {
|
||||
p.containerMap.Add(pod, container, containerID)
|
||||
}
|
||||
}()
|
||||
|
||||
if numCPUs := guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||
klog.Infof("[cpumanager] static policy: AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
|
||||
// container belongs in an exclusively allocated pool
|
||||
@ -182,6 +195,22 @@ func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Co
|
||||
return nil
|
||||
}
|
||||
|
||||
// Proactively remove CPUs from init containers that have already run.
|
||||
// They are guaranteed to have run to completion before any other
|
||||
// container is run.
|
||||
for _, initContainer := range pod.Spec.InitContainers {
|
||||
if container.Name != initContainer.Name {
|
||||
initContainerID, err := p.containerMap.Get(pod, &initContainer)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
err = p.RemoveContainer(s, initContainerID)
|
||||
if err != nil {
|
||||
klog.Warningf("[cpumanager] unable to remove init container (container id: %s, error: %v)", initContainerID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cpuset, err := p.allocateCPUs(s, numCPUs)
|
||||
if err != nil {
|
||||
klog.Errorf("[cpumanager] unable to allocate %d CPUs (container id: %s, error: %v)", numCPUs, containerID, err)
|
||||
@ -193,7 +222,15 @@ func (p *staticPolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Co
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *staticPolicy) RemoveContainer(s state.State, containerID string) error {
|
||||
func (p *staticPolicy) RemoveContainer(s state.State, containerID string) (rerr error) {
|
||||
// So long as this function does not return an error,
|
||||
// remove containerID from the containerMap.
|
||||
defer func() {
|
||||
if rerr == nil {
|
||||
p.containerMap.Remove(containerID)
|
||||
}
|
||||
}()
|
||||
|
||||
klog.Infof("[cpumanager] static policy: RemoveContainer (container id: %s)", containerID)
|
||||
if toRelease, ok := s.GetCPUSet(containerID); ok {
|
||||
s.Delete(containerID)
|
||||
|
Loading…
Reference in New Issue
Block a user