memory manager: specify the container cpuset.memory during the creation

Set the container cpuset.memory during creation, avoiding an additional
call to update the container's resources afterwards.

Signed-off-by: Artyom Lukianov <alukiano@redhat.com>
This commit is contained in:
Artyom Lukianov 2021-02-09 19:00:58 +02:00
parent f79795d718
commit 95b2777204
5 changed files with 44 additions and 129 deletions

View File

@ -46,10 +46,7 @@ func (i *internalContainerLifecycleImpl) PreStartContainer(pod *v1.Pod, containe
}
if i.memoryManager != nil {
err := i.memoryManager.AddContainer(pod, container, containerID)
if err != nil {
return err
}
i.memoryManager.AddContainer(pod, container, containerID)
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) {

View File

@ -19,6 +19,9 @@ limitations under the License.
package cm
import (
"strconv"
"strings"
"k8s.io/api/core/v1"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
@ -31,5 +34,16 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
}
}
if i.memoryManager != nil {
numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
if numaNodes.Len() > 0 {
var affinity []string
for _, numaNode := range numaNodes.List() {
affinity = append(affinity, strconv.Itoa(numaNode))
}
containerConfig.Linux.Resources.CpusetMems = strings.Join(affinity, ",")
}
}
return nil
}

View File

@ -18,6 +18,7 @@ package memorymanager
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
@ -45,8 +46,12 @@ func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error {
return nil
}
func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
klog.Infof("[fake memorymanager] AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
}
// GetMemoryNUMANodes logs the call and returns nil: the fake manager tracks
// no memory allocations, so there are never NUMA nodes to report.
func (m *fakeManager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int {
klog.Infof("[fake memorymanager] GetMemoryNUMANodes (pod: %s, container: %s)", pod.Name, container.Name)
return nil
}

View File

@ -18,14 +18,13 @@ package memorymanager
import (
"fmt"
"strconv"
"strings"
"sync"
cadvisorapi "github.com/google/cadvisor/info/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/klog/v2"
corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
@ -57,10 +56,9 @@ type Manager interface {
// Start is called during Kubelet initialization.
Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error
// AddContainer is called between container create and container start
// so that initial memory affinity settings can be written through to the
// container runtime before the first process begins to execute.
AddContainer(p *v1.Pod, c *v1.Container, containerID string) error
// AddContainer adds the mapping from the container ID to the pod UID and the container name.
// The mapping is used to remove the memory allocation during the container removal.
AddContainer(p *v1.Pod, c *v1.Container, containerID string)
// Allocate is called to pre-allocate memory resources during Pod admission.
// This must be called at some point prior to the AddContainer() call for a container, e.g. at pod admission time.
@ -82,6 +80,9 @@ type Manager interface {
// and is consulted to achieve NUMA aware resource alignment among this
// and other resource controllers.
GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint
// GetMemoryNUMANodes provides NUMA nodes that are used to allocate the container memory
GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int
}
type manager struct {
@ -176,38 +177,31 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
}
// AddContainer saves the value of requested memory for the guaranteed pod under the state and sets memory affinity according to the topology manager
func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
m.Lock()
m.containerMap.Add(string(pod.UID), container.Name, containerID)
m.Unlock()
defer m.Unlock()
m.containerMap.Add(string(pod.UID), container.Name, containerID)
}
// GetMemoryNUMANodes provides the NUMA nodes that are used to allocate the container memory
func (m *manager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int {
// Get NUMA node affinity of blocks assigned to the container during Allocate()
var nodes []string
numaNodes := sets.NewInt()
for _, block := range m.state.GetMemoryBlocks(string(pod.UID), container.Name) {
for _, nodeID := range block.NUMAAffinity {
nodes = append(nodes, strconv.Itoa(nodeID))
// avoid nodes duplication when hugepages and memory blocks pinned to the same NUMA node
numaNodes.Insert(nodeID)
}
}
if len(nodes) < 1 {
klog.V(5).Infof("[memorymanager] update container resources is skipped due to memory blocks are empty")
if numaNodes.Len() == 0 {
klog.V(5).Infof("No allocation is available for (Pod: %s, Container: %s)", pod.Name, container.Name)
return nil
}
affinity := strings.Join(nodes, ",")
klog.Infof("[memorymanager] Set container %q cpuset.mems to %q", containerID, affinity)
err := m.containerRuntime.UpdateContainerResources(containerID, &runtimeapi.LinuxContainerResources{CpusetMems: affinity})
if err != nil {
klog.Errorf("[memorymanager] AddContainer error: error updating cpuset.mems for container (pod: %s, container: %s, container id: %s, err: %v)", pod.Name, container.Name, containerID, err)
m.Lock()
err = m.policyRemoveContainerByRef(string(pod.UID), container.Name)
if err != nil {
klog.Errorf("[memorymanager] AddContainer rollback state error: %v", err)
}
m.Unlock()
}
return err
klog.Infof("(Pod: %s, Container: %s) memory affinity is %v", pod.Name, container.Name, numaNodes)
return numaNodes
}
// Allocate is called to pre-allocate memory resources during Pod admission.

View File

@ -1122,102 +1122,6 @@ func TestAddContainer(t *testing.T) {
assignments: state.ContainerMemoryAssignments{},
activePods: nil,
},
{
description: "Adding container should fail (CRI error) but without an error",
updateError: fmt.Errorf("fake reg error"),
policyName: policyTypeStatic,
machineInfo: machineInfo,
reserved: reserved,
machineState: state.NUMANodeMap{
0: &state.NUMANodeState{
Cells: []int{0},
NumberOfAssignments: 0,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 9 * gb,
Reserved: 0 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 5 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 5 * gb,
},
},
},
1: &state.NUMANodeState{
Cells: []int{1},
NumberOfAssignments: 0,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 9 * gb,
Reserved: 0 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 5 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 5 * gb,
},
},
},
},
expectedMachineState: state.NUMANodeMap{
0: &state.NUMANodeState{
Cells: []int{0},
NumberOfAssignments: 0,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 9 * gb,
Reserved: 0 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 5 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 5 * gb,
},
},
},
1: &state.NUMANodeState{
Cells: []int{1},
NumberOfAssignments: 0,
MemoryMap: map[v1.ResourceName]*state.MemoryTable{
v1.ResourceMemory: {
Allocatable: 9 * gb,
Free: 9 * gb,
Reserved: 0 * gb,
SystemReserved: 1 * gb,
TotalMemSize: 10 * gb,
},
hugepages1Gi: {
Allocatable: 5 * gb,
Free: 5 * gb,
Reserved: 0,
SystemReserved: 0,
TotalMemSize: 5 * gb,
},
},
},
},
expectedAllocateError: nil,
expectedAddContainerError: nil,
podAllocate: pod,
assignments: state.ContainerMemoryAssignments{},
activePods: nil,
},
{
description: "Correct allocation of container requiring amount of memory higher than capacity of one NUMA node",
policyName: policyTypeStatic,
@ -1487,7 +1391,8 @@ func TestAddContainer(t *testing.T) {
t.Errorf("Memory Manager Allocate() error (%v), expected error: %v, but got: %v",
testCase.description, testCase.expectedAllocateError, err)
}
err = mgr.AddContainer(pod, container, "fakeID")
mgr.AddContainer(pod, container, "fakeID")
_, _, err = mgr.containerMap.GetContainerRef("fakeID")
if !reflect.DeepEqual(err, testCase.expectedAddContainerError) {
t.Errorf("Memory Manager AddContainer() error (%v), expected error: %v, but got: %v",
testCase.description, testCase.expectedAddContainerError, err)