memory manager: specify the container cpuset.mems during creation

Set the container's cpuset.mems during creation and avoid an additional call to update the container's resources.

Signed-off-by: Artyom Lukianov <alukiano@redhat.com>
parent: f79795d718
commit: 95b2777204
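
In a nutshell: instead of creating the container and then issuing a second CRI call (UpdateContainerResources) to pin its memory, the kubelet now writes the NUMA affinity into the container config before CreateContainer runs. A minimal standalone sketch of the idea, with a hypothetical stub standing in for the real CRI types (the actual field is LinuxContainerResources.CpusetMems from k8s.io/cri-api):

    package main

    import "fmt"

    // Hypothetical stand-in for runtimeapi.LinuxContainerResources;
    // only the field this commit touches is modeled here.
    type linuxContainerResources struct {
        CpusetMems string // NUMA nodes the container may allocate memory from
    }

    // createContainer is a hypothetical stub for the CRI CreateContainer call.
    func createContainer(resources linuxContainerResources) {
        fmt.Printf("created with cpuset.mems=%q\n", resources.CpusetMems)
    }

    func main() {
        // Old flow: create the container first, then call
        // UpdateContainerResources to set CpusetMems, with a state
        // rollback if that second call failed.
        //
        // New flow: PreCreateContainer fills CpusetMems up front, so the
        // container starts with its memory affinity already in place and
        // the extra CRI round-trip (and its rollback path) disappears.
        createContainer(linuxContainerResources{CpusetMems: "0,1"})
    }
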
@@ -46,10 +46,7 @@ func (i *internalContainerLifecycleImpl) PreStartContainer(pod *v1.Pod, containe
     }

     if i.memoryManager != nil {
-        err := i.memoryManager.AddContainer(pod, container, containerID)
-        if err != nil {
-            return err
-        }
+        i.memoryManager.AddContainer(pod, container, containerID)
     }

     if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) {
@@ -19,6 +19,9 @@ limitations under the License.
 package cm

 import (
+    "strconv"
+    "strings"
+
     "k8s.io/api/core/v1"
     runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
 )
@@ -31,5 +34,16 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
         }
     }

+    if i.memoryManager != nil {
+        numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
+        if numaNodes.Len() > 0 {
+            var affinity []string
+            for _, numaNode := range numaNodes.List() {
+                affinity = append(affinity, strconv.Itoa(numaNode))
+            }
+            containerConfig.Linux.Resources.CpusetMems = strings.Join(affinity, ",")
+        }
+    }
+
     return nil
 }
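
For reference, the loop added above produces a deterministic cpuset.mems string, because sets.Int.List() returns the node IDs in sorted order regardless of insertion order. A standalone sketch of just that conversion:

    package main

    import (
        "fmt"
        "strconv"
        "strings"

        "k8s.io/apimachinery/pkg/util/sets"
    )

    func main() {
        numaNodes := sets.NewInt(1, 0) // insertion order does not matter
        var affinity []string
        for _, n := range numaNodes.List() { // List() returns sorted IDs: [0 1]
            affinity = append(affinity, strconv.Itoa(n))
        }
        fmt.Println(strings.Join(affinity, ",")) // "0,1"
    }
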
@@ -18,6 +18,7 @@ package memorymanager

 import (
     v1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/util/sets"
     "k8s.io/klog/v2"
     "k8s.io/kubernetes/pkg/kubelet/cm/containermap"
     "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
@@ -45,8 +46,12 @@ func (m *fakeManager) Allocate(pod *v1.Pod, container *v1.Container) error {
     return nil
 }

-func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
+func (m *fakeManager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
     klog.Infof("[fake memorymanager] AddContainer (pod: %s, container: %s, container id: %s)", pod.Name, container.Name, containerID)
+}
+
+func (m *fakeManager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int {
+    klog.Infof("[fake memorymanager] GetMemoryNUMANodes (pod: %s, container: %s)", pod.Name, container.Name)
     return nil
 }
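
Note that the fake manager returns a nil sets.Int. The numaNodes.Len() > 0 guard in PreCreateContainer still behaves correctly, since sets.Int is a map underneath and len() of a nil map is zero. A standalone check:

    package main

    import (
        "fmt"

        "k8s.io/apimachinery/pkg/util/sets"
    )

    func main() {
        var numaNodes sets.Int // nil, like the fake manager's return value
        fmt.Println(numaNodes.Len())     // 0: len() of a nil map is zero
        fmt.Println(numaNodes.Len() > 0) // false: the CpusetMems branch is skipped
    }
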
@@ -18,14 +18,13 @@ package memorymanager

 import (
     "fmt"
-    "strconv"
-    "strings"
     "sync"

     cadvisorapi "github.com/google/cadvisor/info/v1"

     v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/resource"
+    "k8s.io/apimachinery/pkg/util/sets"
     runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
     "k8s.io/klog/v2"
     corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
@@ -57,10 +56,9 @@ type Manager interface {
     // Start is called during Kubelet initialization.
     Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error

-    // AddContainer is called between container create and container start
-    // so that initial memory affinity settings can be written through to the
-    // container runtime before the first process begins to execute.
-    AddContainer(p *v1.Pod, c *v1.Container, containerID string) error
+    // AddContainer adds the mapping from container ID to pod UID and container name.
+    // The mapping is used to remove the memory allocation during container removal.
+    AddContainer(p *v1.Pod, c *v1.Container, containerID string)

     // Allocate is called to pre-allocate memory resources during Pod admission.
     // This must be called at some point prior to the AddContainer() call for a container, e.g. at pod admission time.
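
The mapping the new AddContainer contract describes is the kubelet's containermap bookkeeping. A minimal sketch of that round trip, using the Add and GetContainerRef calls that appear elsewhere in this diff (error handling reduced to a print):

    package main

    import (
        "fmt"

        "k8s.io/kubernetes/pkg/kubelet/cm/containermap"
    )

    func main() {
        cm := containermap.NewContainerMap()
        cm.Add("pod-uid", "container-name", "fakeID") // what AddContainer records
        podUID, name, err := cm.GetContainerRef("fakeID")
        fmt.Println(podUID, name, err) // pod-uid container-name <nil>
    }
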
@@ -82,6 +80,9 @@ type Manager interface {
     // and is consulted to achieve NUMA aware resource alignment among this
     // and other resource controllers.
     GetPodTopologyHints(*v1.Pod) map[string][]topologymanager.TopologyHint
+
+    // GetMemoryNUMANodes provides the NUMA nodes that are used to allocate the container memory
+    GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int
 }

 type manager struct {
@@ -176,38 +177,31 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
 }

 // AddContainer saves the value of requested memory for the guaranteed pod under the state and sets memory affinity according to the topology manager
-func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) error {
+func (m *manager) AddContainer(pod *v1.Pod, container *v1.Container, containerID string) {
     m.Lock()
-    m.containerMap.Add(string(pod.UID), container.Name, containerID)
-    m.Unlock()
+    defer m.Unlock()
+
+    m.containerMap.Add(string(pod.UID), container.Name, containerID)
+}
+
+// GetMemoryNUMANodes provides the NUMA nodes that are used to allocate the container memory
+func (m *manager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.Int {
     // Get NUMA node affinity of blocks assigned to the container during Allocate()
-    var nodes []string
+    numaNodes := sets.NewInt()
     for _, block := range m.state.GetMemoryBlocks(string(pod.UID), container.Name) {
         for _, nodeID := range block.NUMAAffinity {
-            nodes = append(nodes, strconv.Itoa(nodeID))
+            // avoid node duplication when hugepages and memory blocks are pinned to the same NUMA node
+            numaNodes.Insert(nodeID)
         }
     }

-    if len(nodes) < 1 {
-        klog.V(5).Infof("[memorymanager] skipping container resources update because the memory blocks are empty")
+    if numaNodes.Len() == 0 {
+        klog.V(5).Infof("No allocation is available for (Pod: %s, Container: %s)", pod.Name, container.Name)
         return nil
     }

-    affinity := strings.Join(nodes, ",")
-    klog.Infof("[memorymanager] Set container %q cpuset.mems to %q", containerID, affinity)
-    err := m.containerRuntime.UpdateContainerResources(containerID, &runtimeapi.LinuxContainerResources{CpusetMems: affinity})
-    if err != nil {
-        klog.Errorf("[memorymanager] AddContainer error: error updating cpuset.mems for container (pod: %s, container: %s, container id: %s, err: %v)", pod.Name, container.Name, containerID, err)
-        m.Lock()
-        err = m.policyRemoveContainerByRef(string(pod.UID), container.Name)
-        if err != nil {
-            klog.Errorf("[memorymanager] AddContainer rollback state error: %v", err)
-        }
-        m.Unlock()
-    }
-    return err
+    klog.Infof("(Pod: %s, Container: %s) memory affinity is %v", pod.Name, container.Name, numaNodes)
+    return numaNodes
 }

 // Allocate is called to pre-allocate memory resources during Pod admission.
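
The switch from a []string slice to sets.Int matters when a guaranteed container has both regular memory and hugepages pinned to the same NUMA node: each memory block contributes its NUMAAffinity, and the set collapses the repeats, exactly as the new inline comment says. A standalone sketch (the block struct is a simplified stand-in for state.Block):

    package main

    import (
        "fmt"

        "k8s.io/apimachinery/pkg/util/sets"
    )

    // Simplified stand-in for state.Block: only the field used here.
    type block struct{ NUMAAffinity []int }

    func main() {
        // Regular memory and hugepages both pinned to NUMA node 0.
        blocks := []block{{NUMAAffinity: []int{0}}, {NUMAAffinity: []int{0}}}
        numaNodes := sets.NewInt()
        for _, b := range blocks {
            for _, nodeID := range b.NUMAAffinity {
                numaNodes.Insert(nodeID) // duplicates collapse in the set
            }
        }
        fmt.Println(numaNodes.List()) // [0], not [0 0]
    }
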
@@ -1122,102 +1122,6 @@ func TestAddContainer(t *testing.T) {
             assignments: state.ContainerMemoryAssignments{},
             activePods:  nil,
         },
-        {
-            description: "Adding container should fail (CRI error) but without an error",
-            updateError: fmt.Errorf("fake reg error"),
-            policyName:  policyTypeStatic,
-            machineInfo: machineInfo,
-            reserved:    reserved,
-            machineState: state.NUMANodeMap{
-                0: &state.NUMANodeState{
-                    Cells:               []int{0},
-                    NumberOfAssignments: 0,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           9 * gb,
-                            Reserved:       0 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           5 * gb,
-                            Reserved:       0,
-                            SystemReserved: 0,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-                1: &state.NUMANodeState{
-                    Cells:               []int{1},
-                    NumberOfAssignments: 0,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           9 * gb,
-                            Reserved:       0 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           5 * gb,
-                            Reserved:       0,
-                            SystemReserved: 0,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-            },
-            expectedMachineState: state.NUMANodeMap{
-                0: &state.NUMANodeState{
-                    Cells:               []int{0},
-                    NumberOfAssignments: 0,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           9 * gb,
-                            Reserved:       0 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           5 * gb,
-                            Reserved:       0,
-                            SystemReserved: 0,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-                1: &state.NUMANodeState{
-                    Cells:               []int{1},
-                    NumberOfAssignments: 0,
-                    MemoryMap: map[v1.ResourceName]*state.MemoryTable{
-                        v1.ResourceMemory: {
-                            Allocatable:    9 * gb,
-                            Free:           9 * gb,
-                            Reserved:       0 * gb,
-                            SystemReserved: 1 * gb,
-                            TotalMemSize:   10 * gb,
-                        },
-                        hugepages1Gi: {
-                            Allocatable:    5 * gb,
-                            Free:           5 * gb,
-                            Reserved:       0,
-                            SystemReserved: 0,
-                            TotalMemSize:   5 * gb,
-                        },
-                    },
-                },
-            },
-            expectedAllocateError:     nil,
-            expectedAddContainerError: nil,
-            podAllocate:               pod,
-            assignments:               state.ContainerMemoryAssignments{},
-            activePods:                nil,
-        },
         {
             description: "Correct allocation of container requiring amount of memory higher than capacity of one NUMA node",
             policyName:  policyTypeStatic,
@@ -1487,7 +1391,8 @@
                 t.Errorf("Memory Manager Allocate() error (%v), expected error: %v, but got: %v",
                     testCase.description, testCase.expectedAllocateError, err)
             }
-            err = mgr.AddContainer(pod, container, "fakeID")
+            mgr.AddContainer(pod, container, "fakeID")
+            _, _, err = mgr.containerMap.GetContainerRef("fakeID")
             if !reflect.DeepEqual(err, testCase.expectedAddContainerError) {
                 t.Errorf("Memory Manager AddContainer() error (%v), expected error: %v, but got: %v",
                     testCase.description, testCase.expectedAddContainerError, err)