From 4a6410291804306e6409da1736acb92edd24c3a2 Mon Sep 17 00:00:00 2001 From: Cezary Zukowski Date: Thu, 23 Apr 2020 17:56:47 +0200 Subject: [PATCH] memory manager: validate reserved-memory against Node Allocatable Reserved memory of all kinds (and over all NUMA nodes) must be equal to the values determined by Node Allocatable feature. Signed-off-by: Cezary Zukowski --- pkg/features/kube_features.go | 1 + .../cm/memorymanager/memory_manager.go | 105 +++++++--- .../cm/memorymanager/memory_manager_test.go | 181 ++++++++++++++++++ ...acket_separated_slice_map_string_string.go | 17 +- ..._separated_slice_map_string_string_test.go | 10 +- 5 files changed, 269 insertions(+), 45 deletions(-) create mode 100644 pkg/kubelet/cm/memorymanager/memory_manager_test.go diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index f1a47fbdb8c..0801411b39c 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -703,6 +703,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS ExpandInUsePersistentVolumes: {Default: true, PreRelease: featuregate.Beta}, ExpandCSIVolumes: {Default: true, PreRelease: featuregate.Beta}, CPUManager: {Default: true, PreRelease: featuregate.Beta}, + MemoryManager: {Default: false, PreRelease: featuregate.Alpha}, CPUCFSQuotaPeriod: {Default: false, PreRelease: featuregate.Alpha}, TopologyManager: {Default: true, PreRelease: featuregate.Beta}, ServiceNodeExclusion: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.22 diff --git a/pkg/kubelet/cm/memorymanager/memory_manager.go b/pkg/kubelet/cm/memorymanager/memory_manager.go index b298ca384d9..59d19e618c3 100644 --- a/pkg/kubelet/cm/memorymanager/memory_manager.go +++ b/pkg/kubelet/cm/memorymanager/memory_manager.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" "k8s.io/klog/v2" + corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" @@ -101,8 +102,6 @@ type manager struct { // for all containers a pod containerMap containermap.ContainerMap - nodeAllocatableReservation v1.ResourceList - // sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness. // We use it to determine when we can purge inactive pods from checkpointed state. sourcesReady config.SourcesReady @@ -123,12 +122,12 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll policy = NewPolicyNone() case policyTypeStatic: - reserved, err := getReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory) + systemReserved, err := getSystemReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory) if err != nil { return nil, err } - policy, err = NewPolicyStatic(machineInfo, reserved, affinity) + policy, err = NewPolicyStatic(machineInfo, systemReserved, affinity) if err != nil { return nil, err } @@ -138,9 +137,8 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll } manager := &manager{ - policy: policy, - nodeAllocatableReservation: nodeAllocatableReservation, - stateFileDirectory: stateFileDirectory, + policy: policy, + stateFileDirectory: stateFileDirectory, } manager.sourcesReady = &sourcesReadyStub{} return manager, nil @@ -309,35 +307,86 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string return err } +func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity { + totalMemoryType := map[v1.ResourceName]resource.Quantity{} + + for _, node := range preReservedMemory { + for memType, memVal := range node { + if totalMem, exists := totalMemoryType[memType]; exists { + memVal.Add(totalMem) + } + totalMemoryType[memType] = memVal + } + } + + return totalMemoryType +} + func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error { - // TODO: this will check equality of total reserved memory by node allocatable feature and total pre-reserved memory + totalMemoryType := getTotalMemoryTypeReserved(reservedMemory) + + commonMemoryTypeSet := make(map[v1.ResourceName]bool) + for resourceType := range totalMemoryType { + if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) { + continue + } + commonMemoryTypeSet[resourceType] = true + } + for resourceType := range nodeAllocatableReservation { + if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) { + continue + } + commonMemoryTypeSet[resourceType] = true + } + + for resourceType := range commonMemoryTypeSet { + nodeAllocatableMemory := resource.NewQuantity(0, resource.DecimalSI) + if memValue, set := nodeAllocatableReservation[resourceType]; set { + nodeAllocatableMemory.Add(memValue) + } + + reservedMemory := resource.NewQuantity(0, resource.DecimalSI) + if memValue, set := totalMemoryType[resourceType]; set { + reservedMemory.Add(memValue) + } + + if !(*nodeAllocatableMemory).Equal(*reservedMemory) { + return fmt.Errorf("the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature", resourceType) + } + } return nil } -func getReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { - // TODO: we should add new kubelet parameter, and to get reserved memory per NUMA node from it - // currently we use kube-reserved + system-reserved + eviction reserve for each NUMA node, that creates memory over-consumption - // and no reservation for huge pages +func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { + preReservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64) + for _, node := range machineInfo.Topology { + preReservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64) + } - if err := validateReservedMemory(nodeAllocatableReservation, reservedMemory); err != nil { + for numaIndex := range reservedMemory { + for memoryType := range reservedMemory[numaIndex] { + tmp := reservedMemory[numaIndex][memoryType] + if val, success := tmp.AsInt64(); success { + preReservedMemoryConverted[numaIndex][memoryType] = uint64(val) + } else { + return nil, fmt.Errorf("could not covert a variable of type Quantity to int64") + } + } + } + + return preReservedMemoryConverted, nil +} + +func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) { + if err := validateReservedMemory(nodeAllocatableReservation, preReservedMemory); err != nil { return nil, err } - reserved := systemReservedMemory{} - for _, node := range machineInfo.Topology { - memory := nodeAllocatableReservation[v1.ResourceMemory] - if memory.IsZero() { - break - } - value, succeeded := memory.AsInt64() - if !succeeded { - return nil, fmt.Errorf("failed to represent reserved memory as int64") - } - - reserved[node.Id] = map[v1.ResourceName]uint64{ - v1.ResourceMemory: uint64(value), - } + reservedMemoryConverted, err := convertReserved(machineInfo, preReservedMemory) + if err != nil { + return nil, err } - return reserved, nil + + return reservedMemoryConverted, nil } diff --git a/pkg/kubelet/cm/memorymanager/memory_manager_test.go b/pkg/kubelet/cm/memorymanager/memory_manager_test.go new file mode 100644 index 00000000000..ae6ec42e6ad --- /dev/null +++ b/pkg/kubelet/cm/memorymanager/memory_manager_test.go @@ -0,0 +1,181 @@ +package memorymanager + +import ( + "fmt" + "reflect" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + info "github.com/google/cadvisor/info/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +const ( + hugepages2M = "hugepages-2Mi" + hugepages1G = "hugepages-1Gi" +) + +type nodeResources map[v1.ResourceName]resource.Quantity + +// validateReservedMemory +func TestValidatePreReservedMemory(t *testing.T) { + const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature" + testCases := []struct { + description string + nodeAllocatableReservation v1.ResourceList + preReservedMemory map[int]map[v1.ResourceName]resource.Quantity + expectedError string + }{ + { + "Node Allocatable not set, pre-reserved not set", + v1.ResourceList{}, + map[int]map[v1.ResourceName]resource.Quantity{}, + "", + }, + { + "Node Allocatable set to zero, pre-reserved set to zero", + v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)}, + }, + "", + }, + { + "Node Allocatable not set (equal zero), pre-reserved set", + v1.ResourceList{}, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + }, + fmt.Sprintf(msgNotEqual, v1.ResourceMemory), + }, + { + "Node Allocatable set, pre-reserved not set", + v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)}, + map[int]map[v1.ResourceName]resource.Quantity{}, + fmt.Sprintf(msgNotEqual, hugepages2M), + }, + { + "Pre-reserved not equal to Node Allocatable", + v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)}, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)}, + }, + fmt.Sprintf(msgNotEqual, v1.ResourceMemory), + }, + { + "Pre-reserved total equal to Node Allocatable", + v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(77, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, + }, + "", + }, + { + "Pre-reserved total hugapages-2M not equal to Node Allocatable", + v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(14, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, + }, + fmt.Sprintf(msgNotEqual, hugepages2M), + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + err := validateReservedMemory(tc.nodeAllocatableReservation, tc.preReservedMemory) + if strings.TrimSpace(tc.expectedError) != "" { + assert.Error(t, err) + assert.Equal(t, err.Error(), tc.expectedError) + } + }) + } +} + +func TestConvertPreReserved(t *testing.T) { + machineInfo := info.MachineInfo{ + Topology: []info.Node{ + info.Node{Id: 0}, + info.Node{Id: 1}, + }, + } + + testCases := []struct { + description string + reserved map[int]map[v1.ResourceName]resource.Quantity + reservedExpected reservedMemory + expectedError string + }{ + { + "Empty", + map[int]map[v1.ResourceName]resource.Quantity{}, + reservedMemory{ + 0: map[v1.ResourceName]uint64{}, + 1: map[v1.ResourceName]uint64{}, + }, + "", + }, + { + "Single NUMA node is pre-reserved", + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + }, + reservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 12, + hugepages2M: 70, + hugepages1G: 13, + }, + 1: map[v1.ResourceName]uint64{}, + }, + "", + }, + { + "Both NUMA nodes are pre-reserved", + map[int]map[v1.ResourceName]resource.Quantity{ + 0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(70, resource.DecimalSI), + hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)}, + 1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI), + hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)}, + }, + reservedMemory{ + 0: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 12, + hugepages2M: 70, + hugepages1G: 13, + }, + 1: map[v1.ResourceName]uint64{ + v1.ResourceMemory: 5, + hugepages2M: 7, + }, + }, + "", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + reserved, _ := convertReserved(&machineInfo, tc.reserved) + if !reflect.DeepEqual(reserved, tc.reservedExpected) { + t.Errorf("got %v, expected %v", reserved, tc.reservedExpected) + } + }) + } +} diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go index c21780d01b2..e3a99c872bd 100644 --- a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go +++ b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string.go @@ -23,10 +23,10 @@ import ( ) // BracketSeparatedSliceMapStringString can be set from the command line with the format `--flag {key=value, ...}, {...}`. -// Multiple comma-separated key-value pairs in a braket(`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`. -// Multiple braket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`. +// Multiple comma-separated key-value pairs in brackets (`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`. +// Multiple bracket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`. type BracketSeparatedSliceMapStringString struct { - Value *[]map[string]string + Value *[]map[string]string initialized bool // set to true after the first Set call } @@ -36,7 +36,6 @@ func NewBracketSeparatedSliceMapStringString(m *[]map[string]string) *BracketSep return &BracketSeparatedSliceMapStringString{Value: m} } - // Set implements github.com/spf13/pflag.Value func (m *BracketSeparatedSliceMapStringString) Set(value string) error { if m.Value == nil { @@ -49,13 +48,7 @@ func (m *BracketSeparatedSliceMapStringString) Set(value string) error { value = strings.TrimSpace(value) - // split here - //{numa-node=0,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi} -// for _, split := range strings.Split(value, "{") { -// split = strings.TrimRight(split, ",") -// split = strings.TrimRight(split, "}") for _, split := range strings.Split(value, ",{") { - //split = strings.TrimRight(split, ",") split = strings.TrimLeft(split, "{") split = strings.TrimRight(split, "}") @@ -66,7 +59,7 @@ func (m *BracketSeparatedSliceMapStringString) Set(value string) error { // now we have "numa-node=1,memory-type=memory,limit=1Gi" tmpRawMap := make(map[string]string) - tmpMap:= NewMapStringString(&tmpRawMap) + tmpMap := NewMapStringString(&tmpRawMap) if err := tmpMap.Set(split); err != nil { return fmt.Errorf("could not parse String: (%s): %v", value, err) @@ -100,7 +93,7 @@ func (m *BracketSeparatedSliceMapStringString) String() string { } if len(tmpPairs) != 0 { - slices = append(slices, "{" + strings.Join(tmpPairs, ",") + "}") + slices = append(slices, "{"+strings.Join(tmpPairs, ",")+"}") } } sort.Strings(slices) diff --git a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go index caea52c8761..84049d1ebc2 100644 --- a/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go +++ b/staging/src/k8s.io/component-base/cli/flag/bracket_separated_slice_map_string_string_test.go @@ -28,7 +28,7 @@ func TestStringBracketSeparatedSliceMapStringString(t *testing.T) { m *BracketSeparatedSliceMapStringString expect string }{ - {"nill", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""}, + {"nil", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""}, {"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), ""}, {"one key", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string"}}), "{a=string}"}, {"two keys", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string", "b": "string"}}), "{a=string,b=string}"}, @@ -73,13 +73,13 @@ func TestSetBracketSeparatedSliceMapStringString(t *testing.T) { initialized: true, Value: &[]map[string]string{}, }, ""}, - {"empty braket", []string{"{}"}, + {"empty bracket", []string{"{}"}, NewBracketSeparatedSliceMapStringString(&nilMap), &BracketSeparatedSliceMapStringString{ initialized: true, Value: &[]map[string]string{}, }, ""}, - {"missing braket", []string{"a=string, b=string"}, + {"missing bracket", []string{"a=string, b=string"}, NewBracketSeparatedSliceMapStringString(&nilMap), &BracketSeparatedSliceMapStringString{ initialized: true, @@ -103,13 +103,13 @@ func TestSetBracketSeparatedSliceMapStringString(t *testing.T) { initialized: true, Value: &[]map[string]string{{"a": "string", "b": "string"}}, }, ""}, - {"two duplecated keys", []string{"{a=string,a=string}"}, + {"two duplicated keys", []string{"{a=string,a=string}"}, NewBracketSeparatedSliceMapStringString(&nilMap), &BracketSeparatedSliceMapStringString{ initialized: true, Value: &[]map[string]string{{"a": "string"}}, }, ""}, - {"two keys with space", []string{"{a = string, b = string}"}, + {"two keys with spaces", []string{"{a = string, b = string}"}, NewBracketSeparatedSliceMapStringString(&nilMap), &BracketSeparatedSliceMapStringString{ initialized: true,