memory manager: validate reserved-memory against Node Allocatable

Reserved memory of all kinds (and over all
NUMA nodes) must be equal to the values determined
by Node Allocatable feature.

Signed-off-by: Cezary Zukowski <c.zukowski@samsung.com>
This commit is contained in:
Cezary Zukowski 2020-04-23 17:56:47 +02:00 committed by Artyom Lukianov
parent 711e85af24
commit 4a64102918
5 changed files with 269 additions and 45 deletions

View File

@ -703,6 +703,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
ExpandInUsePersistentVolumes: {Default: true, PreRelease: featuregate.Beta},
ExpandCSIVolumes: {Default: true, PreRelease: featuregate.Beta},
CPUManager: {Default: true, PreRelease: featuregate.Beta},
MemoryManager: {Default: false, PreRelease: featuregate.Alpha},
CPUCFSQuotaPeriod: {Default: false, PreRelease: featuregate.Alpha},
TopologyManager: {Default: true, PreRelease: featuregate.Beta},
ServiceNodeExclusion: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.22

View File

@ -28,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/klog/v2"
corev1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@ -101,8 +102,6 @@ type manager struct {
// for all containers a pod
containerMap containermap.ContainerMap
nodeAllocatableReservation v1.ResourceList
// sourcesReady provides the readiness of kubelet configuration sources such as apiserver update readiness.
// We use it to determine when we can purge inactive pods from checkpointed state.
sourcesReady config.SourcesReady
@ -123,12 +122,12 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll
policy = NewPolicyNone()
case policyTypeStatic:
reserved, err := getReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory)
systemReserved, err := getSystemReservedMemory(machineInfo, nodeAllocatableReservation, reservedMemory)
if err != nil {
return nil, err
}
policy, err = NewPolicyStatic(machineInfo, reserved, affinity)
policy, err = NewPolicyStatic(machineInfo, systemReserved, affinity)
if err != nil {
return nil, err
}
@ -138,9 +137,8 @@ func NewManager(policyName string, machineInfo *cadvisorapi.MachineInfo, nodeAll
}
manager := &manager{
policy: policy,
nodeAllocatableReservation: nodeAllocatableReservation,
stateFileDirectory: stateFileDirectory,
policy: policy,
stateFileDirectory: stateFileDirectory,
}
manager.sourcesReady = &sourcesReadyStub{}
return manager, nil
@ -309,35 +307,86 @@ func (m *manager) policyRemoveContainerByRef(podUID string, containerName string
return err
}
func getTotalMemoryTypeReserved(preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) map[v1.ResourceName]resource.Quantity {
totalMemoryType := map[v1.ResourceName]resource.Quantity{}
for _, node := range preReservedMemory {
for memType, memVal := range node {
if totalMem, exists := totalMemoryType[memType]; exists {
memVal.Add(totalMem)
}
totalMemoryType[memType] = memVal
}
}
return totalMemoryType
}
func validateReservedMemory(nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) error {
// TODO: this will check equality of total reserved memory by node allocatable feature and total pre-reserved memory
totalMemoryType := getTotalMemoryTypeReserved(reservedMemory)
commonMemoryTypeSet := make(map[v1.ResourceName]bool)
for resourceType := range totalMemoryType {
if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) {
continue
}
commonMemoryTypeSet[resourceType] = true
}
for resourceType := range nodeAllocatableReservation {
if !(corev1helper.IsHugePageResourceName(resourceType) || resourceType == v1.ResourceMemory) {
continue
}
commonMemoryTypeSet[resourceType] = true
}
for resourceType := range commonMemoryTypeSet {
nodeAllocatableMemory := resource.NewQuantity(0, resource.DecimalSI)
if memValue, set := nodeAllocatableReservation[resourceType]; set {
nodeAllocatableMemory.Add(memValue)
}
reservedMemory := resource.NewQuantity(0, resource.DecimalSI)
if memValue, set := totalMemoryType[resourceType]; set {
reservedMemory.Add(memValue)
}
if !(*nodeAllocatableMemory).Equal(*reservedMemory) {
return fmt.Errorf("the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature", resourceType)
}
}
return nil
}
func getReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) {
// TODO: we should add new kubelet parameter, and to get reserved memory per NUMA node from it
// currently we use kube-reserved + system-reserved + eviction reserve for each NUMA node, that creates memory over-consumption
// and no reservation for huge pages
func convertReserved(machineInfo *cadvisorapi.MachineInfo, reservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) {
preReservedMemoryConverted := make(map[int]map[v1.ResourceName]uint64)
for _, node := range machineInfo.Topology {
preReservedMemoryConverted[node.Id] = make(map[v1.ResourceName]uint64)
}
if err := validateReservedMemory(nodeAllocatableReservation, reservedMemory); err != nil {
for numaIndex := range reservedMemory {
for memoryType := range reservedMemory[numaIndex] {
tmp := reservedMemory[numaIndex][memoryType]
if val, success := tmp.AsInt64(); success {
preReservedMemoryConverted[numaIndex][memoryType] = uint64(val)
} else {
return nil, fmt.Errorf("could not covert a variable of type Quantity to int64")
}
}
}
return preReservedMemoryConverted, nil
}
func getSystemReservedMemory(machineInfo *cadvisorapi.MachineInfo, nodeAllocatableReservation v1.ResourceList, preReservedMemory map[int]map[v1.ResourceName]resource.Quantity) (systemReservedMemory, error) {
if err := validateReservedMemory(nodeAllocatableReservation, preReservedMemory); err != nil {
return nil, err
}
reserved := systemReservedMemory{}
for _, node := range machineInfo.Topology {
memory := nodeAllocatableReservation[v1.ResourceMemory]
if memory.IsZero() {
break
}
value, succeeded := memory.AsInt64()
if !succeeded {
return nil, fmt.Errorf("failed to represent reserved memory as int64")
}
reserved[node.Id] = map[v1.ResourceName]uint64{
v1.ResourceMemory: uint64(value),
}
reservedMemoryConverted, err := convertReserved(machineInfo, preReservedMemory)
if err != nil {
return nil, err
}
return reserved, nil
return reservedMemoryConverted, nil
}

View File

@ -0,0 +1,181 @@
package memorymanager
import (
"fmt"
"reflect"
"strings"
"testing"
"github.com/stretchr/testify/assert"
info "github.com/google/cadvisor/info/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
const (
hugepages2M = "hugepages-2Mi"
hugepages1G = "hugepages-1Gi"
)
type nodeResources map[v1.ResourceName]resource.Quantity
// validateReservedMemory
func TestValidatePreReservedMemory(t *testing.T) {
const msgNotEqual = "the total amount of memory of type \"%s\" is not equal to the value determined by Node Allocatable feature"
testCases := []struct {
description string
nodeAllocatableReservation v1.ResourceList
preReservedMemory map[int]map[v1.ResourceName]resource.Quantity
expectedError string
}{
{
"Node Allocatable not set, pre-reserved not set",
v1.ResourceList{},
map[int]map[v1.ResourceName]resource.Quantity{},
"",
},
{
"Node Allocatable set to zero, pre-reserved set to zero",
v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)},
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(0, resource.DecimalSI)},
},
"",
},
{
"Node Allocatable not set (equal zero), pre-reserved set",
v1.ResourceList{},
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)},
},
fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
},
{
"Node Allocatable set, pre-reserved not set",
v1.ResourceList{hugepages2M: *resource.NewQuantity(5, resource.DecimalSI)},
map[int]map[v1.ResourceName]resource.Quantity{},
fmt.Sprintf(msgNotEqual, hugepages2M),
},
{
"Pre-reserved not equal to Node Allocatable",
v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI)},
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI)},
},
fmt.Sprintf(msgNotEqual, v1.ResourceMemory),
},
{
"Pre-reserved total equal to Node Allocatable",
v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(77, resource.DecimalSI),
hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)},
},
"",
},
{
"Pre-reserved total hugapages-2M not equal to Node Allocatable",
v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(17, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(14, resource.DecimalSI),
hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)},
},
fmt.Sprintf(msgNotEqual, hugepages2M),
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
err := validateReservedMemory(tc.nodeAllocatableReservation, tc.preReservedMemory)
if strings.TrimSpace(tc.expectedError) != "" {
assert.Error(t, err)
assert.Equal(t, err.Error(), tc.expectedError)
}
})
}
}
func TestConvertPreReserved(t *testing.T) {
machineInfo := info.MachineInfo{
Topology: []info.Node{
info.Node{Id: 0},
info.Node{Id: 1},
},
}
testCases := []struct {
description string
reserved map[int]map[v1.ResourceName]resource.Quantity
reservedExpected reservedMemory
expectedError string
}{
{
"Empty",
map[int]map[v1.ResourceName]resource.Quantity{},
reservedMemory{
0: map[v1.ResourceName]uint64{},
1: map[v1.ResourceName]uint64{},
},
"",
},
{
"Single NUMA node is pre-reserved",
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
},
reservedMemory{
0: map[v1.ResourceName]uint64{
v1.ResourceMemory: 12,
hugepages2M: 70,
hugepages1G: 13,
},
1: map[v1.ResourceName]uint64{},
},
"",
},
{
"Both NUMA nodes are pre-reserved",
map[int]map[v1.ResourceName]resource.Quantity{
0: nodeResources{v1.ResourceMemory: *resource.NewQuantity(12, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(70, resource.DecimalSI),
hugepages1G: *resource.NewQuantity(13, resource.DecimalSI)},
1: nodeResources{v1.ResourceMemory: *resource.NewQuantity(5, resource.DecimalSI),
hugepages2M: *resource.NewQuantity(7, resource.DecimalSI)},
},
reservedMemory{
0: map[v1.ResourceName]uint64{
v1.ResourceMemory: 12,
hugepages2M: 70,
hugepages1G: 13,
},
1: map[v1.ResourceName]uint64{
v1.ResourceMemory: 5,
hugepages2M: 7,
},
},
"",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
reserved, _ := convertReserved(&machineInfo, tc.reserved)
if !reflect.DeepEqual(reserved, tc.reservedExpected) {
t.Errorf("got %v, expected %v", reserved, tc.reservedExpected)
}
})
}
}

View File

@ -23,10 +23,10 @@ import (
)
// BracketSeparatedSliceMapStringString can be set from the command line with the format `--flag {key=value, ...}, {...}`.
// Multiple comma-separated key-value pairs in a braket(`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`.
// Multiple braket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`.
// Multiple comma-separated key-value pairs in brackets (`{}`) in a single invocation are supported. For example: `--flag {key=value, key=value, ...}`.
// Multiple bracket-separated list of key-value pairs in a single invocation are supported. For example: `--flag {key=value, key=value}, {key=value, key=value}`.
type BracketSeparatedSliceMapStringString struct {
Value *[]map[string]string
Value *[]map[string]string
initialized bool // set to true after the first Set call
}
@ -36,7 +36,6 @@ func NewBracketSeparatedSliceMapStringString(m *[]map[string]string) *BracketSep
return &BracketSeparatedSliceMapStringString{Value: m}
}
// Set implements github.com/spf13/pflag.Value
func (m *BracketSeparatedSliceMapStringString) Set(value string) error {
if m.Value == nil {
@ -49,13 +48,7 @@ func (m *BracketSeparatedSliceMapStringString) Set(value string) error {
value = strings.TrimSpace(value)
// split here
//{numa-node=0,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi},{numa-node=1,memory-type=memory,limit=1Gi}
// for _, split := range strings.Split(value, "{") {
// split = strings.TrimRight(split, ",")
// split = strings.TrimRight(split, "}")
for _, split := range strings.Split(value, ",{") {
//split = strings.TrimRight(split, ",")
split = strings.TrimLeft(split, "{")
split = strings.TrimRight(split, "}")
@ -66,7 +59,7 @@ func (m *BracketSeparatedSliceMapStringString) Set(value string) error {
// now we have "numa-node=1,memory-type=memory,limit=1Gi"
tmpRawMap := make(map[string]string)
tmpMap:= NewMapStringString(&tmpRawMap)
tmpMap := NewMapStringString(&tmpRawMap)
if err := tmpMap.Set(split); err != nil {
return fmt.Errorf("could not parse String: (%s): %v", value, err)
@ -100,7 +93,7 @@ func (m *BracketSeparatedSliceMapStringString) String() string {
}
if len(tmpPairs) != 0 {
slices = append(slices, "{" + strings.Join(tmpPairs, ",") + "}")
slices = append(slices, "{"+strings.Join(tmpPairs, ",")+"}")
}
}
sort.Strings(slices)

View File

@ -28,7 +28,7 @@ func TestStringBracketSeparatedSliceMapStringString(t *testing.T) {
m *BracketSeparatedSliceMapStringString
expect string
}{
{"nill", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""},
{"nil", NewBracketSeparatedSliceMapStringString(&nilSliceMap), ""},
{"empty", NewBracketSeparatedSliceMapStringString(&[]map[string]string{}), ""},
{"one key", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string"}}), "{a=string}"},
{"two keys", NewBracketSeparatedSliceMapStringString(&[]map[string]string{{"a": "string", "b": "string"}}), "{a=string,b=string}"},
@ -73,13 +73,13 @@ func TestSetBracketSeparatedSliceMapStringString(t *testing.T) {
initialized: true,
Value: &[]map[string]string{},
}, ""},
{"empty braket", []string{"{}"},
{"empty bracket", []string{"{}"},
NewBracketSeparatedSliceMapStringString(&nilMap),
&BracketSeparatedSliceMapStringString{
initialized: true,
Value: &[]map[string]string{},
}, ""},
{"missing braket", []string{"a=string, b=string"},
{"missing bracket", []string{"a=string, b=string"},
NewBracketSeparatedSliceMapStringString(&nilMap),
&BracketSeparatedSliceMapStringString{
initialized: true,
@ -103,13 +103,13 @@ func TestSetBracketSeparatedSliceMapStringString(t *testing.T) {
initialized: true,
Value: &[]map[string]string{{"a": "string", "b": "string"}},
}, ""},
{"two duplecated keys", []string{"{a=string,a=string}"},
{"two duplicated keys", []string{"{a=string,a=string}"},
NewBracketSeparatedSliceMapStringString(&nilMap),
&BracketSeparatedSliceMapStringString{
initialized: true,
Value: &[]map[string]string{{"a": "string"}},
}, ""},
{"two keys with space", []string{"{a = string, b = string}"},
{"two keys with spaces", []string{"{a = string, b = string}"},
NewBracketSeparatedSliceMapStringString(&nilMap),
&BracketSeparatedSliceMapStringString{
initialized: true,