Merge pull request #127483 from nokia/strict-cpu-reservation-core

KEP-4540: Add CPUManager policy option to restrict reservedSystemCPUs to system daemons and interrupt processing
This commit is contained in:
Kubernetes Prow Robot
2024-10-30 01:21:47 +00:00
committed by GitHub
4 changed files with 98 additions and 20 deletions

View File

@@ -33,6 +33,7 @@ const (
DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa"
AlignBySocketOption string = "align-by-socket"
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
StrictCPUReservationOption string = "strict-cpu-reservation"
)
var (
@@ -40,6 +41,7 @@ var (
DistributeCPUsAcrossNUMAOption,
AlignBySocketOption,
DistributeCPUsAcrossCoresOption,
StrictCPUReservationOption,
)
betaOptions = sets.New[string](
FullPCPUsOnlyOption,
@@ -86,6 +88,8 @@ type StaticPolicyOptions struct {
// cpus (HT) on different physical core.
// This is a preferred policy, so do not throw an error if they have to be packed in one physical core.
DistributeCPUsAcrossCores bool
// StrictCPUReservation removes the reserved CPUs from the set of CPUs the static policy
// treats as available, so they are also excluded from the default cpuset.
StrictCPUReservation bool
}
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
@@ -121,7 +125,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
}
opts.DistributeCPUsAcrossCores = optValue
case StrictCPUReservationOption:
optValue, err := strconv.ParseBool(value)
if err != nil {
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
}
opts.StrictCPUReservation = optValue
default:
// this should never be reached, we already detect unknown options,
// but we keep it as further safety.

View File

@@ -118,6 +118,18 @@ func TestPolicyOptionsAvailable(t *testing.T) {
featureGateEnable: true,
expectedAvailable: false,
},
{
option: StrictCPUReservationOption,
featureGate: pkgfeatures.CPUManagerPolicyAlphaOptions,
featureGateEnable: true,
expectedAvailable: true,
},
{
option: StrictCPUReservationOption,
featureGate: pkgfeatures.CPUManagerPolicyBetaOptions,
featureGateEnable: true,
expectedAvailable: false,
},
}
for _, testCase := range testCases {
t.Run(testCase.option, func(t *testing.T) {

View File

@@ -202,14 +202,19 @@ func (p *staticPolicy) validateState(s state.State) error {
tmpAssignments := s.GetCPUAssignments()
tmpDefaultCPUset := s.GetDefaultCPUSet()
allCPUs := p.topology.CPUDetails.CPUs()
if p.options.StrictCPUReservation {
allCPUs = allCPUs.Difference(p.reservedCPUs)
}
// Default cpuset cannot be empty when assignments exist
if tmpDefaultCPUset.IsEmpty() {
if len(tmpAssignments) != 0 {
return fmt.Errorf("default cpuset cannot be empty")
}
// state is empty, so initialize it
allCPUs := p.topology.CPUDetails.CPUs()
s.SetDefaultCPUSet(allCPUs)
klog.InfoS("Static policy initialized", "defaultCPUSet", allCPUs)
return nil
}
@@ -217,9 +222,16 @@ func (p *staticPolicy) validateState(s state.State) error {
// 1. Check if the reserved cpuset is not part of default cpuset because:
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
// - user tampered with file
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
p.reservedCPUs.String(), tmpDefaultCPUset.String())
if p.options.StrictCPUReservation {
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).IsEmpty() {
return fmt.Errorf("some of strictly reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
p.reservedCPUs.Intersection(tmpDefaultCPUset).String(), tmpDefaultCPUset.String())
}
} else {
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
p.reservedCPUs.String(), tmpDefaultCPUset.String())
}
}
// 2. Check if state for static policy is consistent
@@ -248,9 +260,10 @@ func (p *staticPolicy) validateState(s state.State) error {
}
}
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
if !totalKnownCPUs.Equals(allCPUs) {
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
allCPUs.String(), totalKnownCPUs.String())
}
return nil

View File

@@ -107,6 +107,15 @@ func TestStaticPolicyStart(t *testing.T) {
stDefaultCPUSet: cpuset.New(0, 1),
expErr: fmt.Errorf("not all reserved cpus: \"0,6\" are present in defaultCpuSet: \"0-1\""),
},
{
description: "some of reserved cores are present in available cpuset (StrictCPUReservationOption)",
topo: topoDualSocketHT,
numReservedCPUs: 2,
options: map[string]string{StrictCPUReservationOption: "true"},
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.New(0, 1),
expErr: fmt.Errorf("some of strictly reserved cpus: \"0\" are present in defaultCpuSet: \"0-1\""),
},
{
description: "assigned core 2 is still present in available cpuset",
topo: topoDualSocketHT,
@@ -118,6 +127,18 @@ func TestStaticPolicyStart(t *testing.T) {
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
expErr: fmt.Errorf("pod: fakePod, container: 0 cpuset: \"0-2\" overlaps with default cpuset \"2-11\""),
},
{
description: "assigned core 2 is still present in available cpuset (StrictCPUReservationOption)",
topo: topoDualSocketHT,
options: map[string]string{StrictCPUReservationOption: "true"},
stAssignments: state.ContainerCPUAssignments{
"fakePod": map[string]cpuset.CPUSet{
"0": cpuset.New(0, 1, 2),
},
},
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
expErr: fmt.Errorf("pod: fakePod, container: 0 cpuset: \"0-2\" overlaps with default cpuset \"2-11\""),
},
{
description: "core 12 is not present in topology but is in state cpuset",
topo: topoDualSocketHT,
@@ -145,7 +166,8 @@ func TestStaticPolicyStart(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true)
p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), testCase.options)
policy := p.(*staticPolicy)
st := &mockState{
assignments: testCase.stAssignments,
@@ -939,17 +961,18 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
// above test cases are without kubelet --reserved-cpus cmd option
// the following tests are with --reserved-cpus configured
type staticPolicyTestWithResvList struct {
description string
topo *topology.CPUTopology
numReservedCPUs int
reserved cpuset.CPUSet
stAssignments state.ContainerCPUAssignments
stDefaultCPUSet cpuset.CPUSet
pod *v1.Pod
expErr error
expNewErr error
expCPUAlloc bool
expCSet cpuset.CPUSet
description string
topo *topology.CPUTopology
numReservedCPUs int
reserved cpuset.CPUSet
cpuPolicyOptions map[string]string
stAssignments state.ContainerCPUAssignments
stDefaultCPUSet cpuset.CPUSet
pod *v1.Pod
expErr error
expNewErr error
expCPUAlloc bool
expCSet cpuset.CPUSet
}
func TestStaticPolicyStartWithResvList(t *testing.T) {
@@ -963,6 +986,16 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
stDefaultCPUSet: cpuset.New(),
expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
},
{
description: "empty cpuset with StrictCPUReservationOption enabled",
topo: topoDualSocketHT,
numReservedCPUs: 2,
reserved: cpuset.New(0, 1),
cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"},
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.New(),
expCSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
},
{
description: "reserved cores 0 & 1 are not present in available cpuset",
topo: topoDualSocketHT,
@@ -972,6 +1005,16 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
stDefaultCPUSet: cpuset.New(2, 3, 4, 5),
expErr: fmt.Errorf("not all reserved cpus: \"0-1\" are present in defaultCpuSet: \"2-5\""),
},
{
description: "reserved cores 0 & 1 are present in available cpuset with StrictCPUReservationOption enabled",
topo: topoDualSocketHT,
numReservedCPUs: 2,
reserved: cpuset.New(0, 1),
cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"},
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5),
expErr: fmt.Errorf("some of strictly reserved cpus: \"0-1\" are present in defaultCpuSet: \"0-5\""),
},
{
description: "inconsistency between numReservedCPUs and reserved",
topo: topoDualSocketHT,
@@ -984,7 +1027,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true)
p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testCase.cpuPolicyOptions)
if !reflect.DeepEqual(err, testCase.expNewErr) {
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
testCase.description, testCase.expNewErr, err)