mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-14 13:45:06 +00:00
Merge pull request #127483 from nokia/strict-cpu-reservation-core
KEP-4540: Add CPUManager policy option to restrict reservedSystemCPUs to system daemons and interrupt processing
This commit is contained in:
@@ -33,6 +33,7 @@ const (
|
||||
DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa"
|
||||
AlignBySocketOption string = "align-by-socket"
|
||||
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
|
||||
StrictCPUReservationOption string = "strict-cpu-reservation"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -40,6 +41,7 @@ var (
|
||||
DistributeCPUsAcrossNUMAOption,
|
||||
AlignBySocketOption,
|
||||
DistributeCPUsAcrossCoresOption,
|
||||
StrictCPUReservationOption,
|
||||
)
|
||||
betaOptions = sets.New[string](
|
||||
FullPCPUsOnlyOption,
|
||||
@@ -86,6 +88,8 @@ type StaticPolicyOptions struct {
|
||||
// cpus (HT) on different physical core.
|
||||
// This is a preferred policy, so do not throw an error if they have to be packed in one physical core.
|
||||
DistributeCPUsAcrossCores bool
|
||||
// Flag to remove reserved cores from the list of available cores
|
||||
StrictCPUReservation bool
|
||||
}
|
||||
|
||||
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
|
||||
@@ -121,7 +125,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.DistributeCPUsAcrossCores = optValue
|
||||
|
||||
case StrictCPUReservationOption:
|
||||
optValue, err := strconv.ParseBool(value)
|
||||
if err != nil {
|
||||
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
|
||||
}
|
||||
opts.StrictCPUReservation = optValue
|
||||
default:
|
||||
// this should never be reached, we already detect unknown options,
|
||||
// but we keep it as further safety.
|
||||
|
@@ -118,6 +118,18 @@ func TestPolicyOptionsAvailable(t *testing.T) {
|
||||
featureGateEnable: true,
|
||||
expectedAvailable: false,
|
||||
},
|
||||
{
|
||||
option: StrictCPUReservationOption,
|
||||
featureGate: pkgfeatures.CPUManagerPolicyAlphaOptions,
|
||||
featureGateEnable: true,
|
||||
expectedAvailable: true,
|
||||
},
|
||||
{
|
||||
option: StrictCPUReservationOption,
|
||||
featureGate: pkgfeatures.CPUManagerPolicyBetaOptions,
|
||||
featureGateEnable: true,
|
||||
expectedAvailable: false,
|
||||
},
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.option, func(t *testing.T) {
|
||||
|
@@ -202,14 +202,19 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
tmpAssignments := s.GetCPUAssignments()
|
||||
tmpDefaultCPUset := s.GetDefaultCPUSet()
|
||||
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
if p.options.StrictCPUReservation {
|
||||
allCPUs = allCPUs.Difference(p.reservedCPUs)
|
||||
}
|
||||
|
||||
// Default cpuset cannot be empty when assignments exist
|
||||
if tmpDefaultCPUset.IsEmpty() {
|
||||
if len(tmpAssignments) != 0 {
|
||||
return fmt.Errorf("default cpuset cannot be empty")
|
||||
}
|
||||
// state is empty, so initialize it
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
klog.InfoS("Static policy initialized", "defaultCPUSet", allCPUs)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -217,9 +222,16 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
if p.options.StrictCPUReservation {
|
||||
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).IsEmpty() {
|
||||
return fmt.Errorf("some of strictly reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
p.reservedCPUs.Intersection(tmpDefaultCPUset).String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
} else {
|
||||
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Check if state for static policy is consistent
|
||||
@@ -248,9 +260,10 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
|
||||
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||
if !totalKnownCPUs.Equals(allCPUs) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||
allCPUs.String(), totalKnownCPUs.String())
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@@ -107,6 +107,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.New(0, 1),
|
||||
expErr: fmt.Errorf("not all reserved cpus: \"0,6\" are present in defaultCpuSet: \"0-1\""),
|
||||
},
|
||||
{
|
||||
description: "some of reserved cores are present in available cpuset (StrictCPUReservationOption)",
|
||||
topo: topoDualSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
options: map[string]string{StrictCPUReservationOption: "true"},
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.New(0, 1),
|
||||
expErr: fmt.Errorf("some of strictly reserved cpus: \"0\" are present in defaultCpuSet: \"0-1\""),
|
||||
},
|
||||
{
|
||||
description: "assigned core 2 is still present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -118,6 +127,18 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
expErr: fmt.Errorf("pod: fakePod, container: 0 cpuset: \"0-2\" overlaps with default cpuset \"2-11\""),
|
||||
},
|
||||
{
|
||||
description: "assigned core 2 is still present in available cpuset (StrictCPUReservationOption)",
|
||||
topo: topoDualSocketHT,
|
||||
options: map[string]string{StrictCPUReservationOption: "true"},
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"0": cpuset.New(0, 1, 2),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
expErr: fmt.Errorf("pod: fakePod, container: 0 cpuset: \"0-2\" overlaps with default cpuset \"2-11\""),
|
||||
},
|
||||
{
|
||||
description: "core 12 is not present in topology but is in state cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -145,7 +166,8 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true)
|
||||
p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), testCase.options)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -939,17 +961,18 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
// above test cases are without kubelet --reserved-cpus cmd option
|
||||
// the following tests are with --reserved-cpus configured
|
||||
type staticPolicyTestWithResvList struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
expErr error
|
||||
expNewErr error
|
||||
expCPUAlloc bool
|
||||
expCSet cpuset.CPUSet
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
cpuPolicyOptions map[string]string
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
expErr error
|
||||
expNewErr error
|
||||
expCPUAlloc bool
|
||||
expCSet cpuset.CPUSet
|
||||
}
|
||||
|
||||
func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
@@ -963,6 +986,16 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.New(),
|
||||
expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
{
|
||||
description: "empty cpuset with StrictCPUReservationOption enabled",
|
||||
topo: topoDualSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.New(0, 1),
|
||||
cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"},
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.New(),
|
||||
expCSet: cpuset.New(2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
{
|
||||
description: "reserved cores 0 & 1 are not present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -972,6 +1005,16 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.New(2, 3, 4, 5),
|
||||
expErr: fmt.Errorf("not all reserved cpus: \"0-1\" are present in defaultCpuSet: \"2-5\""),
|
||||
},
|
||||
{
|
||||
description: "reserved cores 0 & 1 are present in available cpuset with StrictCPUReservationOption enabled",
|
||||
topo: topoDualSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.New(0, 1),
|
||||
cpuPolicyOptions: map[string]string{StrictCPUReservationOption: "true"},
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5),
|
||||
expErr: fmt.Errorf("some of strictly reserved cpus: \"0-1\" are present in defaultCpuSet: \"0-5\""),
|
||||
},
|
||||
{
|
||||
description: "inconsistency between numReservedCPUs and reserved",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -984,7 +1027,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.CPUManagerPolicyAlphaOptions, true)
|
||||
p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testCase.cpuPolicyOptions)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
|
Reference in New Issue
Block a user