KEP-4540: Add CPUManager policy option strict-cpu-reservation

Signed-off-by: Jing Zhang <jing.c.zhang.ext@nokia.com>
This commit is contained in:
Jing Zhang 2024-07-03 20:13:56 -04:00 committed by Mark Sasnal
parent be8ea98a5c
commit 0365cf4b20
2 changed files with 33 additions and 7 deletions

View File

@ -33,6 +33,7 @@ const (
DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa" DistributeCPUsAcrossNUMAOption string = "distribute-cpus-across-numa"
AlignBySocketOption string = "align-by-socket" AlignBySocketOption string = "align-by-socket"
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores" DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
StrictCPUReservationOption string = "strict-cpu-reservation"
) )
var ( var (
@ -40,6 +41,7 @@ var (
DistributeCPUsAcrossNUMAOption, DistributeCPUsAcrossNUMAOption,
AlignBySocketOption, AlignBySocketOption,
DistributeCPUsAcrossCoresOption, DistributeCPUsAcrossCoresOption,
StrictCPUReservationOption,
) )
betaOptions = sets.New[string]( betaOptions = sets.New[string](
FullPCPUsOnlyOption, FullPCPUsOnlyOption,
@ -86,6 +88,8 @@ type StaticPolicyOptions struct {
// cpus (HT) on different physical core. // cpus (HT) on different physical core.
// This is a preferred policy so do not throw error if they have to be packed in one physical core. // This is a preferred policy so do not throw error if they have to be packed in one physical core.
DistributeCPUsAcrossCores bool DistributeCPUsAcrossCores bool
// Flag to remove reserved cores from the list of available cores
StrictCPUReservation bool
} }
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration. // NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
@ -121,7 +125,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
return opts, fmt.Errorf("bad value for option %q: %w", name, err) return opts, fmt.Errorf("bad value for option %q: %w", name, err)
} }
opts.DistributeCPUsAcrossCores = optValue opts.DistributeCPUsAcrossCores = optValue
case StrictCPUReservationOption:
optValue, err := strconv.ParseBool(value)
if err != nil {
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
}
opts.StrictCPUReservation = optValue
default: default:
// this should never be reached, we already detect unknown options, // this should never be reached, we already detect unknown options,
// but we keep it as further safety. // but we keep it as further safety.

View File

@ -195,14 +195,19 @@ func (p *staticPolicy) validateState(s state.State) error {
tmpAssignments := s.GetCPUAssignments() tmpAssignments := s.GetCPUAssignments()
tmpDefaultCPUset := s.GetDefaultCPUSet() tmpDefaultCPUset := s.GetDefaultCPUSet()
allCPUs := p.topology.CPUDetails.CPUs()
if p.options.StrictCPUReservation {
allCPUs = allCPUs.Difference(p.reservedCPUs)
}
// Default cpuset cannot be empty when assignments exist // Default cpuset cannot be empty when assignments exist
if tmpDefaultCPUset.IsEmpty() { if tmpDefaultCPUset.IsEmpty() {
if len(tmpAssignments) != 0 { if len(tmpAssignments) != 0 {
return fmt.Errorf("default cpuset cannot be empty") return fmt.Errorf("default cpuset cannot be empty")
} }
// state is empty initialize // state is empty initialize
allCPUs := p.topology.CPUDetails.CPUs()
s.SetDefaultCPUSet(allCPUs) s.SetDefaultCPUSet(allCPUs)
klog.InfoS("Static policy initialized", "defaultCPUSet", allCPUs)
return nil return nil
} }
@ -210,9 +215,16 @@ func (p *staticPolicy) validateState(s state.State) error {
// 1. Check if the reserved cpuset is not part of default cpuset because: // 1. Check if the reserved cpuset is not part of default cpuset because:
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start // - kube/system reserved have changed (increased) - may lead to some containers not being able to start
// - user tampered with file // - user tampered with file
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) { if p.options.StrictCPUReservation {
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"", if !p.reservedCPUs.Intersection(tmpDefaultCPUset).IsEmpty() {
p.reservedCPUs.String(), tmpDefaultCPUset.String()) return fmt.Errorf("some of strictly reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
p.reservedCPUs.Intersection(tmpDefaultCPUset).String(), tmpDefaultCPUset.String())
}
} else {
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
p.reservedCPUs.String(), tmpDefaultCPUset.String())
}
} }
// 2. Check if state for static policy is consistent // 2. Check if state for static policy is consistent
@ -235,15 +247,20 @@ func (p *staticPolicy) validateState(s state.State) error {
// the set of CPUs stored in the state. // the set of CPUs stored in the state.
totalKnownCPUs := tmpDefaultCPUset.Clone() totalKnownCPUs := tmpDefaultCPUset.Clone()
tmpCPUSets := []cpuset.CPUSet{} tmpCPUSets := []cpuset.CPUSet{}
tmpAllCPUs := p.topology.CPUDetails.CPUs()
for pod := range tmpAssignments { for pod := range tmpAssignments {
for _, cset := range tmpAssignments[pod] { for _, cset := range tmpAssignments[pod] {
tmpCPUSets = append(tmpCPUSets, cset) tmpCPUSets = append(tmpCPUSets, cset)
} }
} }
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...) totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) { if p.options.StrictCPUReservation {
tmpAllCPUs = tmpAllCPUs.Difference(p.reservedCPUs)
}
if !totalKnownCPUs.Equals(tmpAllCPUs) {
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"", return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String()) tmpAllCPUs.String(), totalKnownCPUs.String())
} }
return nil return nil