Improved multi-numa alignment in Topology Manager: introduce TopologyManagerOptions

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
PiotrProkop 2022-11-03 10:45:21 +01:00
parent 58ef3f202a
commit d5dd42dfac
5 changed files with 315 additions and 22 deletions

View File

@ -709,6 +709,16 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
s.CPUManagerPolicyOptions, features.CPUManager, features.CPUManagerPolicyOptions)
}
var topologyManagerPolicyOptions map[string]string
if utilfeature.DefaultFeatureGate.Enabled(features.TopologyManager) {
if utilfeature.DefaultFeatureGate.Enabled(features.TopologyManagerPolicyOptions) {
topologyManagerPolicyOptions = s.TopologyManagerPolicyOptions
} else if s.TopologyManagerPolicyOptions != nil {
return fmt.Errorf("topology manager policy options %v require feature gates %q, %q enabled",
s.TopologyManagerPolicyOptions, features.TopologyManager, features.TopologyManagerPolicyOptions)
}
}
kubeDeps.ContainerManager, err = cm.NewContainerManager(
kubeDeps.Mounter,
kubeDeps.CAdvisorInterface,
@ -731,17 +741,18 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
ReservedSystemCPUs: reservedSystemCPUs,
HardEvictionThresholds: hardEvictionThresholds,
},
QOSReserved: *experimentalQOSReserved,
CPUManagerPolicy: s.CPUManagerPolicy,
CPUManagerPolicyOptions: cpuManagerPolicyOptions,
CPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
ExperimentalMemoryManagerPolicy: s.MemoryManagerPolicy,
ExperimentalMemoryManagerReservedMemory: s.ReservedMemory,
ExperimentalPodPidsLimit: s.PodPidsLimit,
EnforceCPULimits: s.CPUCFSQuota,
CPUCFSQuotaPeriod: s.CPUCFSQuotaPeriod.Duration,
ExperimentalTopologyManagerPolicy: s.TopologyManagerPolicy,
ExperimentalTopologyManagerScope: s.TopologyManagerScope,
QOSReserved: *experimentalQOSReserved,
CPUManagerPolicy: s.CPUManagerPolicy,
CPUManagerPolicyOptions: cpuManagerPolicyOptions,
CPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
ExperimentalMemoryManagerPolicy: s.MemoryManagerPolicy,
ExperimentalMemoryManagerReservedMemory: s.ReservedMemory,
ExperimentalPodPidsLimit: s.PodPidsLimit,
EnforceCPULimits: s.CPUCFSQuota,
CPUCFSQuotaPeriod: s.CPUCFSQuotaPeriod.Duration,
ExperimentalTopologyManagerPolicy: s.TopologyManagerPolicy,
ExperimentalTopologyManagerScope: s.TopologyManagerScope,
ExperimentalTopologyManagerPolicyOptions: topologyManagerPolicyOptions,
},
s.FailSwapOn,
kubeDeps.Recorder)

View File

@ -767,6 +767,34 @@ const (
// Enable resource managers to make NUMA aligned decisions
TopologyManager featuregate.Feature = "TopologyManager"
// owner: @PiotrProkop
// kep: https://kep.k8s.io/3545
// alpha: v1.26
//
// Allow fine-tuning of topology manager policies with alpha options.
// This feature gate:
// - will guard *a group* of topology manager options whose quality level is alpha.
// - will never graduate to beta or stable.
TopologyManagerPolicyAlphaOptions featuregate.Feature = "TopologyManagerPolicyAlphaOptions"
// owner: @PiotrProkop
// kep: https://kep.k8s.io/3545
// alpha: v1.26
//
// Allow fine-tuning of topology manager policies with beta options.
// This feature gate:
// - will guard *a group* of topology manager options whose quality level is beta.
// - is thus *introduced* as beta
// - will never graduate to stable.
TopologyManagerPolicyBetaOptions featuregate.Feature = "TopologyManagerPolicyBetaOptions"
// owner: @PiotrProkop
// kep: https://kep.k8s.io/3545
// alpha: v1.26
//
// Allow the usage of options to fine-tune the topology manager policies.
TopologyManagerPolicyOptions featuregate.Feature = "TopologyManagerPolicyOptions"
// owner: @rata, @giuseppe
// kep: https://kep.k8s.io/127
// alpha: v1.25
@ -1027,6 +1055,12 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
TopologyManager: {Default: true, PreRelease: featuregate.Beta},
TopologyManagerPolicyAlphaOptions: {Default: false, PreRelease: featuregate.Alpha},
TopologyManagerPolicyBetaOptions: {Default: false, PreRelease: featuregate.Beta},
TopologyManagerPolicyOptions: {Default: false, PreRelease: featuregate.Alpha},
VolumeCapacityPriority: {Default: false, PreRelease: featuregate.Alpha},
UserNamespacesStatelessPodsSupport: {Default: false, PreRelease: featuregate.Alpha},

View File

@ -133,17 +133,18 @@ type NodeConfig struct {
KubeletRootDir string
ProtectKernelDefaults bool
NodeAllocatableConfig
QOSReserved map[v1.ResourceName]int64
CPUManagerPolicy string
CPUManagerPolicyOptions map[string]string
ExperimentalTopologyManagerScope string
CPUManagerReconcilePeriod time.Duration
ExperimentalMemoryManagerPolicy string
ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
ExperimentalPodPidsLimit int64
EnforceCPULimits bool
CPUCFSQuotaPeriod time.Duration
ExperimentalTopologyManagerPolicy string
QOSReserved map[v1.ResourceName]int64
CPUManagerPolicy string
CPUManagerPolicyOptions map[string]string
ExperimentalTopologyManagerScope string
CPUManagerReconcilePeriod time.Duration
ExperimentalMemoryManagerPolicy string
ExperimentalMemoryManagerReservedMemory []kubeletconfig.MemoryReservation
ExperimentalPodPidsLimit int64
EnforceCPULimits bool
CPUCFSQuotaPeriod time.Duration
ExperimentalTopologyManagerPolicy string
ExperimentalTopologyManagerPolicyOptions map[string]string
}
type NodeAllocatableConfig struct {

View File

@ -0,0 +1,81 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package topologymanager
import (
"fmt"
"strconv"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
kubefeatures "k8s.io/kubernetes/pkg/features"
)
const (
PreferClosestNUMANodes string = "prefer-closest-numa-nodes"
)
var (
alphaOptions = sets.NewString(
PreferClosestNUMANodes,
)
betaOptions = sets.NewString()
stableOptions = sets.NewString()
)
func CheckPolicyOptionAvailable(option string) error {
if !alphaOptions.Has(option) && !betaOptions.Has(option) && !stableOptions.Has(option) {
return fmt.Errorf("unknown Topology Manager Policy option: %q", option)
}
if alphaOptions.Has(option) && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManagerPolicyAlphaOptions) {
return fmt.Errorf("Topology Manager Policy Alpha-level Options not enabled, but option %q provided", option)
}
if betaOptions.Has(option) && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManagerPolicyBetaOptions) {
return fmt.Errorf("Topology Manager Policy Beta-level Options not enabled, but option %q provided", option)
}
return nil
}
type PolicyOptions struct {
PreferClosestNUMA bool
}
func NewPolicyOptions(policyOptions map[string]string) (PolicyOptions, error) {
opts := PolicyOptions{}
for name, value := range policyOptions {
if err := CheckPolicyOptionAvailable(name); err != nil {
return opts, err
}
switch name {
case PreferClosestNUMANodes:
optValue, err := strconv.ParseBool(value)
if err != nil {
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
}
opts.PreferClosestNUMA = optValue
default:
// this should never be reached, we already detect unknown options,
// but we keep it as further safety.
return opts, fmt.Errorf("unsupported topologymanager option: %q (%s)", name, value)
}
}
return opts, nil
}

View File

@ -0,0 +1,166 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package topologymanager
import (
"fmt"
"strings"
"testing"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
featuregatetesting "k8s.io/component-base/featuregate/testing"
pkgfeatures "k8s.io/kubernetes/pkg/features"
)
var fancyBetaOption = "fancy-new-option"
type optionAvailTest struct {
option string
featureGate featuregate.Feature
featureGateEnable bool
expectedAvailable bool
}
func TestNewTopologyManagerOptions(t *testing.T) {
testCases := []struct {
description string
policyOptions map[string]string
featureGate featuregate.Feature
expectedErr error
expectedOptions PolicyOptions
}{
{
description: "return TopologyManagerOptions with PreferClosestNUMA set to true",
featureGate: pkgfeatures.TopologyManagerPolicyAlphaOptions,
expectedOptions: PolicyOptions{
PreferClosestNUMA: true,
},
policyOptions: map[string]string{
PreferClosestNUMANodes: "true",
},
},
{
description: "return empty TopologyManagerOptions",
},
{
description: "fail to parse options",
featureGate: pkgfeatures.TopologyManagerPolicyAlphaOptions,
policyOptions: map[string]string{
PreferClosestNUMANodes: "not a boolean",
},
expectedErr: fmt.Errorf("bad value for option"),
},
{
description: "test beta options success",
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
policyOptions: map[string]string{
fancyBetaOption: "true",
},
},
{
description: "test beta options success",
policyOptions: map[string]string{
fancyBetaOption: "true",
},
expectedErr: fmt.Errorf("Topology Manager Policy Beta-level Options not enabled,"),
},
}
betaOptions = sets.NewString(fancyBetaOption)
for _, tcase := range testCases {
t.Run(tcase.description, func(t *testing.T) {
if tcase.featureGate != "" {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, tcase.featureGate, true)()
}
opts, err := NewPolicyOptions(tcase.policyOptions)
if tcase.expectedErr != nil {
if !strings.Contains(err.Error(), tcase.expectedErr.Error()) {
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tcase.expectedErr.Error())
}
}
if opts != tcase.expectedOptions {
t.Errorf("Expected TopologyManagerOptions to equal %v, not %v", tcase.expectedOptions, opts)
}
})
}
}
func TestPolicyDefaultsAvailable(t *testing.T) {
testCases := []optionAvailTest{
{
option: "this-option-does-not-exist",
expectedAvailable: false,
},
{
option: PreferClosestNUMANodes,
expectedAvailable: false,
},
}
for _, testCase := range testCases {
t.Run(testCase.option, func(t *testing.T) {
err := CheckPolicyOptionAvailable(testCase.option)
isEnabled := (err == nil)
if isEnabled != testCase.expectedAvailable {
t.Errorf("option %q available got=%v expected=%v", testCase.option, isEnabled, testCase.expectedAvailable)
}
})
}
}
func TestPolicyOptionsAvailable(t *testing.T) {
testCases := []optionAvailTest{
{
option: "this-option-does-not-exist",
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
featureGateEnable: false,
expectedAvailable: false,
},
{
option: "this-option-does-not-exist",
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
featureGateEnable: true,
expectedAvailable: false,
},
{
option: PreferClosestNUMANodes,
featureGate: pkgfeatures.TopologyManagerPolicyAlphaOptions,
featureGateEnable: true,
expectedAvailable: true,
},
{
option: PreferClosestNUMANodes,
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
featureGateEnable: true,
expectedAvailable: false,
},
}
for _, testCase := range testCases {
t.Run(testCase.option, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, testCase.featureGate, testCase.featureGateEnable)()
err := CheckPolicyOptionAvailable(testCase.option)
isEnabled := (err == nil)
if isEnabled != testCase.expectedAvailable {
t.Errorf("option %q available got=%v expected=%v", testCase.option, isEnabled, testCase.expectedAvailable)
}
})
}
}