diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go
index b87cd735659..32a0b5b8b5e 100644
--- a/cmd/kubelet/app/options/options.go
+++ b/cmd/kubelet/app/options/options.go
@@ -492,7 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
 	fs.BoolVar(&c.ProtectKernelDefaults, "protect-kernel-defaults", c.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")
 	fs.StringVar(&c.ReservedSystemCPUs, "reserved-cpus", c.ReservedSystemCPUs, "A comma-separated list of CPUs or CPU ranges that are reserved for system and kubernetes usage. This specific list will supersede cpu counts in --system-reserved and --kube-reserved.")
 	fs.StringVar(&c.TopologyManagerScope, "topology-manager-scope", c.TopologyManagerScope, "Scope to which topology hints applied. Topology Manager collects hints from Hint Providers and applies them to defined scope to ensure the pod admission. Possible values: 'container', 'pod'.")
-	fs.Var(cliflag.NewMapStringStringNoSplit(&c.TopologyManagerPolicyOptions), "topology-manager-policy-options", "A set of key=value Topology Manager policy options to use, to fine tune their behaviour. If not supplied, keep the default behaviour.")
+	fs.Var(cliflag.NewMapStringString(&c.TopologyManagerPolicyOptions), "topology-manager-policy-options", "A set of key=value Topology Manager policy options to use, to fine tune their behaviour. If not supplied, keep the default behaviour.")
 	// Node Allocatable Flags
 	fs.Var(cliflag.NewMapStringString(&c.SystemReserved), "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=1000) pairs that describe resources reserved for non-kubernetes components. Currently only cpu, memory, pid and local ephemeral storage for root file system are supported. See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more detail. [default=none]")
 	fs.Var(cliflag.NewMapStringString(&c.KubeReserved), "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=1000) pairs that describe resources reserved for kubernetes system components. Currently only cpu, memory, pid and local ephemeral storage for root file system are supported. See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more detail. [default=none]")
diff --git a/pkg/kubelet/cm/topologymanager/policy_options.go b/pkg/kubelet/cm/topologymanager/policy_options.go
index 1ef211cd0bd..2ea86d2fd3b 100644
--- a/pkg/kubelet/cm/topologymanager/policy_options.go
+++ b/pkg/kubelet/cm/topologymanager/policy_options.go
@@ -22,17 +22,20 @@ import (
 
 	"k8s.io/apimachinery/pkg/util/sets"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	"k8s.io/klog/v2"
 	kubefeatures "k8s.io/kubernetes/pkg/features"
 )
 
 const (
 	PreferClosestNUMANodes string = "prefer-closest-numa-nodes"
+	MaxAllowableNUMANodes  string = "max-allowable-numa-nodes"
 )
 
 var (
 	alphaOptions = sets.New[string]()
 	betaOptions  = sets.New[string](
 		PreferClosestNUMANodes,
+		MaxAllowableNUMANodes,
 	)
 	stableOptions = sets.New[string]()
 )
@@ -54,11 +57,17 @@ func CheckPolicyOptionAvailable(option string) error {
 }
 
 type PolicyOptions struct {
-	PreferClosestNUMA bool
+	PreferClosestNUMA     bool
+	MaxAllowableNUMANodes int
 }
 
 func NewPolicyOptions(policyOptions map[string]string) (PolicyOptions, error) {
-	opts := PolicyOptions{}
+	opts := PolicyOptions{
+		// Set MaxAllowableNUMANodes to the default. This will be overwritten
+		// if the user has specified a policy option for MaxAllowableNUMANodes.
+		MaxAllowableNUMANodes: defaultMaxAllowableNUMANodes,
+	}
+
 	for name, value := range policyOptions {
 		if err := CheckPolicyOptionAvailable(name); err != nil {
 			return opts, err
@@ -71,6 +80,20 @@ func NewPolicyOptions(policyOptions map[string]string) (PolicyOptions, error) {
 				return opts, fmt.Errorf("bad value for option %q: %w", name, err)
 			}
 			opts.PreferClosestNUMA = optValue
+		case MaxAllowableNUMANodes:
+			optValue, err := strconv.Atoi(value)
+			if err != nil {
+				return opts, fmt.Errorf("unable to convert policy option to integer %q: %w", name, err)
+			}
+
+			if optValue < defaultMaxAllowableNUMANodes {
+				return opts, fmt.Errorf("the minimum value of %q should not be less than %v", name, defaultMaxAllowableNUMANodes)
+			}
+
+			if optValue > defaultMaxAllowableNUMANodes {
+				klog.InfoS("WARNING: the value of max-allowable-numa-nodes is more than the default recommended value", "max-allowable-numa-nodes", optValue, "defaultMaxAllowableNUMANodes", defaultMaxAllowableNUMANodes)
+			}
+			opts.MaxAllowableNUMANodes = optValue
 		default:
 			// this should never be reached, we already detect unknown options,
 			// but we keep it as further safety.
diff --git a/pkg/kubelet/cm/topologymanager/policy_options_test.go b/pkg/kubelet/cm/topologymanager/policy_options_test.go
index 36316bb2f4e..4876f6af7b7 100644
--- a/pkg/kubelet/cm/topologymanager/policy_options_test.go
+++ b/pkg/kubelet/cm/topologymanager/policy_options_test.go
@@ -21,7 +21,6 @@ import (
 	"strings"
 	"testing"
 
-	"k8s.io/apimachinery/pkg/util/sets"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	"k8s.io/component-base/featuregate"
 	featuregatetesting "k8s.io/component-base/featuregate/testing"
@@ -52,10 +51,23 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 			featureGate:       pkgfeatures.TopologyManagerPolicyBetaOptions,
 			featureGateEnable: true,
 			expectedOptions: PolicyOptions{
-				PreferClosestNUMA: true,
+				PreferClosestNUMA:     true,
+				MaxAllowableNUMANodes: 8,
 			},
 			policyOptions: map[string]string{
 				PreferClosestNUMANodes: "true",
+				MaxAllowableNUMANodes:  "8",
+			},
+		},
+		{
+			description:       "return TopologyManagerOptions with MaxAllowableNUMANodes set to 12",
+			featureGate:       pkgfeatures.TopologyManagerPolicyBetaOptions,
+			featureGateEnable: true,
+			expectedOptions: PolicyOptions{
+				MaxAllowableNUMANodes: 12,
+			},
+			policyOptions: map[string]string{
+				MaxAllowableNUMANodes: "12",
 			},
 		},
 		{
@@ -63,14 +75,18 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 			featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
 			policyOptions: map[string]string{
 				PreferClosestNUMANodes: "true",
+				MaxAllowableNUMANodes:  "8",
 			},
 			expectedErr: fmt.Errorf("Topology Manager Policy Beta-level Options not enabled,"),
 		},
 		{
 			description: "return empty TopologyManagerOptions",
+			expectedOptions: PolicyOptions{
+				MaxAllowableNUMANodes: 8,
+			},
 		},
 		{
-			description:       "fail to parse options",
+			description:       "fail to parse options with error PreferClosestNUMANodes",
 			featureGate:       pkgfeatures.TopologyManagerPolicyAlphaOptions,
 			featureGateEnable: true,
 			policyOptions: map[string]string{
@@ -78,6 +94,15 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 			},
 			expectedErr: fmt.Errorf("bad value for option"),
 		},
+		{
+			description:       "fail to parse options with error MaxAllowableNUMANodes",
+			featureGate:       pkgfeatures.TopologyManagerPolicyAlphaOptions,
+			featureGateEnable: true,
+			policyOptions: map[string]string{
+				MaxAllowableNUMANodes: "can't parse to int",
+			},
+			expectedErr: fmt.Errorf("unable to convert policy option to integer"),
+		},
 		{
 			description:       "test beta options success",
 			featureGate:       pkgfeatures.TopologyManagerPolicyBetaOptions,
@@ -85,6 +110,10 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 			policyOptions: map[string]string{
 				fancyBetaOption: "true",
 			},
+			expectedOptions: PolicyOptions{
+				PreferClosestNUMA:     false,
+				MaxAllowableNUMANodes: 8,
+			},
 		},
 		{
 			description: "test beta options fail",
@@ -101,6 +130,10 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 			policyOptions: map[string]string{
 				fancyAlphaOption: "true",
 			},
+			expectedOptions: PolicyOptions{
+				PreferClosestNUMA:     false,
+				MaxAllowableNUMANodes: 8,
+			},
 		},
 		{
 			description: "test alpha options fail",
@@ -112,7 +145,7 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 	}
 
 	betaOptions.Insert(fancyBetaOption)
-	alphaOptions = sets.New[string](fancyAlphaOption)
+	alphaOptions.Insert(fancyAlphaOption)
 
 	for _, tcase := range testCases {
 		t.Run(tcase.description, func(t *testing.T) {
@@ -122,13 +155,13 @@ func TestNewTopologyManagerOptions(t *testing.T) {
 			opts, err := NewPolicyOptions(tcase.policyOptions)
 			if tcase.expectedErr != nil {
 				if !strings.Contains(err.Error(), tcase.expectedErr.Error()) {
-					t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tcase.expectedErr.Error())
+					t.Errorf("Unexpected error message. Have: %s, wants %s", err.Error(), tcase.expectedErr.Error())
 				}
+				return
 			}
 
 			if opts != tcase.expectedOptions {
 				t.Errorf("Expected TopologyManagerOptions to equal %v, not %v", tcase.expectedOptions, opts)
-
 			}
 		})
 	}
diff --git a/pkg/kubelet/cm/topologymanager/topology_manager.go b/pkg/kubelet/cm/topologymanager/topology_manager.go
index 71787a25732..7c5e9d3d8cc 100644
--- a/pkg/kubelet/cm/topologymanager/topology_manager.go
+++ b/pkg/kubelet/cm/topologymanager/topology_manager.go
@@ -29,7 +29,7 @@ import (
 )
 
 const (
-	// maxAllowableNUMANodes specifies the maximum number of NUMA Nodes that
+	// defaultMaxAllowableNUMANodes specifies the maximum number of NUMA Nodes that
 	// the TopologyManager supports on the underlying machine.
 	//
 	// At present, having more than this number of NUMA Nodes will result in a
@@ -37,7 +37,7 @@ const (
 	// generate hints for them. As such, if more NUMA Nodes than this are
 	// present on a machine and the TopologyManager is enabled, an error will
 	// be returned and the TopologyManager will not be loaded.
-	maxAllowableNUMANodes = 8
+	defaultMaxAllowableNUMANodes = 8
 	// ErrorTopologyAffinity represents the type for a TopologyAffinityError
 	ErrorTopologyAffinity = "TopologyAffinityError"
 )
@@ -151,8 +151,8 @@ func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, topology
 		return nil, fmt.Errorf("cannot discover NUMA topology: %w", err)
 	}
 
-	if topologyPolicyName != PolicyNone && len(numaInfo.Nodes) > maxAllowableNUMANodes {
-		return nil, fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", maxAllowableNUMANodes)
+	if topologyPolicyName != PolicyNone && len(numaInfo.Nodes) > opts.MaxAllowableNUMANodes {
+		return nil, fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", opts.MaxAllowableNUMANodes)
 	}
 
 	var policy Policy
diff --git a/pkg/kubelet/cm/topologymanager/topology_manager_test.go b/pkg/kubelet/cm/topologymanager/topology_manager_test.go
index 24404981d6e..7f7c454f683 100644
--- a/pkg/kubelet/cm/topologymanager/topology_manager_test.go
+++ b/pkg/kubelet/cm/topologymanager/topology_manager_test.go
@@ -105,7 +105,7 @@ func TestNewManager(t *testing.T) {
 			description:    "more than 8 NUMA nodes",
 			policyName:     "best-effort",
 			expectedPolicy: "best-effort",
-			expectedError:  fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", maxAllowableNUMANodes),
+			expectedError:  fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", defaultMaxAllowableNUMANodes),
 			topology: []cadvisorapi.Node{
 				{
 					Id: 0,
diff --git a/test/e2e_node/topology_manager_metrics_test.go b/test/e2e_node/topology_manager_metrics_test.go
index ce985ec804b..817a6beeec7 100644
--- a/test/e2e_node/topology_manager_metrics_test.go
+++ b/test/e2e_node/topology_manager_metrics_test.go
@@ -66,7 +66,7 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
 		policy := topologymanager.PolicySingleNumaNode
 		scope := podScopeTopology
 
-		newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
+		newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, nil, 0)
 		updateKubeletConfig(ctx, f, newCfg, true)
 	})
diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go
index a51b30f2a1b..6502f960619 100644
--- a/test/e2e_node/topology_manager_test.go
+++ b/test/e2e_node/topology_manager_test.go
@@ -203,13 +203,17 @@ func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (in
 	return findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes)
 }
 
-func configureTopologyManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, policy, scope string, configMap *v1.ConfigMap, numaNodes int) (*kubeletconfig.KubeletConfiguration, string) {
+func configureTopologyManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, policy, scope string, topologyOptions map[string]string, configMap *v1.ConfigMap, numaNodes int) (*kubeletconfig.KubeletConfiguration, string) {
 	// Configure Topology Manager in Kubelet with policy.
 	newCfg := oldCfg.DeepCopy()
 	if newCfg.FeatureGates == nil {
 		newCfg.FeatureGates = make(map[string]bool)
 	}
 
+	if topologyOptions != nil {
+		newCfg.TopologyManagerPolicyOptions = topologyOptions
+	}
+
 	// Set the Topology Manager policy
 	newCfg.TopologyManagerPolicy = policy
 
@@ -858,7 +862,7 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 	}
 }
 
-func runTopologyManagerTests(f *framework.Framework) {
+func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]string) {
 	var oldCfg *kubeletconfig.KubeletConfiguration
 	var err error
 
@@ -879,7 +883,7 @@ func runTopologyManagerTests(f *framework.Framework) {
 			ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
 			framework.Logf("Configuring topology Manager policy to %s", policy)
 
-			newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
+			newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, topologyOptions, nil, 0)
 			updateKubeletConfig(ctx, f, newCfg, true)
 			// Run the tests
 			runTopologyManagerPolicySuiteTests(ctx, f)
@@ -903,7 +907,7 @@ func runTopologyManagerTests(f *framework.Framework) {
 			ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
 			framework.Logf("Configuring topology Manager policy to %s", policy)
 
-			newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, configMap, numaNodes)
+			newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, topologyOptions, configMap, numaNodes)
 			updateKubeletConfig(ctx, f, newCfg, true)
 
 			runTopologyManagerNodeAlignmentSuiteTests(ctx, f, sd, reservedSystemCPUs, policy, numaNodes, coreCount)
@@ -921,7 +925,7 @@ func runTopologyManagerTests(f *framework.Framework) {
 		policy := topologymanager.PolicySingleNumaNode
 		scope := podScopeTopology
 
-		newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, configMap, numaNodes)
+		newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, topologyOptions, configMap, numaNodes)
 		updateKubeletConfig(ctx, f, newCfg, true)
 
 		runTMScopeResourceAlignmentTestSuite(ctx, f, configMap, reservedSystemCPUs, policy, numaNodes, coreCount)
@@ -960,6 +964,13 @@ var _ = SIGDescribe("Topology Manager", framework.WithSerial(), feature.Topology
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
 
 	ginkgo.Context("With kubeconfig updated to static CPU Manager policy run the Topology Manager tests", func() {
-		runTopologyManagerTests(f)
+		runTopologyManagerTests(f, nil)
+	})
+	ginkgo.Context("With kubeconfig's topologyOptions updated to static CPU Manager policy run the Topology Manager tests", ginkgo.Label("MaxAllowableNUMANodes"), func() {
+		// At present, the default value of defaultMaxAllowableNUMANodes is 8, so we
+		// run the same tests with 2 * defaultMaxAllowableNUMANodes (16). This is the
+		// most basic verification that the changed setting does not break anything.
+		doubleDefaultMaxAllowableNUMANodes := strconv.Itoa(8 * 2)
+		runTopologyManagerTests(f, map[string]string{topologymanager.MaxAllowableNUMANodes: doubleDefaultMaxAllowableNUMANodes})
 	})
 })
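
Reviewer note (not part of the patch): the validation rules added to `NewPolicyOptions` are easiest to see in isolation. The sketch below reimplements just that logic as a standalone program, assuming 8 stays both the default and the minimum; the helper name `parseMaxAllowableNUMANodes` and the use of the standard `log` package instead of klog are illustrative choices, not code from the tree.

```go
package main

import (
	"fmt"
	"log"
	"strconv"
)

// Mirrors the constant renamed in topology_manager.go: 8 remains the
// default value and, per the patch, also the minimum accepted value.
const defaultMaxAllowableNUMANodes = 8

// parseMaxAllowableNUMANodes is a hypothetical helper mirroring the
// MaxAllowableNUMANodes case added to NewPolicyOptions in this patch.
func parseMaxAllowableNUMANodes(value string) (int, error) {
	v, err := strconv.Atoi(value)
	if err != nil {
		// Non-integer values are rejected outright.
		return 0, fmt.Errorf("unable to convert policy option to integer %q: %w", value, err)
	}
	if v < defaultMaxAllowableNUMANodes {
		// Values below the long-standing default are rejected.
		return 0, fmt.Errorf("value %d is less than the minimum %d", v, defaultMaxAllowableNUMANodes)
	}
	if v > defaultMaxAllowableNUMANodes {
		// The patch logs this via klog.InfoS; plain log keeps the sketch standalone.
		log.Printf("WARNING: max-allowable-numa-nodes=%d exceeds the default recommended value %d", v, defaultMaxAllowableNUMANodes)
	}
	return v, nil
}

func main() {
	for _, raw := range []string{"8", "16", "4", "not-a-number"} {
		v, err := parseMaxAllowableNUMANodes(raw)
		fmt.Printf("input=%q value=%d err=%v\n", raw, v, err)
	}
}
```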
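A second, more speculative sketch of how the option is consumed end to end: the `--topology-manager-policy-options` flag registered in options.go parses `max-allowable-numa-nodes=16` into the policy-options map, and the e2e helper now copies such a map into the kubelet configuration. The struct below is only a stand-in for the real `KubeletConfiguration` type, and the feature gate is shown because the option is registered under `betaOptions` in this patch; whether that gate is on by default in a given release is not asserted here.

```go
package main

import "fmt"

// kubeletTopologyConfig is a stand-in for the KubeletConfiguration fields
// this patch touches; it is not the real kubelet config API type.
type kubeletTopologyConfig struct {
	FeatureGates                 map[string]bool
	TopologyManagerPolicy        string
	TopologyManagerPolicyOptions map[string]string
}

func main() {
	// Roughly equivalent command line, per the flag registered in options.go:
	//   --topology-manager-policy=best-effort \
	//   --topology-manager-policy-options=max-allowable-numa-nodes=16
	cfg := kubeletTopologyConfig{
		// max-allowable-numa-nodes is a beta-level option, so the beta-options
		// feature gate must be enabled for NewPolicyOptions to accept it.
		FeatureGates:          map[string]bool{"TopologyManagerPolicyBetaOptions": true},
		TopologyManagerPolicy: "best-effort",
		TopologyManagerPolicyOptions: map[string]string{
			// Same key string as topologymanager.MaxAllowableNUMANodes.
			"max-allowable-numa-nodes": "16",
		},
	}
	fmt.Printf("%+v\n", cfg)
}
```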