diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index 2a59ffce09b..1b60f2cea45 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -518,7 +518,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig fs.StringVar(&c.CPUManagerPolicy, "cpu-manager-policy", c.CPUManagerPolicy, "CPU Manager policy to use. Possible values: 'none', 'static'. Default: 'none'") fs.DurationVar(&c.CPUManagerReconcilePeriod.Duration, "cpu-manager-reconcile-period", c.CPUManagerReconcilePeriod.Duration, " CPU Manager reconciliation period. Examples: '10s', or '1m'. If not supplied, defaults to `NodeStatusUpdateFrequency`") fs.Var(cliflag.NewMapStringString(&c.QOSReserved), "qos-reserved", " A set of ResourceName=Percentage (e.g. memory=50%) pairs that describe how pod resource requests are reserved at the QoS level. Currently only memory is supported. Requires the QOSReserved feature gate to be enabled.") - fs.StringVar(&c.TopologyManagerPolicy, "topology-manager-policy", c.TopologyManagerPolicy, "Topology Manager policy to use. Possible values: 'none', 'best-effort', 'restricted'.") + fs.StringVar(&c.TopologyManagerPolicy, "topology-manager-policy", c.TopologyManagerPolicy, "Topology Manager policy to use. Possible values: 'none', 'best-effort', 'restricted', 'single-numa-node'.") fs.DurationVar(&c.RuntimeRequestTimeout.Duration, "runtime-request-timeout", c.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later.") fs.StringVar(&c.HairpinMode, "hairpin-mode", c.HairpinMode, "How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. 
Valid values are \"promiscuous-bridge\", \"hairpin-veth\" and \"none\".") fs.Int32Var(&c.MaxPods, "max-pods", c.MaxPods, "Number of Pods that can run on this Kubelet.") diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index 54930f33cf2..446ad78326c 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -55,7 +55,7 @@ const ( // watches to observe changes to objects that are in its interest. WatchChangeDetectionStrategy ResourceChangeDetectionStrategy = "Watch" // RestrictedTopologyManagerPolicy is a mode in which kubelet only allows - // pods with a single NUMA alignment of CPU and device resources. + // pods with optimal NUMA node alignment for requested resources. RestrictedTopologyManagerPolicy = "restricted" // BestEffortTopologyManagerPolicy is a mode in which kubelet will favour // pods with NUMA alignment of CPU and device resources. @@ -63,6 +63,9 @@ const ( // NoneTopologyManager Policy is a mode in which kubelet has no knowledge // of NUMA alignment of a pod's CPU and device resources. NoneTopologyManagerPolicy = "none" + // SingleNumaNodeTopologyManager Policy is a mode in which kubelet only allows + // pods with a single NUMA alignment of CPU and device resources.
+ SingleNumaNodeTopologyManager = "single-numa-node" ) // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/kubelet/cm/topologymanager/BUILD b/pkg/kubelet/cm/topologymanager/BUILD index b34a6ea24f9..d69ba9ba597 100644 --- a/pkg/kubelet/cm/topologymanager/BUILD +++ b/pkg/kubelet/cm/topologymanager/BUILD @@ -8,6 +8,7 @@ go_library( "policy_best_effort.go", "policy_none.go", "policy_restricted.go", + "policy_single_numa_node.go", "topology_manager.go", ], importpath = "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager", @@ -45,6 +46,7 @@ go_test( "policy_best_effort_test.go", "policy_none_test.go", "policy_restricted_test.go", + "policy_single_numa_node_test.go", "topology_manager_test.go", ], embed = [":go_default_library"], diff --git a/pkg/kubelet/cm/topologymanager/policy.go b/pkg/kubelet/cm/topologymanager/policy.go index ac5bfa443e7..7ffa0a79f10 100644 --- a/pkg/kubelet/cm/topologymanager/policy.go +++ b/pkg/kubelet/cm/topologymanager/policy.go @@ -25,5 +25,5 @@ type Policy interface { //Returns Policy Name Name() string //Returns Pod Admit Handler Response based on hints and policy type - CanAdmitPodResult(admit bool) lifecycle.PodAdmitResult + CanAdmitPodResult(hint *TopologyHint) lifecycle.PodAdmitResult } diff --git a/pkg/kubelet/cm/topologymanager/policy_best_effort.go b/pkg/kubelet/cm/topologymanager/policy_best_effort.go index 0dc7e83aa09..b5f81b2fdb4 100644 --- a/pkg/kubelet/cm/topologymanager/policy_best_effort.go +++ b/pkg/kubelet/cm/topologymanager/policy_best_effort.go @@ -36,7 +36,7 @@ func (p *bestEffortPolicy) Name() string { return PolicyBestEffort } -func (p *bestEffortPolicy) CanAdmitPodResult(admit bool) lifecycle.PodAdmitResult { +func (p *bestEffortPolicy) CanAdmitPodResult(hint *TopologyHint) lifecycle.PodAdmitResult { return lifecycle.PodAdmitResult{ Admit: true, } diff --git a/pkg/kubelet/cm/topologymanager/policy_best_effort_test.go b/pkg/kubelet/cm/topologymanager/policy_best_effort_test.go index 
0de12121d1f..2fea3df9920 100644 --- a/pkg/kubelet/cm/topologymanager/policy_best_effort_test.go +++ b/pkg/kubelet/cm/topologymanager/policy_best_effort_test.go @@ -23,25 +23,24 @@ import ( func TestPolicyBestEffortCanAdmitPodResult(t *testing.T) { tcases := []struct { name string - admit bool + hint TopologyHint expected bool }{ { - name: "Affinity is set to false in topology hints", - admit: false, + name: "Preferred is set to false in topology hints", + hint: TopologyHint{nil, false}, expected: true, }, { - name: "Affinity is set to true in topology hints", - admit: true, + name: "Preferred is set to true in topology hints", + hint: TopologyHint{nil, true}, expected: true, }, } for _, tc := range tcases { policy := NewBestEffortPolicy() - admit := tc.admit - result := policy.CanAdmitPodResult(admit) + result := policy.CanAdmitPodResult(&tc.hint) if result.Admit != tc.expected { t.Errorf("Expected Admit field in result to be %t, got %t", tc.expected, result.Admit) diff --git a/pkg/kubelet/cm/topologymanager/policy_none.go b/pkg/kubelet/cm/topologymanager/policy_none.go index ae46bc37a6a..aacdf19aa10 100644 --- a/pkg/kubelet/cm/topologymanager/policy_none.go +++ b/pkg/kubelet/cm/topologymanager/policy_none.go @@ -36,7 +36,7 @@ func (p *nonePolicy) Name() string { return PolicyNone } -func (p *nonePolicy) CanAdmitPodResult(admit bool) lifecycle.PodAdmitResult { +func (p *nonePolicy) CanAdmitPodResult(hint *TopologyHint) lifecycle.PodAdmitResult { return lifecycle.PodAdmitResult{ Admit: true, } diff --git a/pkg/kubelet/cm/topologymanager/policy_none_test.go b/pkg/kubelet/cm/topologymanager/policy_none_test.go index c45b1388bb4..76f46e490b4 100644 --- a/pkg/kubelet/cm/topologymanager/policy_none_test.go +++ b/pkg/kubelet/cm/topologymanager/policy_none_test.go @@ -41,25 +41,24 @@ func TestName(t *testing.T) { func TestPolicyNoneCanAdmitPodResult(t *testing.T) { tcases := []struct { name string - admit bool + hint TopologyHint expected bool }{ { - name: "Affinity is set 
to false in topology hints", - admit: false, + name: "Preferred is set to false in topology hints", + hint: TopologyHint{nil, false}, expected: true, }, { - name: "Affinity is set to true in topology hints", - admit: true, + name: "Preferred is set to true in topology hints", + hint: TopologyHint{nil, true}, expected: true, }, } for _, tc := range tcases { policy := NewNonePolicy() - admit := tc.admit - result := policy.CanAdmitPodResult(admit) + result := policy.CanAdmitPodResult(&tc.hint) if result.Admit != tc.expected { t.Errorf("Expected Admit field in result to be %t, got %t", tc.expected, result.Admit) diff --git a/pkg/kubelet/cm/topologymanager/policy_restricted.go b/pkg/kubelet/cm/topologymanager/policy_restricted.go index 63766ba7db9..7993675ce35 100644 --- a/pkg/kubelet/cm/topologymanager/policy_restricted.go +++ b/pkg/kubelet/cm/topologymanager/policy_restricted.go @@ -36,8 +36,8 @@ func (p *restrictedPolicy) Name() string { return PolicyRestricted } -func (p *restrictedPolicy) CanAdmitPodResult(admit bool) lifecycle.PodAdmitResult { - if !admit { +func (p *restrictedPolicy) CanAdmitPodResult(hint *TopologyHint) lifecycle.PodAdmitResult { + if !hint.Preferred { return lifecycle.PodAdmitResult{ Admit: false, Reason: "Topology Affinity Error", diff --git a/pkg/kubelet/cm/topologymanager/policy_restricted_test.go b/pkg/kubelet/cm/topologymanager/policy_restricted_test.go index 66c4c806a2d..d011971632a 100644 --- a/pkg/kubelet/cm/topologymanager/policy_restricted_test.go +++ b/pkg/kubelet/cm/topologymanager/policy_restricted_test.go @@ -23,25 +23,24 @@ import ( func TestPolicyRestrictedCanAdmitPodResult(t *testing.T) { tcases := []struct { name string - admit bool + hint TopologyHint expected bool }{ { - name: "Affinity is set to false in topology hints", - admit: false, + name: "Preferred is set to false in topology hints", + hint: TopologyHint{nil, false}, expected: false, }, { - name: "Affinity is set to true in topology hints", - admit: true, + name: 
"Preferred is set to true in topology hints", + hint: TopologyHint{nil, true}, expected: true, }, } for _, tc := range tcases { policy := NewRestrictedPolicy() - admit := tc.admit - result := policy.CanAdmitPodResult(admit) + result := policy.CanAdmitPodResult(&tc.hint) if result.Admit != tc.expected { t.Errorf("Expected Admit field in result to be %t, got %t", tc.expected, result.Admit) diff --git a/pkg/kubelet/cm/topologymanager/policy_single_numa_node.go b/pkg/kubelet/cm/topologymanager/policy_single_numa_node.go new file mode 100644 index 00000000000..1d672a99cb2 --- /dev/null +++ b/pkg/kubelet/cm/topologymanager/policy_single_numa_node.go @@ -0,0 +1,50 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package topologymanager + +import ( + "k8s.io/kubernetes/pkg/kubelet/lifecycle" +) + +type singleNumaNodePolicy struct{} + +var _ Policy = &singleNumaNodePolicy{} + +// PolicySingleNumaNode policy name. +const PolicySingleNumaNode string = "single-numa-node" + +// NewSingleNumaNodePolicy returns single-numa-node policy. 
+func NewSingleNumaNodePolicy() Policy { + return &singleNumaNodePolicy{} +} + +func (p *singleNumaNodePolicy) Name() string { + return PolicySingleNumaNode +} + +func (p *singleNumaNodePolicy) CanAdmitPodResult(hint *TopologyHint) lifecycle.PodAdmitResult { + if !hint.Preferred || hint.NUMANodeAffinity.Count() > 1 { + return lifecycle.PodAdmitResult{ + Admit: false, + Reason: "Topology Affinity Error", + Message: "Resources cannot be allocated with Topology Locality", + } + } + return lifecycle.PodAdmitResult{ + Admit: true, + } +} diff --git a/pkg/kubelet/cm/topologymanager/policy_single_numa_node_test.go b/pkg/kubelet/cm/topologymanager/policy_single_numa_node_test.go new file mode 100644 index 00000000000..88cec2977b0 --- /dev/null +++ b/pkg/kubelet/cm/topologymanager/policy_single_numa_node_test.go @@ -0,0 +1,63 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package topologymanager + +import ( + "testing" +) + +func TestPolicySingleNumaNodeCanAdmitPodResult(t *testing.T) { + tcases := []struct { + name string + hint TopologyHint + expected bool + }{ + { + name: "Preferred is set to false in topology hints", + hint: TopologyHint{nil, false}, + expected: false, + }, + { + name: "NUMANodeAffinity has multiple NUMA Nodes masked in topology hints", + hint: TopologyHint{NewTestSocketMask(0, 1), true}, + expected: false, + }, + { + name: "NUMANodeAffinity has one NUMA Node masked in topology hints", + hint: TopologyHint{NewTestSocketMask(0), true}, + expected: true, + }, + } + + for _, tc := range tcases { + policy := NewSingleNumaNodePolicy() + result := policy.CanAdmitPodResult(&tc.hint) + + if result.Admit != tc.expected { + t.Errorf("Expected Admit field in result to be %t, got %t", tc.expected, result.Admit) + } + + if tc.expected == false { + if len(result.Reason) == 0 { + t.Errorf("Expected Reason field to be not empty") + } + if len(result.Message) == 0 { + t.Errorf("Expected Message field to be not empty") + } + } + } +} diff --git a/pkg/kubelet/cm/topologymanager/topology_manager.go b/pkg/kubelet/cm/topologymanager/topology_manager.go index e912916c5ea..ab54b9be4ce 100644 --- a/pkg/kubelet/cm/topologymanager/topology_manager.go +++ b/pkg/kubelet/cm/topologymanager/topology_manager.go @@ -103,6 +103,9 @@ func NewManager(numaNodeInfo cputopology.NUMANodeInfo, topologyPolicyName string case PolicyRestricted: policy = NewRestrictedPolicy() + case PolicySingleNumaNode: + policy = NewSingleNumaNodePolicy() + default: return nil, fmt.Errorf("unknown policy: \"%s\"", topologyPolicyName) } @@ -228,6 +231,11 @@ func (m *manager) calculateAffinity(pod v1.Pod, container v1.Container) Topology if !hint.Preferred { preferred = false } + // Special case PolicySingleNumaNode to only prefer hints where + // all providers have a single NUMA affinity set. 
+ if m.policy != nil && m.policy.Name() == PolicySingleNumaNode && hint.NUMANodeAffinity.Count() > 1 { + preferred = false + } numaAffinities = append(numaAffinities, hint.NUMANodeAffinity) } } @@ -308,7 +316,7 @@ func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR if pod.Status.QOSClass == v1.PodQOSGuaranteed { for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) { result := m.calculateAffinity(*pod, container) - admitPod := m.policy.CanAdmitPodResult(result.Preferred) + admitPod := m.policy.CanAdmitPodResult(&result) if !admitPod.Admit { return admitPod } diff --git a/pkg/kubelet/cm/topologymanager/topology_manager_test.go b/pkg/kubelet/cm/topologymanager/topology_manager_test.go index cb9127591ba..83e1fd49edd 100644 --- a/pkg/kubelet/cm/topologymanager/topology_manager_test.go +++ b/pkg/kubelet/cm/topologymanager/topology_manager_test.go @@ -111,6 +111,7 @@ func TestCalculateAffinity(t *testing.T) { name string hp []HintProvider expected TopologyHint + policy Policy }{ { name: "TopologyHint not set", @@ -655,10 +656,45 @@ func TestCalculateAffinity(t *testing.T) { Preferred: true, }, }, + { + name: "Special cased PolicySingleNumaNode for single NUMA hint generation", + policy: NewSingleNumaNodePolicy(), + hp: []HintProvider{ + &mockHintProvider{ + map[string][]TopologyHint{ + "resource1": { + { + NUMANodeAffinity: NewTestSocketMask(0, 1), + Preferred: true, + }, + }, + "resource2": { + { + NUMANodeAffinity: NewTestSocketMask(0), + Preferred: true, + }, + { + NUMANodeAffinity: NewTestSocketMask(1), + Preferred: true, + }, + { + NUMANodeAffinity: NewTestSocketMask(0, 1), + Preferred: false, + }, + }, + }, + }, + }, + expected: TopologyHint{ + NUMANodeAffinity: NewTestSocketMask(0), + Preferred: false, + }, + }, } for _, tc := range tcases { mngr := manager{ + policy: tc.policy, hintProviders: tc.hp, numaNodes: numaNodes, } diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go 
b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index e52426234ea..b2aeb87cc7a 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -55,7 +55,7 @@ const ( // watches to observe changes to objects that are in its interest. WatchChangeDetectionStrategy ResourceChangeDetectionStrategy = "Watch" // RestrictedTopologyManagerPolicy is a mode in which kubelet only allows - // pods with a single NUMA alignment of CPU and device resources. + // pods with optimal NUMA node alignment for requested resources. RestrictedTopologyManagerPolicy = "restricted" // BestEffortTopologyManagerPolicy is a mode in which kubelet will favour // pods with NUMA alignment of CPU and device resources. @@ -63,6 +63,9 @@ const ( // NoneTopologyManager Policy is a mode in which kubelet has no knowledge // of NUMA alignment of a pod's CPU and device resources. NoneTopologyManagerPolicy = "none" + // SingleNumaNodeTopologyManager Policy is a mode in which kubelet only allows + // pods with a single NUMA alignment of CPU and device resources. + SingleNumaNodeTopologyManager = "single-numa-node" ) // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object