diff --git a/pkg/kubelet/cm/cpumanager/BUILD b/pkg/kubelet/cm/cpumanager/BUILD
index 93ba44e4f85..ccbaef80690 100644
--- a/pkg/kubelet/cm/cpumanager/BUILD
+++ b/pkg/kubelet/cm/cpumanager/BUILD
@@ -10,7 +10,6 @@ go_library(
         "policy.go",
         "policy_none.go",
         "policy_static.go",
-        "topology_hints.go",
     ],
     importpath = "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager",
     visibility = ["//visibility:public"],
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 877b71dd353..1cc3cff652f 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -66,9 +66,10 @@ type Manager interface {
 	// State returns a read-only interface to the internal CPU manager state.
 	State() state.Reader
 
-	// GetTopologyHints implements the Topology Manager Interface and is
-	// consulted to make Topology aware resource alignments
-	GetTopologyHints(pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint
+	// GetTopologyHints implements the topologymanager.HintProvider Interface
+	// and is consulted to achieve NUMA aware resource alignment among this
+	// and other resource controllers.
+	GetTopologyHints(v1.Pod, v1.Container) map[string][]topologymanager.TopologyHint
 }
 
 type manager struct {
@@ -216,6 +217,11 @@ func (m *manager) State() state.Reader {
 	return m.state
 }
 
+func (m *manager) GetTopologyHints(pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint {
+	// Delegate to active policy
+	return m.policy.GetTopologyHints(m.state, pod, container)
+}
+
 type reconciledContainer struct {
 	podName       string
 	containerName string
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
index 09347718475..43fa26657ca 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
@@ -103,6 +103,10 @@ func (p *mockPolicy) RemoveContainer(s state.State, containerID string) error {
 	return p.err
 }
 
+func (p *mockPolicy) GetTopologyHints(s state.State, pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint {
+	return nil
+}
+
 type mockRuntimeService struct {
 	err error
 }
diff --git a/pkg/kubelet/cm/cpumanager/policy.go b/pkg/kubelet/cm/cpumanager/policy.go
index c79091659e3..83b5d07eedc 100644
--- a/pkg/kubelet/cm/cpumanager/policy.go
+++ b/pkg/kubelet/cm/cpumanager/policy.go
@@ -19,6 +19,7 @@ package cpumanager
 import (
 	"k8s.io/api/core/v1"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 )
 
 // Policy implements logic for pod container to CPU assignment.
@@ -29,4 +30,8 @@ type Policy interface {
 	AddContainer(s state.State, pod *v1.Pod, container *v1.Container, containerID string) error
 	// RemoveContainer call is idempotent
 	RemoveContainer(s state.State, containerID string) error
+	// GetTopologyHints implements the topologymanager.HintProvider Interface
+	// and is consulted to achieve NUMA aware resource alignment among this
+	// and other resource controllers.
+	GetTopologyHints(s state.State, pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint
 }
diff --git a/pkg/kubelet/cm/cpumanager/policy_none.go b/pkg/kubelet/cm/cpumanager/policy_none.go
index 294edc6bf31..fd56d08f89a 100644
--- a/pkg/kubelet/cm/cpumanager/policy_none.go
+++ b/pkg/kubelet/cm/cpumanager/policy_none.go
@@ -20,6 +20,7 @@ import (
 	"k8s.io/api/core/v1"
 	"k8s.io/klog"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
+	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
 )
 
 type nonePolicy struct{}
@@ -49,3 +50,7 @@ func (p *nonePolicy) AddContainer(s state.State, pod *v1.Pod, container *v1.Cont
 func (p *nonePolicy) RemoveContainer(s state.State, containerID string) error {
 	return nil
 }
+
+func (p *nonePolicy) GetTopologyHints(s state.State, pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint {
+	return nil
+}
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index 241f14463a8..e37c6114022 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -301,3 +301,99 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
 	// https://golang.org/ref/spec#Numeric_types
 	return int(cpuQuantity.Value())
 }
+
+func (p *staticPolicy) GetTopologyHints(s state.State, pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint {
+	// If there are no CPU resources requested for this container, we do not
+	// generate any topology hints.
+	if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok {
+		return nil
+	}
+
+	// Get a count of how many guaranteed CPUs have been requested.
+	requested := p.guaranteedCPUs(&pod, &container)
+
+	// If there are no guaranteed CPUs being requested, we do not generate
+	// any topology hints. This can happen, for example, because init
+	// containers don't have to have guaranteed CPUs in order for the pod
+	// to still be in the Guaranteed QoS tier.
+	if requested == 0 {
+		return nil
+	}
+
+	// Get a list of available CPUs.
+	available := p.assignableCPUs(s)
+
+	// Generate hints.
+	cpuHints := p.generateCPUTopologyHints(available, requested)
+	klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints)
+
+	return map[string][]topologymanager.TopologyHint{
+		string(v1.ResourceCPU): cpuHints,
+	}
+}
+
+// generateCPUTopologyHints generates a set of TopologyHints given the set of
+// available CPUs and the number of CPUs being requested.
+//
+// It follows the convention of marking all hints that have the same number of
+// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
+// marking all others with 'Preferred: false'.
+func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
+	// Initialize minAffinitySize to include all NUMA Nodes.
+	minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
+	// Initialize minSocketsOnMinAffinity to include all Sockets.
+	minSocketsOnMinAffinity := p.topology.CPUDetails.Sockets().Size()
+
+	// Iterate through all combinations of NUMA node bitmasks and build hints from them.
+	hints := []topologymanager.TopologyHint{}
+	bitmask.IterateBitMasks(p.topology.CPUDetails.NUMANodes().ToSlice(), func(mask bitmask.BitMask) {
+		// First, update minAffinitySize and minSocketsOnMinAffinity for the
+		// current request size.
+		cpusInMask := p.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size()
+		socketsInMask := p.topology.CPUDetails.SocketsInNUMANodes(mask.GetBits()...).Size()
+		if cpusInMask >= request && mask.Count() < minAffinitySize {
+			minAffinitySize = mask.Count()
+			if socketsInMask < minSocketsOnMinAffinity {
+				minSocketsOnMinAffinity = socketsInMask
+			}
+		}
+
+		// Then check to see if we have enough CPUs available on the current
+		// NUMA node bitmask to satisfy the CPU request.
+		numMatching := 0
+		for _, c := range availableCPUs.ToSlice() {
+			if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
+				numMatching++
+			}
+		}
+
+		// If we don't, then move onto the next combination.
+		if numMatching < request {
+			return
+		}
+
+		// Otherwise, create a new hint from the NUMA node bitmask and add it to the
+		// list of hints. We set all hint preferences to 'false' on the first
+		// pass through.
+		hints = append(hints, topologymanager.TopologyHint{
+			NUMANodeAffinity: mask,
+			Preferred:        false,
+		})
+	})
+
+	// Loop back through all hints and update the 'Preferred' field based on
+	// counting the number of bits set in the affinity mask and comparing it
+	// to the minAffinitySize. Only those with an equal number of bits set (and
+	// with a minimal set of sockets) will be considered preferred.
+	for i := range hints {
+		if hints[i].NUMANodeAffinity.Count() == minAffinitySize {
+			nodes := hints[i].NUMANodeAffinity.GetBits()
+			numSockets := p.topology.CPUDetails.SocketsInNUMANodes(nodes...).Size()
+			if numSockets == minSocketsOnMinAffinity {
+				hints[i].Preferred = true
+			}
+		}
+	}
+
+	return hints
+}
diff --git a/pkg/kubelet/cm/cpumanager/topology_hints.go b/pkg/kubelet/cm/cpumanager/topology_hints.go
deleted file mode 100644
index 1648b7678d3..00000000000
--- a/pkg/kubelet/cm/cpumanager/topology_hints.go
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
-Copyright 2019 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package cpumanager
-
-import (
-	"k8s.io/api/core/v1"
-	"k8s.io/klog"
-
-	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
-	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
-	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
-)
-
-func (m *manager) GetTopologyHints(pod v1.Pod, container v1.Container) map[string][]topologymanager.TopologyHint {
-	// The 'none' policy does not generate topology hints.
-	if m.policy.Name() == string(PolicyNone) {
-		return nil
-	}
-
-	// For all other policies, if there are no CPU resources requested for this
-	// container, we do not generate any topology hints.
-	if _, ok := container.Resources.Requests[v1.ResourceCPU]; !ok {
-		return nil
-	}
-
-	// Otherwise, attempt to generate TopologyHints for the CPUManager.
-	// For now, this implementation assumes the 'static' CPUManager policy.
-	// TODO: Generalize this so that its applicable to future CPUManager polices.
-
-	// Get a count of how many guaranteed CPUs have been requested.
-	requested := m.policy.(*staticPolicy).guaranteedCPUs(&pod, &container)
-
-	// If there are no guaranteed CPUs being requested, we do not generate
-	// any topology hints. This can happen, for example, because init
-	// containers don't have to have guaranteed CPUs in order for the pod
-	// to still be in the Guaranteed QOS tier.
-	if requested == 0 {
-		return nil
-	}
-
-	// Get a list of available CPUs.
-	available := m.policy.(*staticPolicy).assignableCPUs(m.state)
-
-	// Generate hints.
-	cpuHints := m.generateCPUTopologyHints(available, requested)
-	klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints)
-
-	return map[string][]topologymanager.TopologyHint{
-		string(v1.ResourceCPU): cpuHints,
-	}
-}
-
-// generateCPUtopologyHints generates a set of TopologyHints given the set of
-// available CPUs and the number of CPUs being requested.
-//
-// It follows the convention of marking all hints that have the same number of
-// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
-// marking all others with 'Preferred: false'.
-func (m *manager) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
-	// Initialize minAffinitySize to include all NUMA Nodes.
-	minAffinitySize := m.topology.CPUDetails.NUMANodes().Size()
-	// Initialize minSocketsOnMinAffinity to include all Sockets.
-	minSocketsOnMinAffinity := m.topology.CPUDetails.Sockets().Size()
-
-	// Iterate through all combinations of bitmasks and build hints from them.
-	hints := []topologymanager.TopologyHint{}
-	bitmask.IterateBitMasks(m.topology.CPUDetails.NUMANodes().ToSlice(), func(mask bitmask.BitMask) {
-		// First, update minAffinitySize and minSocketsOnMinAffinity for the
-		// current request size.
-		cpusInMask := m.topology.CPUDetails.CPUsInNUMANodes(mask.GetBits()...).Size()
-		socketsInMask := m.topology.CPUDetails.SocketsInNUMANodes(mask.GetBits()...).Size()
-		if cpusInMask >= request && mask.Count() < minAffinitySize {
-			minAffinitySize = mask.Count()
-			if socketsInMask < minSocketsOnMinAffinity {
-				minSocketsOnMinAffinity = socketsInMask
-			}
-		}
-
-		// Then check to see if we have enough CPUs available on the current
-		// Socket bitmask to satisfy the CPU request.
-		numMatching := 0
-		for _, c := range availableCPUs.ToSlice() {
-			if mask.IsSet(m.topology.CPUDetails[c].NUMANodeID) {
-				numMatching++
-			}
-		}
-
-		// If we don't, then move onto the next combination.
-		if numMatching < request {
-			return
-		}
-
-		// Otherwise, create a new hint from the socket bitmask and add it to the
-		// list of hints. We set all hint preferences to 'false' on the first
-		// pass through.
-		hints = append(hints, topologymanager.TopologyHint{
-			NUMANodeAffinity: mask,
-			Preferred:        false,
-		})
-	})
-
-	// Loop back through all hints and update the 'Preferred' field based on
-	// counting the number of bits sets in the affinity mask and comparing it
-	// to the minAffinitySize. Only those with an equal number of bits set (and
-	// with a minimal set of sockets) will be considered preferred.
-	for i := range hints {
-		if hints[i].NUMANodeAffinity.Count() == minAffinitySize {
-			nodes := hints[i].NUMANodeAffinity.GetBits()
-			numSockets := m.topology.CPUDetails.SocketsInNUMANodes(nodes...).Size()
-			if numSockets == minSocketsOnMinAffinity {
-				hints[i].Preferred = true
-			}
-		}
-	}
-
-	return hints
-}
diff --git a/pkg/kubelet/cm/topologymanager/topology_manager.go b/pkg/kubelet/cm/topologymanager/topology_manager.go
index 5c25a183b92..f8e7c662f23 100644
--- a/pkg/kubelet/cm/topologymanager/topology_manager.go
+++ b/pkg/kubelet/cm/topologymanager/topology_manager.go
@@ -67,8 +67,17 @@ type manager struct {
 	numaNodes []int
 }
 
-//HintProvider interface is to be implemented by Hint Providers
+// HintProvider is an interface for components that want to collaborate to
+// achieve globally optimal concrete resource alignment with respect to
+// NUMA locality.
 type HintProvider interface {
+	// GetTopologyHints returns a map of resource names to a list of possible
+	// concrete resource allocations in terms of NUMA locality hints. Each hint
+	// is optionally marked "preferred" and indicates the set of NUMA nodes
+	// involved in the hypothetical allocation. The topology manager calls
+	// this function for each hint provider, and merges the hints to produce
+	// a consensus "best" hint. The hint providers may subsequently query the
+	// topology manager to influence actual resource assignment.
 	GetTopologyHints(pod v1.Pod, container v1.Container) map[string][]TopologyHint
 }
 
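For reviewers unfamiliar with the hint-generation convention used in generateCPUTopologyHints above, the following standalone Go sketch (not part of this patch) illustrates the core idea on a hypothetical two-NUMA-node topology: every combination of NUMA nodes with enough available CPUs produces a hint, and only the narrowest such combinations are marked preferred. It deliberately uses plain slices and a map instead of cpuset.CPUSet and bitmask.BitMask, and omits the secondary tie-break on socket count (minSocketsOnMinAffinity), so it is a simplified illustration rather than the actual kubelet code.

// Simplified, dependency-free sketch of the hint-generation convention.
// The topology (2 NUMA nodes, 4 CPUs each) and the request size are
// hypothetical example values.
package main

import "fmt"

type hint struct {
	nodes     []int // NUMA nodes involved in the hypothetical allocation
	preferred bool
}

func main() {
	// cpusPerNode[n] is the number of available CPUs on NUMA node n.
	cpusPerNode := map[int]int{0: 4, 1: 4}
	request := 3 // CPUs requested by the container

	// All non-empty combinations of NUMA nodes, as bitmask.IterateBitMasks
	// would enumerate them in the real implementation.
	combinations := [][]int{{0}, {1}, {0, 1}}

	// Keep every combination with enough CPUs; track the narrowest one seen.
	minAffinitySize := len(cpusPerNode)
	var hints []hint
	for _, nodes := range combinations {
		avail := 0
		for _, n := range nodes {
			avail += cpusPerNode[n]
		}
		if avail < request {
			continue // not enough CPUs on this combination
		}
		if len(nodes) < minAffinitySize {
			minAffinitySize = len(nodes)
		}
		hints = append(hints, hint{nodes: nodes})
	}

	// Mark only the narrowest satisfying combinations as preferred.
	for i := range hints {
		hints[i].preferred = len(hints[i].nodes) == minAffinitySize
	}

	fmt.Println(hints) // [{[0] true} {[1] true} {[0 1] false}]
}

With request set to 5 instead, only the {0, 1} combination has enough CPUs, so it becomes the single (preferred) hint, mirroring how the static policy falls back to a cross-NUMA hint when no single node can satisfy the request.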