Merge pull request #126750 from AMDEPYC/uncore_v1

Split L3 Cache Topology Awareness in CPU Manager

This merge is contained in commit 50d0f920c0.

@@ -118,6 +118,17 @@ func (n *numaFirst) takeFullSecondLevel() {
 	n.acc.takeFullSockets()
 }
 
+// Sort the UncoreCaches within the NUMA nodes.
+func (a *cpuAccumulator) sortAvailableUncoreCaches() []int {
+	var result []int
+	for _, numa := range a.sortAvailableNUMANodes() {
+		uncore := a.details.UncoreInNUMANodes(numa).UnsortedList()
+		a.sort(uncore, a.details.CPUsInUncoreCaches)
+		result = append(result, uncore...)
+	}
+	return result
+}
+
 // If NUMA nodes are higher in the memory hierarchy than sockets, then just
 // sort the NUMA nodes directly, and return them.
 func (n *numaFirst) sortAvailableNUMANodes() []int {
@@ -318,6 +329,12 @@ func (a *cpuAccumulator) isSocketFree(socketID int) bool {
 	return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket()
 }
 
+// Returns true if the supplied UnCoreCache is fully available,
+// "fully available" means that all the CPUs in it are free.
+func (a *cpuAccumulator) isUncoreCacheFree(uncoreID int) bool {
+	return a.details.CPUsInUncoreCaches(uncoreID).Size() == a.topo.CPUDetails.CPUsInUncoreCaches(uncoreID).Size()
+}
+
 // Returns true if the supplied core is fully available in `a.details`.
 // "fully available" means that all the CPUs in it are free.
 func (a *cpuAccumulator) isCoreFree(coreID int) bool {
@@ -346,6 +363,17 @@ func (a *cpuAccumulator) freeSockets() []int {
 	return free
 }
 
+// Returns free UncoreCache IDs as a slice sorted by sortAvailableUncoreCaches().
+func (a *cpuAccumulator) freeUncoreCache() []int {
+	free := []int{}
+	for _, uncore := range a.sortAvailableUncoreCaches() {
+		if a.isUncoreCacheFree(uncore) {
+			free = append(free, uncore)
+		}
+	}
+	return free
+}
+
 // Returns free core IDs as a slice sorted by sortAvailableCores().
 func (a *cpuAccumulator) freeCores() []int {
 	free := []int{}
@@ -519,6 +547,62 @@ func (a *cpuAccumulator) takeFullSockets() {
 	}
 }
 
+func (a *cpuAccumulator) takeFullUncore() {
+	for _, uncore := range a.freeUncoreCache() {
+		cpusInUncore := a.topo.CPUDetails.CPUsInUncoreCaches(uncore)
+		if !a.needsAtLeast(cpusInUncore.Size()) {
+			continue
+		}
+		klog.V(4).InfoS("takeFullUncore: claiming uncore", "uncore", uncore)
+		a.take(cpusInUncore)
+	}
+}
+
+func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
+	numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
+
+	// determine the N number of free cores (physical cpus) within the UncoreCache, then
+	// determine the M number of free cpus (virtual cpus) that correspond with the free cores
+	freeCores := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncoreID)
+	freeCPUs := a.details.CPUsInCores(freeCores.UnsortedList()...)
+
+	// claim the cpus if the free cpus within the UncoreCache can satisfy the needed cpus
+	claimed := (a.numCPUsNeeded == freeCPUs.Size())
+	klog.V(4).InfoS("takePartialUncore: trying to claim partial uncore",
+		"uncore", uncoreID,
+		"claimed", claimed,
+		"needed", a.numCPUsNeeded,
+		"cores", freeCores.String(),
+		"cpus", freeCPUs.String())
+	if !claimed {
+		return
+
+	}
+	a.take(freeCPUs)
+}
+
+// First try to take full UncoreCache, if available and need is at least the size of the UncoreCache group.
+// Second try to take the partial UncoreCache if available and the request size can fit w/in the UncoreCache.
+func (a *cpuAccumulator) takeUncoreCache() {
+	numCPUsInUncore := a.topo.CPUsPerUncore()
+	for _, uncore := range a.sortAvailableUncoreCaches() {
+		// take full UncoreCache if the CPUs needed is greater than free UncoreCache size
+		if a.needsAtLeast(numCPUsInUncore) {
+			a.takeFullUncore()
+		}
+
+		if a.isSatisfied() {
+			return
+		}
+
+		// take partial UncoreCache if the CPUs needed is less than free UncoreCache size
+		a.takePartialUncore(uncore)
+		if a.isSatisfied() {
+			return
+		}
+	}
+}
+
 func (a *cpuAccumulator) takeFullCores() {
 	for _, core := range a.freeCores() {
 		cpusInCore := a.topo.CPUDetails.CPUsInCores(core)
@@ -637,6 +721,14 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC
 // or the remaining number of CPUs to take after having taken full sockets and NUMA nodes is less
 // than a whole NUMA node, the function tries to take whole physical cores (cores).
 //
+// If `PreferAlignByUncoreCache` is enabled, the function will try to optimally assign Uncorecaches.
+// If `numCPUs` is larger than or equal to the total number of CPUs in a Uncorecache, and there are
+// free (i.e. all CPUs within the Uncorecache are free) Uncorecaches, the function takes as many entire
+// cores from free Uncorecaches as possible. If/Once `numCPUs` is smaller than the total number of
+// CPUs in a free Uncorecache, the function scans each Uncorecache index in numerical order to assign
+// cores that will fit within the Uncorecache. If `numCPUs` cannot fit within any Uncorecache, the
+// function tries to take whole physical cores.
+//
 // If `numCPUs` is bigger than the total number of CPUs in a core, and there are
 // free (i.e. all CPUs in them are free) cores, the function takes as many entire free cores as possible.
 // The cores are taken from one socket at a time, and the sockets are considered by
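To make the strategy described in the comment block above concrete, here is a small standalone sketch. It is illustrative only, not the Kubernetes implementation: the function takeUncoreAligned, the caches slice and the free map are invented for the example. It models a single-socket, no-SMT machine with four uncore caches of four CPUs each and walks the same two passes — whole fully-free uncore caches first, then a partial fill from the first cache with enough free CPUs — reproducing the "take cpus from two full UncoreCaches and partial from a single UncoreCache" test case added later in this diff.

// uncore_sketch.go — illustrative only; not the Kubernetes code.
package main

import (
	"fmt"
	"sort"
)

// takeUncoreAligned mimics the documented strategy on a no-SMT topology:
// claim whole, fully-free uncore caches while the request is at least one
// cache in size, then fill the remainder from the first cache (in index
// order) that still has enough free CPUs.
func takeUncoreAligned(caches [][]int, free map[int]bool, need int) []int {
	var result []int
	take := func(cpus []int) {
		for _, c := range cpus {
			free[c] = false
			result = append(result, c)
			need--
		}
	}
	// Pass 1: whole uncore caches.
	for _, cache := range caches {
		if need < len(cache) {
			continue
		}
		full := true
		for _, c := range cache {
			if !free[c] {
				full = false
				break
			}
		}
		if full {
			take(cache)
		}
	}
	// Pass 2: partial uncore cache.
	for _, cache := range caches {
		if need == 0 {
			break
		}
		var avail []int
		for _, c := range cache {
			if free[c] {
				avail = append(avail, c)
			}
		}
		if len(avail) >= need {
			take(avail[:need])
		}
	}
	sort.Ints(result)
	return result
}

func main() {
	// Four uncore caches of four CPUs each (single socket, no SMT);
	// CPU 0 is already allocated and 10 CPUs are requested.
	caches := [][]int{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}}
	free := map[int]bool{}
	for cpu := 1; cpu <= 15; cpu++ {
		free[cpu] = true
	}
	fmt.Println(takeUncoreAligned(caches, free, 10)) // [1 2 4 5 6 7 8 9 10 11]
}

The real accumulator operates on cpuset.CPUSet values and, on SMT systems, selects whole physical cores through CoresNeededInUncoreCache before expanding them to CPUs; the sketch collapses that distinction because every core here has exactly one CPU.
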
@@ -658,7 +750,7 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC
 // the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending
 // order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with
 // the least amount of free CPUs to the one with the highest amount of free CPUs.
-func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) {
+func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) {
 	acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy)
 	if acc.isSatisfied() {
 		return acc.result, nil
@@ -681,7 +773,17 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
 		return acc.result, nil
 	}
 
-	// 2. Acquire whole cores, if available and the container requires at least
+	// 2. If PreferAlignByUncoreCache is enabled, acquire whole UncoreCaches
+	// if available and the container requires at least a UncoreCache's-worth
+	// of CPUs. Otherwise, acquire CPUs from the least amount of UncoreCaches.
+	if preferAlignByUncoreCache {
+		acc.takeUncoreCache()
+		if acc.isSatisfied() {
+			return acc.result, nil
+		}
+	}
+
+	// 3. Acquire whole cores, if available and the container requires at least
 	// a core's-worth of CPUs.
 	// If `CPUSortingStrategySpread` is specified, skip taking the whole core.
 	if cpuSortingStrategy != CPUSortingStrategySpread {
@@ -691,7 +793,7 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
 		}
 	}
 
-	// 3. Acquire single threads, preferring to fill partially-allocated cores
+	// 4. Acquire single threads, preferring to fill partially-allocated cores
 	// on the same sockets as the whole cores we have already taken in this
 	// allocation.
 	acc.takeRemainingCPUs()
@@ -769,8 +871,10 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 	// If the number of CPUs requested cannot be handed out in chunks of
 	// 'cpuGroupSize', then we just call out the packing algorithm since we
 	// can't distribute CPUs in this chunk size.
+	// PreferAlignByUncoreCache feature not implemented here yet and set to false.
+	// Support for PreferAlignByUncoreCache to be done at beta release.
 	if (numCPUs % cpuGroupSize) != 0 {
-		return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy)
+		return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false)
 	}
 
 	// Otherwise build an accumulator to start allocating CPUs from.
@@ -953,7 +1057,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 	// size 'cpuGroupSize' from 'bestCombo'.
 	distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize
 	for _, numa := range bestCombo {
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false)
 		acc.take(cpus)
 	}
 
@@ -968,7 +1072,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 		if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize {
 			continue
 		}
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false)
 		acc.take(cpus)
 		remainder -= cpuGroupSize
 	}
@@ -992,5 +1096,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 
 	// If we never found a combination of NUMA nodes that we could properly
 	// distribute CPUs across, fall back to the packing algorithm.
-	return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy)
+	return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false)
 }

@@ -668,6 +668,79 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
 			"",
 			mustParseCPUSet(t, "0-29,40-69,30,31,70,71"),
 		},
+		// Test cases for PreferAlignByUncoreCache
+		{
+			"take cpus from two full UncoreCaches and partial from a single UncoreCache",
+			topoUncoreSingleSocketNoSMT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "1-15"),
+			10,
+			"",
+			cpuset.New(1, 2, 4, 5, 6, 7, 8, 9, 10, 11),
+		},
+		{
+			"take one cpu from dual socket with HT - core from Socket 0",
+			topoDualSocketHT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
+			1,
+			"",
+			cpuset.New(2),
+		},
+		{
+			"take first available UncoreCache from first socket",
+			topoUncoreDualSocketNoSMT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "0-15"),
+			4,
+			"",
+			cpuset.New(0, 1, 2, 3),
+		},
+		{
+			"take all available UncoreCache from first socket",
+			topoUncoreDualSocketNoSMT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "2-15"),
+			6,
+			"",
+			cpuset.New(2, 3, 4, 5, 6, 7),
+		},
+		{
+			"take first available UncoreCache from second socket",
+			topoUncoreDualSocketNoSMT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "8-15"),
+			4,
+			"",
+			cpuset.New(8, 9, 10, 11),
+		},
+		{
+			"take first available UncoreCache from available NUMA",
+			topoUncoreSingleSocketMultiNuma,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "3,4-8,12"),
+			2,
+			"",
+			cpuset.New(4, 5),
+		},
+		{
+			"take cpus from best available UncoreCache group of multi uncore cache single socket - SMT enabled",
+			topoUncoreSingleSocketSMT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "2-3,10-11,4-7,12-15"),
+			6,
+			"",
+			cpuset.New(4, 5, 6, 12, 13, 14),
+		},
+		{
+			"take cpus from multiple UncoreCache of single socket - SMT enabled",
+			topoUncoreSingleSocketSMT,
+			StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
+			mustParseCPUSet(t, "1-7,9-15"),
+			10,
+			"",
+			mustParseCPUSet(t, "4-7,12-15,1,9"),
+		},
 	}...)
 
 	for _, tc := range testCases {
@@ -677,7 +750,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
 			strategy = CPUSortingStrategySpread
 		}
 
-		result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy)
+		result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption)
 		if tc.expErr != "" && err != nil && err.Error() != tc.expErr {
 			t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err)
 		}
@@ -778,7 +851,7 @@ func TestTakeByTopologyWithSpreadPhysicalCPUsPreferredOption(t *testing.T) {
 		if tc.opts.DistributeCPUsAcrossCores {
 			strategy = CPUSortingStrategySpread
 		}
-		result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy)
+		result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption)
 		if tc.expErr != "" && err.Error() != tc.expErr {
 			t.Errorf("testCase %q failed, expected error to be [%v] but it was [%v]", tc.description, tc.expErr, err)
 		}

@@ -668,20 +668,24 @@ func TestCPUManagerGenerate(t *testing.T) {
				{
					Cores: []cadvisorapi.Core{
						{
-							Id:      0,
-							Threads: []int{0},
+							Id:           0,
+							Threads:      []int{0},
+							UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
						},
						{
-							Id:      1,
-							Threads: []int{1},
+							Id:           1,
+							Threads:      []int{1},
+							UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
						},
						{
-							Id:      2,
-							Threads: []int{2},
+							Id:           2,
+							Threads:      []int{2},
+							UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
						},
						{
-							Id:      3,
-							Threads: []int{3},
+							Id:           3,
+							Threads:      []int{3},
+							UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
						},
					},
				},

@@ -34,6 +34,7 @@ const (
 	AlignBySocketOption string = "align-by-socket"
 	DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
 	StrictCPUReservationOption string = "strict-cpu-reservation"
+	PreferAlignByUnCoreCacheOption string = "prefer-align-cpus-by-uncorecache"
 )
 
 var (
@@ -42,6 +43,7 @@ var (
 		AlignBySocketOption,
 		DistributeCPUsAcrossCoresOption,
 		StrictCPUReservationOption,
+		PreferAlignByUnCoreCacheOption,
 	)
 	betaOptions = sets.New[string](
 		FullPCPUsOnlyOption,
@@ -90,6 +92,9 @@ type StaticPolicyOptions struct {
 	DistributeCPUsAcrossCores bool
 	// Flag to remove reserved cores from the list of available cores
 	StrictCPUReservation bool
+	// Flag that makes best-effort to align CPUs to a uncorecache boundary
+	// As long as there are CPUs available, pods will be admitted if the condition is not met.
+	PreferAlignByUncoreCacheOption bool
 }
 
 // NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
@@ -131,6 +136,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
 				return opts, fmt.Errorf("bad value for option %q: %w", name, err)
 			}
 			opts.StrictCPUReservation = optValue
+		case PreferAlignByUnCoreCacheOption:
+			optValue, err := strconv.ParseBool(value)
+			if err != nil {
+				return opts, fmt.Errorf("bad value for option %q: %w", name, err)
+			}
+			opts.PreferAlignByUncoreCacheOption = optValue
 		default:
 			// this should never be reached, we already detect unknown options,
 			// but we keep it as further safety.
@@ -147,6 +158,14 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
 		return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", DistributeCPUsAcrossNUMAOption, DistributeCPUsAcrossCoresOption)
 	}
 
+	if opts.PreferAlignByUncoreCacheOption && opts.DistributeCPUsAcrossCores {
+		return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", PreferAlignByUnCoreCacheOption, DistributeCPUsAcrossCoresOption)
+	}
+
+	if opts.PreferAlignByUncoreCacheOption && opts.DistributeCPUsAcrossNUMA {
+		return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", PreferAlignByUnCoreCacheOption, DistributeCPUsAcrossNUMAOption)
+	}
+
 	return opts, nil
 }
 
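As a usage note, here is a minimal sketch of how the new option is parsed and validated. It is illustrative only: it assumes the snippet sits inside the cpumanager package (for example in a test) with fmt imported, and that the CPUManagerPolicyAlphaOptions feature gate is enabled, since prefer-align-cpus-by-uncorecache is registered above as an alpha option.

// Illustrative sketch, not part of the change.
func examplePreferAlignByUncoreCache() {
	// Parse the option map the static policy receives from the kubelet's
	// CPU manager policy options.
	opts, err := NewStaticPolicyOptions(map[string]string{
		PreferAlignByUnCoreCacheOption: "true", // "prefer-align-cpus-by-uncorecache"
	})
	fmt.Println(opts.PreferAlignByUncoreCacheOption, err) // true <nil>

	// Combining the option with distribute-cpus-across-cores (or with
	// distribute-cpus-across-numa) is rejected by the checks added above.
	_, err = NewStaticPolicyOptions(map[string]string{
		PreferAlignByUnCoreCacheOption:  "true",
		DistributeCPUsAcrossCoresOption: "true",
	})
	fmt.Println(err != nil) // true
}
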
@@ -525,7 +525,8 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int)
 		}
 		return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy)
 	}
-	return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy)
+
+	return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption)
 }
 
 func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {

@@ -38,9 +38,10 @@ var (
 	}
 
 	topoDualSocketHT = &topology.CPUTopology{
-		NumCPUs:    12,
-		NumSockets: 2,
-		NumCores:   6,
+		NumCPUs:        12,
+		NumSockets:     2,
+		NumCores:       6,
+		NumUncoreCache: 1,
 		CPUDetails: map[int]topology.CPUInfo{
 			0: {CoreID: 0, SocketID: 0, NUMANodeID: 0},
 			1: {CoreID: 1, SocketID: 1, NUMANodeID: 1},
@@ -57,6 +58,106 @@ var (
 		},
 	}
 
+	topoUncoreDualSocketNoSMT = &topology.CPUTopology{
+		NumCPUs:        16,
+		NumSockets:     2,
+		NumCores:       16,
+		NumUncoreCache: 4,
+		CPUDetails: map[int]topology.CPUInfo{
+			0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			8: {CoreID: 8, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
+			9: {CoreID: 9, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
+			10: {CoreID: 10, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
+			11: {CoreID: 11, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
+			12: {CoreID: 12, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
+			13: {CoreID: 13, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
+			14: {CoreID: 14, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
+			15: {CoreID: 15, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
+		},
+	}
+
+	topoUncoreSingleSocketMultiNuma = &topology.CPUTopology{
+		NumCPUs:        16,
+		NumSockets:     1,
+		NumCores:       16,
+		NumUncoreCache: 4,
+		CPUDetails: map[int]topology.CPUInfo{
+			0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			8: {CoreID: 8, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
+			9: {CoreID: 9, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
+			10: {CoreID: 10, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
+			11: {CoreID: 11, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
+			12: {CoreID: 12, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
+			13: {CoreID: 13, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
+			14: {CoreID: 14, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
+			15: {CoreID: 15, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
+		},
+	}
+
+	topoUncoreSingleSocketSMT = &topology.CPUTopology{
+		NumCPUs:        16,
+		NumSockets:     1,
+		NumCores:       8,
+		NumUncoreCache: 2,
+		CPUDetails: map[int]topology.CPUInfo{
+			0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			8: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			9: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			10: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			11: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			12: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			13: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			14: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			15: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+		},
+	}
+
+	topoUncoreSingleSocketNoSMT = &topology.CPUTopology{
+		NumCPUs:        16,
+		NumSockets:     1,
+		NumCores:       16,
+		NumUncoreCache: 4,
+		CPUDetails: map[int]topology.CPUInfo{
+			0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
+			4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
+			8: {CoreID: 8, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
+			9: {CoreID: 9, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
+			10: {CoreID: 10, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
+			11: {CoreID: 11, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
+			12: {CoreID: 12, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
+			13: {CoreID: 13, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
+			14: {CoreID: 14, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
+			15: {CoreID: 15, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
+		},
+	}
+
 	topoDualSocketNoHT = &topology.CPUTopology{
 		NumCPUs:    8,
 		NumSockets: 2,

@@ -36,12 +36,14 @@ type CPUDetails map[int]CPUInfo
 // Core - physical CPU, cadvisor - Core
 // Socket - socket, cadvisor - Socket
 // NUMA Node - NUMA cell, cadvisor - Node
+// UncoreCache - Split L3 Cache Topology, cadvisor
 type CPUTopology struct {
-	NumCPUs      int
-	NumCores     int
-	NumSockets   int
-	NumNUMANodes int
-	CPUDetails   CPUDetails
+	NumCPUs        int
+	NumCores       int
+	NumUncoreCache int
+	NumSockets     int
+	NumNUMANodes   int
+	CPUDetails     CPUDetails
 }
 
 // CPUsPerCore returns the number of logical CPUs are associated with
@@ -62,6 +64,15 @@ func (topo *CPUTopology) CPUsPerSocket() int {
 	return topo.NumCPUs / topo.NumSockets
 }
 
+// CPUsPerUncore returns the number of logical CPUs that are associated with
+// each UncoreCache
+func (topo *CPUTopology) CPUsPerUncore() int {
+	if topo.NumUncoreCache == 0 {
+		return 0
+	}
+	return topo.NumCPUs / topo.NumUncoreCache
+}
+
 // CPUCoreID returns the physical core ID which the given logical CPU
 // belongs to.
 func (topo *CPUTopology) CPUCoreID(cpu int) (int, error) {
@@ -90,11 +101,12 @@ func (topo *CPUTopology) CPUNUMANodeID(cpu int) (int, error) {
 	return info.NUMANodeID, nil
 }
 
-// CPUInfo contains the NUMA, socket, and core IDs associated with a CPU.
+// CPUInfo contains the NUMA, socket, UncoreCache and core IDs associated with a CPU.
 type CPUInfo struct {
-	NUMANodeID int
-	SocketID   int
-	CoreID     int
+	NUMANodeID    int
+	SocketID      int
+	CoreID        int
+	UncoreCacheID int
 }
 
 // KeepOnly returns a new CPUDetails object with only the supplied cpus.
@@ -108,6 +120,67 @@ func (d CPUDetails) KeepOnly(cpus cpuset.CPUSet) CPUDetails {
 	return result
 }
 
+// UncoreCaches returns all the uncorecache Id (L3 Index) associated with the CPUs in this CPUDetails
+func (d CPUDetails) UncoreCaches() cpuset.CPUSet {
+	var numUnCoreIDs []int
+	for _, info := range d {
+		numUnCoreIDs = append(numUnCoreIDs, info.UncoreCacheID)
+	}
+	return cpuset.New(numUnCoreIDs...)
+}
+
+// UncoreInNUMANodes returns all of the uncore IDs associated with the given
+// NUMANode IDs in this CPUDetails.
+func (d CPUDetails) UncoreInNUMANodes(ids ...int) cpuset.CPUSet {
+	var unCoreIDs []int
+	for _, id := range ids {
+		for _, info := range d {
+			if info.NUMANodeID == id {
+				unCoreIDs = append(unCoreIDs, info.UncoreCacheID)
+			}
+		}
+	}
+	return cpuset.New(unCoreIDs...)
+}
+
+// CoresNeededInUncoreCache returns either the full list of all available unique core IDs associated with the given
+// UnCoreCache IDs in this CPUDetails or a subset that matches the ask.
+func (d CPUDetails) CoresNeededInUncoreCache(numCoresNeeded int, ids ...int) cpuset.CPUSet {
+	coreIDs := d.coresInUncoreCache(ids...)
+	if coreIDs.Size() <= numCoresNeeded {
+		return coreIDs
+	}
+	tmpCoreIDs := coreIDs.List()
+	return cpuset.New(tmpCoreIDs[:numCoresNeeded]...)
+}
+
+// Helper function that just gets the cores
+func (d CPUDetails) coresInUncoreCache(ids ...int) cpuset.CPUSet {
+	var coreIDs []int
+	for _, id := range ids {
+		for _, info := range d {
+			if info.UncoreCacheID == id {
+				coreIDs = append(coreIDs, info.CoreID)
+			}
+		}
+	}
+	return cpuset.New(coreIDs...)
+}
+
+// CPUsInUncoreCaches returns all the logical CPU IDs associated with the given
+// UnCoreCache IDs in this CPUDetails
+func (d CPUDetails) CPUsInUncoreCaches(ids ...int) cpuset.CPUSet {
+	var cpuIDs []int
+	for _, id := range ids {
+		for cpu, info := range d {
+			if info.UncoreCacheID == id {
+				cpuIDs = append(cpuIDs, cpu)
+			}
+		}
+	}
+	return cpuset.New(cpuIDs...)
+}
+
 // NUMANodes returns all of the NUMANode IDs associated with the CPUs in this
 // CPUDetails.
 func (d CPUDetails) NUMANodes() cpuset.CPUSet {
@@ -245,6 +318,16 @@ func (d CPUDetails) CPUsInCores(ids ...int) cpuset.CPUSet {
 	return cpuset.New(cpuIDs...)
 }
 
+func getUncoreCacheID(core cadvisorapi.Core) int {
+	if len(core.UncoreCaches) < 1 {
+		// In case cAdvisor reports no UncoreCaches, fall back to socket alignment since uncorecache is not shared
+		return core.SocketID
+	}
+	// Even though the cadvisor API returns a slice, we only expect either 0 or 1 uncore caches,
+	// so everything past the first entry should be discarded or ignored
+	return core.UncoreCaches[0].Id
+}
+
 // Discover returns CPUTopology based on cadvisor node info
 func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
 	if machineInfo.NumCores == 0 {
@@ -260,9 +343,10 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
 		if coreID, err := getUniqueCoreID(core.Threads); err == nil {
 			for _, cpu := range core.Threads {
 				CPUDetails[cpu] = CPUInfo{
-					CoreID:     coreID,
-					SocketID:   core.SocketID,
-					NUMANodeID: node.Id,
+					CoreID:        coreID,
+					SocketID:      core.SocketID,
+					NUMANodeID:    node.Id,
+					UncoreCacheID: getUncoreCacheID(core),
 				}
 			}
 		} else {
@@ -273,11 +357,12 @@
 	}
 
 	return &CPUTopology{
-		NumCPUs:      machineInfo.NumCores,
-		NumSockets:   machineInfo.NumSockets,
-		NumCores:     numPhysicalCores,
-		NumNUMANodes: CPUDetails.NUMANodes().Size(),
-		CPUDetails:   CPUDetails,
+		NumCPUs:        machineInfo.NumCores,
+		NumSockets:     machineInfo.NumSockets,
+		NumCores:       numPhysicalCores,
+		NumNUMANodes:   CPUDetails.NUMANodes().Size(),
+		NumUncoreCache: CPUDetails.UncoreCaches().Size(),
+		CPUDetails:     CPUDetails,
 	}, nil
 }

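For illustration, a small sketch of the new topology helpers (assumed to compile inside the topology package, e.g. in a test, with fmt imported); it hand-builds the CPUDetails of a 4-CPU machine split across two uncore caches and queries them:

// Illustrative sketch, not part of the change.
func exampleUncoreHelpers() {
	details := CPUDetails{
		0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
		1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
		2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
		3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
	}
	topo := &CPUTopology{
		NumCPUs:        4,
		NumCores:       4,
		NumSockets:     1,
		NumNUMANodes:   1,
		NumUncoreCache: details.UncoreCaches().Size(), // 2
		CPUDetails:     details,
	}

	fmt.Println(details.UncoreCaches().List())        // [0 1]
	fmt.Println(details.CPUsInUncoreCaches(1).List()) // [2 3]
	fmt.Println(details.UncoreInNUMANodes(0).List())  // [0 1]
	fmt.Println(topo.CPUsPerUncore())                 // 2
}
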
File diff suppressed because it is too large
@@ -49,16 +49,16 @@ func returnMachineInfo() cadvisorapi.MachineInfo {
 		Topology: []cadvisorapi.Node{
 			{Id: 0,
 				Cores: []cadvisorapi.Core{
-					{SocketID: 0, Id: 0, Threads: []int{0, 6}},
-					{SocketID: 0, Id: 1, Threads: []int{1, 7}},
-					{SocketID: 0, Id: 2, Threads: []int{2, 8}},
+					{SocketID: 0, Id: 0, Threads: []int{0, 6}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
+					{SocketID: 0, Id: 1, Threads: []int{1, 7}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
+					{SocketID: 0, Id: 2, Threads: []int{2, 8}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
 				},
 			},
 			{Id: 1,
 				Cores: []cadvisorapi.Core{
-					{SocketID: 1, Id: 0, Threads: []int{3, 9}},
-					{SocketID: 1, Id: 1, Threads: []int{4, 10}},
-					{SocketID: 1, Id: 2, Threads: []int{5, 11}},
+					{SocketID: 1, Id: 0, Threads: []int{3, 9}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
+					{SocketID: 1, Id: 1, Threads: []int{4, 10}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
+					{SocketID: 1, Id: 2, Threads: []int{5, 11}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
 				},
 			},
 		},

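Finally, a standalone sketch of how the uncore data flows from cadvisor into Discover (illustrative only; it assumes the cadvisor v1 and kubelet topology import paths below resolve in the caller's module, and every core here reports an UncoreCaches entry, so the socket-ID fallback in getUncoreCacheID is not exercised):

// Illustrative sketch — a tiny machine with one socket and two uncore caches.
package main

import (
	"fmt"

	cadvisorapi "github.com/google/cadvisor/info/v1"

	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
)

func main() {
	machineInfo := &cadvisorapi.MachineInfo{
		NumCores:   4,
		NumSockets: 1,
		Topology: []cadvisorapi.Node{
			{Id: 0,
				Cores: []cadvisorapi.Core{
					{SocketID: 0, Id: 0, Threads: []int{0}, UncoreCaches: []cadvisorapi.Cache{{Id: 0}}},
					{SocketID: 0, Id: 1, Threads: []int{1}, UncoreCaches: []cadvisorapi.Cache{{Id: 0}}},
					{SocketID: 0, Id: 2, Threads: []int{2}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
					{SocketID: 0, Id: 3, Threads: []int{3}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
				},
			},
		},
	}

	topo, err := topology.Discover(machineInfo)
	if err != nil {
		panic(err)
	}
	fmt.Println(topo.NumUncoreCache)                          // 2
	fmt.Println(topo.CPUDetails.CPUsInUncoreCaches(1).List()) // [2 3]
}
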