Merge pull request #126750 from AMDEPYC/uncore_v1

Split L3 Cache Topology Awareness in CPU Manager
Kubernetes Prow Robot 2024-11-06 11:13:29 +00:00 committed by GitHub
commit 50d0f920c0
9 changed files with 1265 additions and 350 deletions


@ -118,6 +118,17 @@ func (n *numaFirst) takeFullSecondLevel() {
n.acc.takeFullSockets()
}
// Sort the UncoreCaches within the NUMA nodes.
func (a *cpuAccumulator) sortAvailableUncoreCaches() []int {
var result []int
for _, numa := range a.sortAvailableNUMANodes() {
uncore := a.details.UncoreInNUMANodes(numa).UnsortedList()
a.sort(uncore, a.details.CPUsInUncoreCaches)
result = append(result, uncore...)
}
return result
}
// If NUMA nodes are higher in the memory hierarchy than sockets, then just
// sort the NUMA nodes directly, and return them.
func (n *numaFirst) sortAvailableNUMANodes() []int {
@ -318,6 +329,12 @@ func (a *cpuAccumulator) isSocketFree(socketID int) bool {
return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket()
}
// Returns true if the supplied UncoreCache is fully available in `a.details`.
// "fully available" means that all the CPUs in it are free.
func (a *cpuAccumulator) isUncoreCacheFree(uncoreID int) bool {
return a.details.CPUsInUncoreCaches(uncoreID).Size() == a.topo.CPUDetails.CPUsInUncoreCaches(uncoreID).Size()
}
// Returns true if the supplied core is fully available in `a.details`.
// "fully available" means that all the CPUs in it are free.
func (a *cpuAccumulator) isCoreFree(coreID int) bool {
@ -346,6 +363,17 @@ func (a *cpuAccumulator) freeSockets() []int {
return free
}
// Returns free UncoreCache IDs as a slice sorted by sortAvailableUncoreCaches().
func (a *cpuAccumulator) freeUncoreCache() []int {
free := []int{}
for _, uncore := range a.sortAvailableUncoreCaches() {
if a.isUncoreCacheFree(uncore) {
free = append(free, uncore)
}
}
return free
}
// Returns free core IDs as a slice sorted by sortAvailableCores().
func (a *cpuAccumulator) freeCores() []int {
free := []int{}
@ -519,6 +547,62 @@ func (a *cpuAccumulator) takeFullSockets() {
}
}
func (a *cpuAccumulator) takeFullUncore() {
for _, uncore := range a.freeUncoreCache() {
cpusInUncore := a.topo.CPUDetails.CPUsInUncoreCaches(uncore)
if !a.needsAtLeast(cpusInUncore.Size()) {
continue
}
klog.V(4).InfoS("takeFullUncore: claiming uncore", "uncore", uncore)
a.take(cpusInUncore)
}
}
func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
// determine the number of free cores (physical CPUs) needed within the UncoreCache, then
// determine the free logical CPUs (hyperthreads) that correspond to those cores
freeCores := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncoreID)
freeCPUs := a.details.CPUsInCores(freeCores.UnsortedList()...)
// claim the CPUs only if the free CPUs within the UncoreCache exactly satisfy the remaining request
claimed := (a.numCPUsNeeded == freeCPUs.Size())
klog.V(4).InfoS("takePartialUncore: trying to claim partial uncore",
"uncore", uncoreID,
"claimed", claimed,
"needed", a.numCPUsNeeded,
"cores", freeCores.String(),
"cpus", freeCPUs.String())
if !claimed {
return
}
a.take(freeCPUs)
}
// First, try to take whole UncoreCaches, if available and the request is at least the size of an UncoreCache group.
// Second, try to take a partial UncoreCache if the remaining request can fit within a single UncoreCache.
func (a *cpuAccumulator) takeUncoreCache() {
numCPUsInUncore := a.topo.CPUsPerUncore()
for _, uncore := range a.sortAvailableUncoreCaches() {
// take whole UncoreCaches while the number of CPUs needed is at least the size of an UncoreCache
if a.needsAtLeast(numCPUsInUncore) {
a.takeFullUncore()
}
if a.isSatisfied() {
return
}
// take a partial UncoreCache if the remaining CPUs needed fit within this UncoreCache
a.takePartialUncore(uncore)
if a.isSatisfied() {
return
}
}
}
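To make the take order above concrete, here is a minimal standalone Go sketch (not part of the change) that walks through the arithmetic: assuming 4 CPUs per UncoreCache, as in the topoUncoreSingleSocketNoSMT fixture used in the tests below, a 10-CPU request is served by two full UncoreCaches plus a 2-CPU partial take.

package main

import "fmt"

func main() {
	// Assumed values, mirroring the topoUncoreSingleSocketNoSMT test fixture:
	// 4 CPUs per UncoreCache, container requests 10 CPUs.
	cpusPerUncore := 4
	numCPUsNeeded := 10

	// takeFullUncore claims whole caches while the remaining request is
	// at least one UncoreCache's worth of CPUs.
	fullCaches := numCPUsNeeded / cpusPerUncore
	// takePartialUncore then has to fit the remainder inside a single cache.
	remainder := numCPUsNeeded % cpusPerUncore

	fmt.Printf("full UncoreCaches: %d, CPUs from a partial UncoreCache: %d\n",
		fullCaches, remainder)
	// Output: full UncoreCaches: 2, CPUs from a partial UncoreCache: 2
}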
func (a *cpuAccumulator) takeFullCores() {
for _, core := range a.freeCores() {
cpusInCore := a.topo.CPUDetails.CPUsInCores(core)
@ -637,6 +721,14 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC
// or the remaining number of CPUs to take after having taken full sockets and NUMA nodes is less
// than a whole NUMA node, the function tries to take whole physical cores (cores).
//
// If `PreferAlignByUncoreCache` is enabled, the function tries to optimally assign UncoreCaches.
// If `numCPUs` is larger than or equal to the total number of CPUs in an UncoreCache, and there are
// free (i.e. all CPUs within the UncoreCache are free) UncoreCaches, the function takes as many entire
// free UncoreCaches as possible. If/once `numCPUs` is smaller than the total number of CPUs in a free
// UncoreCache, the function scans each UncoreCache index in numerical order and assigns cores that
// fit within a single UncoreCache. If `numCPUs` cannot fit within any UncoreCache, the function
// falls back to taking whole physical cores.
//
// If `numCPUs` is bigger than the total number of CPUs in a core, and there are
// free (i.e. all CPUs in them are free) cores, the function takes as many entire free cores as possible.
// The cores are taken from one socket at a time, and the sockets are considered by
@ -658,7 +750,7 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC
// the least amount of free CPUs to the one with the highest amount of free CPUs (i.e. in ascending
// order of free CPUs). For any NUMA node, the cores are selected from the ones in the socket with
// the least amount of free CPUs to the one with the highest amount of free CPUs.
func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) {
func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) {
acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy)
if acc.isSatisfied() {
return acc.result, nil
@ -681,7 +773,17 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
return acc.result, nil
}
// 2. Acquire whole cores, if available and the container requires at least
// 2. If PreferAlignByUncoreCache is enabled, acquire whole UncoreCaches
// if available and the container requires at least an UncoreCache's worth
// of CPUs. Otherwise, acquire CPUs from the fewest UncoreCaches possible.
if preferAlignByUncoreCache {
acc.takeUncoreCache()
if acc.isSatisfied() {
return acc.result, nil
}
}
// 3. Acquire whole cores, if available and the container requires at least
// a core's-worth of CPUs.
// If `CPUSortingStrategySpread` is specified, skip taking the whole core.
if cpuSortingStrategy != CPUSortingStrategySpread {
@ -691,7 +793,7 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
}
}
// 3. Acquire single threads, preferring to fill partially-allocated cores
// 4. Acquire single threads, preferring to fill partially-allocated cores
// on the same sockets as the whole cores we have already taken in this
// allocation.
acc.takeRemainingCPUs()
@ -769,8 +871,10 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
// If the number of CPUs requested cannot be handed out in chunks of
// 'cpuGroupSize', then we just call out the packing algorithm since we
// can't distribute CPUs in this chunk size.
// PreferAlignByUncoreCache is not implemented for this path yet, so it is hard-coded to false.
// Support for PreferAlignByUncoreCache is planned for the beta release.
if (numCPUs % cpuGroupSize) != 0 {
return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy)
return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false)
}
// Otherwise build an accumulator to start allocating CPUs from.
@ -953,7 +1057,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
// size 'cpuGroupSize' from 'bestCombo'.
distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize
for _, numa := range bestCombo {
cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy)
cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, cpuSortingStrategy, false)
acc.take(cpus)
}
@ -968,7 +1072,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize {
continue
}
cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy)
cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, cpuSortingStrategy, false)
acc.take(cpus)
remainder -= cpuGroupSize
}
@ -992,5 +1096,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
// If we never found a combination of NUMA nodes that we could properly
// distribute CPUs across, fall back to the packing algorithm.
return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy)
return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, cpuSortingStrategy, false)
}


@ -668,6 +668,79 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
"",
mustParseCPUSet(t, "0-29,40-69,30,31,70,71"),
},
// Test cases for PreferAlignByUncoreCache
{
"take cpus from two full UncoreCaches and partial from a single UncoreCache",
topoUncoreSingleSocketNoSMT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "1-15"),
10,
"",
cpuset.New(1, 2, 4, 5, 6, 7, 8, 9, 10, 11),
},
{
"take one cpu from dual socket with HT - core from Socket 0",
topoDualSocketHT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
1,
"",
cpuset.New(2),
},
{
"take first available UncoreCache from first socket",
topoUncoreDualSocketNoSMT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "0-15"),
4,
"",
cpuset.New(0, 1, 2, 3),
},
{
"take all available UncoreCache from first socket",
topoUncoreDualSocketNoSMT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "2-15"),
6,
"",
cpuset.New(2, 3, 4, 5, 6, 7),
},
{
"take first available UncoreCache from second socket",
topoUncoreDualSocketNoSMT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "8-15"),
4,
"",
cpuset.New(8, 9, 10, 11),
},
{
"take first available UncoreCache from available NUMA",
topoUncoreSingleSocketMultiNuma,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "3,4-8,12"),
2,
"",
cpuset.New(4, 5),
},
{
"take cpus from best available UncoreCache group of multi uncore cache single socket - SMT enabled",
topoUncoreSingleSocketSMT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "2-3,10-11,4-7,12-15"),
6,
"",
cpuset.New(4, 5, 6, 12, 13, 14),
},
{
"take cpus from multiple UncoreCache of single socket - SMT enabled",
topoUncoreSingleSocketSMT,
StaticPolicyOptions{PreferAlignByUncoreCacheOption: true},
mustParseCPUSet(t, "1-7,9-15"),
10,
"",
mustParseCPUSet(t, "4-7,12-15,1,9"),
},
}...)
for _, tc := range testCases {
@ -677,7 +750,7 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
strategy = CPUSortingStrategySpread
}
result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy)
result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption)
if tc.expErr != "" && err != nil && err.Error() != tc.expErr {
t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err)
}
@ -778,7 +851,7 @@ func TestTakeByTopologyWithSpreadPhysicalCPUsPreferredOption(t *testing.T) {
if tc.opts.DistributeCPUsAcrossCores {
strategy = CPUSortingStrategySpread
}
result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy)
result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs, strategy, tc.opts.PreferAlignByUncoreCacheOption)
if tc.expErr != "" && err.Error() != tc.expErr {
t.Errorf("testCase %q failed, expected error to be [%v] but it was [%v]", tc.description, tc.expErr, err)
}


@ -668,20 +668,24 @@ func TestCPUManagerGenerate(t *testing.T) {
{
Cores: []cadvisorapi.Core{
{
Id: 0,
Threads: []int{0},
Id: 0,
Threads: []int{0},
UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
},
{
Id: 1,
Threads: []int{1},
Id: 1,
Threads: []int{1},
UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
},
{
Id: 2,
Threads: []int{2},
Id: 2,
Threads: []int{2},
UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
},
{
Id: 3,
Threads: []int{3},
Id: 3,
Threads: []int{3},
UncoreCaches: []cadvisorapi.Cache{{Id: 1}},
},
},
},


@ -34,6 +34,7 @@ const (
AlignBySocketOption string = "align-by-socket"
DistributeCPUsAcrossCoresOption string = "distribute-cpus-across-cores"
StrictCPUReservationOption string = "strict-cpu-reservation"
PreferAlignByUnCoreCacheOption string = "prefer-align-cpus-by-uncorecache"
)
var (
@ -42,6 +43,7 @@ var (
AlignBySocketOption,
DistributeCPUsAcrossCoresOption,
StrictCPUReservationOption,
PreferAlignByUnCoreCacheOption,
)
betaOptions = sets.New[string](
FullPCPUsOnlyOption,
@ -90,6 +92,9 @@ type StaticPolicyOptions struct {
DistributeCPUsAcrossCores bool
// Flag to remove reserved cores from the list of available cores
StrictCPUReservation bool
// Flag that makes a best-effort attempt to align CPUs to an uncorecache boundary.
// As long as there are CPUs available, pods are still admitted even if the alignment cannot be satisfied.
PreferAlignByUncoreCacheOption bool
}
// NewStaticPolicyOptions creates a StaticPolicyOptions struct from the user configuration.
@ -131,6 +136,12 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
}
opts.StrictCPUReservation = optValue
case PreferAlignByUnCoreCacheOption:
optValue, err := strconv.ParseBool(value)
if err != nil {
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
}
opts.PreferAlignByUncoreCacheOption = optValue
default:
// this should never be reached, we already detect unknown options,
// but we keep it as further safety.
@ -147,6 +158,14 @@ func NewStaticPolicyOptions(policyOptions map[string]string) (StaticPolicyOption
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", DistributeCPUsAcrossNUMAOption, DistributeCPUsAcrossCoresOption)
}
if opts.PreferAlignByUncoreCacheOption && opts.DistributeCPUsAcrossCores {
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", PreferAlignByUnCoreCacheOption, DistributeCPUsAcrossCoresOption)
}
if opts.PreferAlignByUncoreCacheOption && opts.DistributeCPUsAcrossNUMA {
return opts, fmt.Errorf("static policy options %s and %s can not be used at the same time", PreferAlignByUnCoreCacheOption, DistributeCPUsAcrossNUMAOption)
}
return opts, nil
}
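For context, a hedged sketch (not part of this diff) of how the new option string is consumed. It assumes the in-tree cpumanager package is importable from the caller's module; in a real kubelet the CPUManagerPolicyAlphaOptions feature gate must also be enabled, since the option is registered above as an alpha option.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
)

func main() {
	// Options as they would arrive from the kubelet's cpuManagerPolicyOptions map
	// when the static policy is configured with prefer-align-cpus-by-uncorecache.
	opts, err := cpumanager.NewStaticPolicyOptions(map[string]string{
		cpumanager.PreferAlignByUnCoreCacheOption: "true",
	})
	if err != nil {
		// Rejected, e.g. when combined with distribute-cpus-across-cores or
		// distribute-cpus-across-numa, or when the alpha options gate is off.
		fmt.Println("invalid policy options:", err)
		return
	}
	fmt.Println("uncore cache alignment preferred:", opts.PreferAlignByUncoreCacheOption)
}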


@ -525,7 +525,8 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int)
}
return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy)
}
return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy)
return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs, cpuSortingStrategy, p.options.PreferAlignByUncoreCacheOption)
}
func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {


@ -38,9 +38,10 @@ var (
}
topoDualSocketHT = &topology.CPUTopology{
NumCPUs: 12,
NumSockets: 2,
NumCores: 6,
NumCPUs: 12,
NumSockets: 2,
NumCores: 6,
NumUncoreCache: 1,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0, NUMANodeID: 0},
1: {CoreID: 1, SocketID: 1, NUMANodeID: 1},
@ -57,6 +58,106 @@ var (
},
}
topoUncoreDualSocketNoSMT = &topology.CPUTopology{
NumCPUs: 16,
NumSockets: 2,
NumCores: 16,
NumUncoreCache: 4,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
8: {CoreID: 8, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
9: {CoreID: 9, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
10: {CoreID: 10, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
11: {CoreID: 11, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 2},
12: {CoreID: 12, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
13: {CoreID: 13, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
14: {CoreID: 14, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
15: {CoreID: 15, SocketID: 1, NUMANodeID: 0, UncoreCacheID: 3},
},
}
topoUncoreSingleSocketMultiNuma = &topology.CPUTopology{
NumCPUs: 16,
NumSockets: 1,
NumCores: 16,
NumUncoreCache: 4,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
8: {CoreID: 8, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
9: {CoreID: 9, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
10: {CoreID: 10, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
11: {CoreID: 11, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 2},
12: {CoreID: 12, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
13: {CoreID: 13, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
14: {CoreID: 14, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
15: {CoreID: 15, SocketID: 0, NUMANodeID: 1, UncoreCacheID: 3},
},
}
topoUncoreSingleSocketSMT = &topology.CPUTopology{
NumCPUs: 16,
NumSockets: 1,
NumCores: 8,
NumUncoreCache: 2,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
8: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
9: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
10: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
11: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
12: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
13: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
14: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
15: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
},
}
topoUncoreSingleSocketNoSMT = &topology.CPUTopology{
NumCPUs: 16,
NumSockets: 1,
NumCores: 16,
NumUncoreCache: 4,
CPUDetails: map[int]topology.CPUInfo{
0: {CoreID: 0, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
1: {CoreID: 1, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
2: {CoreID: 2, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
3: {CoreID: 3, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 0},
4: {CoreID: 4, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
5: {CoreID: 5, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
6: {CoreID: 6, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
7: {CoreID: 7, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 1},
8: {CoreID: 8, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
9: {CoreID: 9, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
10: {CoreID: 10, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
11: {CoreID: 11, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 2},
12: {CoreID: 12, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
13: {CoreID: 13, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
14: {CoreID: 14, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
15: {CoreID: 15, SocketID: 0, NUMANodeID: 0, UncoreCacheID: 3},
},
}
topoDualSocketNoHT = &topology.CPUTopology{
NumCPUs: 8,
NumSockets: 2,


@ -36,12 +36,14 @@ type CPUDetails map[int]CPUInfo
// Core - physical CPU, cadvisor - Core
// Socket - socket, cadvisor - Socket
// NUMA Node - NUMA cell, cadvisor - Node
// UncoreCache - Split L3 cache topology, cadvisor - Core.UncoreCaches
type CPUTopology struct {
NumCPUs int
NumCores int
NumSockets int
NumNUMANodes int
CPUDetails CPUDetails
NumCPUs int
NumCores int
NumUncoreCache int
NumSockets int
NumNUMANodes int
CPUDetails CPUDetails
}
// CPUsPerCore returns the number of logical CPUs are associated with
@ -62,6 +64,15 @@ func (topo *CPUTopology) CPUsPerSocket() int {
return topo.NumCPUs / topo.NumSockets
}
// CPUsPerUncore returns the number of logical CPUs that are associated with
// each UncoreCache.
func (topo *CPUTopology) CPUsPerUncore() int {
if topo.NumUncoreCache == 0 {
return 0
}
return topo.NumCPUs / topo.NumUncoreCache
}
// CPUCoreID returns the physical core ID which the given logical CPU
// belongs to.
func (topo *CPUTopology) CPUCoreID(cpu int) (int, error) {
@ -90,11 +101,12 @@ func (topo *CPUTopology) CPUNUMANodeID(cpu int) (int, error) {
return info.NUMANodeID, nil
}
// CPUInfo contains the NUMA, socket, and core IDs associated with a CPU.
// CPUInfo contains the NUMA, socket, UncoreCache and core IDs associated with a CPU.
type CPUInfo struct {
NUMANodeID int
SocketID int
CoreID int
NUMANodeID int
SocketID int
CoreID int
UncoreCacheID int
}
// KeepOnly returns a new CPUDetails object with only the supplied cpus.
@ -108,6 +120,67 @@ func (d CPUDetails) KeepOnly(cpus cpuset.CPUSet) CPUDetails {
return result
}
// UncoreCaches returns all the uncore cache IDs (L3 cache index) associated with the CPUs in this CPUDetails.
func (d CPUDetails) UncoreCaches() cpuset.CPUSet {
var numUnCoreIDs []int
for _, info := range d {
numUnCoreIDs = append(numUnCoreIDs, info.UncoreCacheID)
}
return cpuset.New(numUnCoreIDs...)
}
// UncoreInNUMANodes returns all of the uncore cache IDs associated with the given
// NUMANode IDs in this CPUDetails.
func (d CPUDetails) UncoreInNUMANodes(ids ...int) cpuset.CPUSet {
var unCoreIDs []int
for _, id := range ids {
for _, info := range d {
if info.NUMANodeID == id {
unCoreIDs = append(unCoreIDs, info.UncoreCacheID)
}
}
}
return cpuset.New(unCoreIDs...)
}
// CoresNeededInUncoreCache returns either the full list of available unique core IDs associated with the given
// UncoreCache IDs in this CPUDetails, or a subset containing at most numCoresNeeded core IDs.
func (d CPUDetails) CoresNeededInUncoreCache(numCoresNeeded int, ids ...int) cpuset.CPUSet {
coreIDs := d.coresInUncoreCache(ids...)
if coreIDs.Size() <= numCoresNeeded {
return coreIDs
}
tmpCoreIDs := coreIDs.List()
return cpuset.New(tmpCoreIDs[:numCoresNeeded]...)
}
// Helper function that returns the core IDs associated with the given UncoreCache IDs.
func (d CPUDetails) coresInUncoreCache(ids ...int) cpuset.CPUSet {
var coreIDs []int
for _, id := range ids {
for _, info := range d {
if info.UncoreCacheID == id {
coreIDs = append(coreIDs, info.CoreID)
}
}
}
return cpuset.New(coreIDs...)
}
// CPUsInUncoreCaches returns all the logical CPU IDs associated with the given
// UncoreCache IDs in this CPUDetails.
func (d CPUDetails) CPUsInUncoreCaches(ids ...int) cpuset.CPUSet {
var cpuIDs []int
for _, id := range ids {
for cpu, info := range d {
if info.UncoreCacheID == id {
cpuIDs = append(cpuIDs, cpu)
}
}
}
return cpuset.New(cpuIDs...)
}
// NUMANodes returns all of the NUMANode IDs associated with the CPUs in this
// CPUDetails.
func (d CPUDetails) NUMANodes() cpuset.CPUSet {
@ -245,6 +318,16 @@ func (d CPUDetails) CPUsInCores(ids ...int) cpuset.CPUSet {
return cpuset.New(cpuIDs...)
}
func getUncoreCacheID(core cadvisorapi.Core) int {
if len(core.UncoreCaches) < 1 {
// If cadvisor did not report any uncore caches, fall back to socket alignment,
// since an uncore cache is not shared across sockets.
return core.SocketID
}
// Even though the cadvisor API returns a slice, we only expect either zero or one uncore cache per core,
// so everything past the first entry is ignored.
return core.UncoreCaches[0].Id
}
// Discover returns CPUTopology based on cadvisor node info
func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
if machineInfo.NumCores == 0 {
@ -260,9 +343,10 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
if coreID, err := getUniqueCoreID(core.Threads); err == nil {
for _, cpu := range core.Threads {
CPUDetails[cpu] = CPUInfo{
CoreID: coreID,
SocketID: core.SocketID,
NUMANodeID: node.Id,
CoreID: coreID,
SocketID: core.SocketID,
NUMANodeID: node.Id,
UncoreCacheID: getUncoreCacheID(core),
}
}
} else {
@ -273,11 +357,12 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
}
return &CPUTopology{
NumCPUs: machineInfo.NumCores,
NumSockets: machineInfo.NumSockets,
NumCores: numPhysicalCores,
NumNUMANodes: CPUDetails.NUMANodes().Size(),
CPUDetails: CPUDetails,
NumCPUs: machineInfo.NumCores,
NumSockets: machineInfo.NumSockets,
NumCores: numPhysicalCores,
NumNUMANodes: CPUDetails.NUMANodes().Size(),
NumUncoreCache: CPUDetails.UncoreCaches().Size(),
CPUDetails: CPUDetails,
}, nil
}
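To illustrate how Discover now carries uncore cache information end to end, here is a minimal, hedged sketch (not part of the diff) that feeds a hand-built cadvisor MachineInfo with two uncore caches into Discover; it assumes the in-tree topology package and the cadvisor v1 API are importable from the caller's module.

package main

import (
	"fmt"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
)

func main() {
	// Single socket, four single-threaded cores, split across two uncore caches.
	machineInfo := &cadvisorapi.MachineInfo{
		NumCores:   4,
		NumSockets: 1,
		Topology: []cadvisorapi.Node{
			{Id: 0, Cores: []cadvisorapi.Core{
				{SocketID: 0, Id: 0, Threads: []int{0}, UncoreCaches: []cadvisorapi.Cache{{Id: 0}}},
				{SocketID: 0, Id: 1, Threads: []int{1}, UncoreCaches: []cadvisorapi.Cache{{Id: 0}}},
				{SocketID: 0, Id: 2, Threads: []int{2}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
				{SocketID: 0, Id: 3, Threads: []int{3}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
			}},
		},
	}

	topo, err := topology.Discover(machineInfo)
	if err != nil {
		fmt.Println("discover failed:", err)
		return
	}
	fmt.Println("NumUncoreCache:", topo.NumUncoreCache)                       // 2
	fmt.Println("CPUsPerUncore:", topo.CPUsPerUncore())                       // 2
	fmt.Println("UncoreCacheID of CPU 3:", topo.CPUDetails[3].UncoreCacheID)  // 1
}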

File diff suppressed because it is too large


@ -49,16 +49,16 @@ func returnMachineInfo() cadvisorapi.MachineInfo {
Topology: []cadvisorapi.Node{
{Id: 0,
Cores: []cadvisorapi.Core{
{SocketID: 0, Id: 0, Threads: []int{0, 6}},
{SocketID: 0, Id: 1, Threads: []int{1, 7}},
{SocketID: 0, Id: 2, Threads: []int{2, 8}},
{SocketID: 0, Id: 0, Threads: []int{0, 6}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
{SocketID: 0, Id: 1, Threads: []int{1, 7}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
{SocketID: 0, Id: 2, Threads: []int{2, 8}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
},
},
{Id: 1,
Cores: []cadvisorapi.Core{
{SocketID: 1, Id: 0, Threads: []int{3, 9}},
{SocketID: 1, Id: 1, Threads: []int{4, 10}},
{SocketID: 1, Id: 2, Threads: []int{5, 11}},
{SocketID: 1, Id: 0, Threads: []int{3, 9}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
{SocketID: 1, Id: 1, Threads: []int{4, 10}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
{SocketID: 1, Id: 2, Threads: []int{5, 11}, UncoreCaches: []cadvisorapi.Cache{{Id: 1}}},
},
},
},