Generalize the NUMA distribution algorithm to take cpuGroupSize

This parameter ensures that CPUs are always allocated in groups of size
'cpuGroupSize'. This is important, for example, to ensure that all CPUs (i.e.
hyperthreads) from the same core are handed out together.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
Author: Kevin Klues
Date:   2021-10-14 23:20:05 +00:00
Commit: d54445a84d
Parent: 1436e33642

2 changed files with 114 additions and 21 deletions
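
To make the grouping arithmetic concrete, here is a minimal standalone sketch (the `groupedDistribution` helper is hypothetical, not part of this commit): each NUMA node's even share is rounded down to a multiple of cpuGroupSize, and whatever is left over is handed out in cpuGroupSize-sized chunks.

```go
package main

import "fmt"

// groupedDistribution is a hypothetical helper (not from this commit) showing
// the arithmetic the generalized algorithm uses: round each NUMA node's even
// share down to a multiple of cpuGroupSize, then hand out the rest in
// cpuGroupSize-sized chunks.
func groupedDistribution(numCPUs, numNUMANodes, cpuGroupSize int) (distribution, remainder int) {
    distribution = (numCPUs / numNUMANodes / cpuGroupSize) * cpuGroupSize
    remainder = numCPUs - (distribution * numNUMANodes)
    return distribution, remainder
}

func main() {
    // 62 CPUs across 4 NUMA nodes, one CPU at a time (cpuGroupSize=1, the old behavior):
    fmt.Println(groupedDistribution(62, 4, 1)) // 15 2: 15 CPUs per node, 2 spill over
    // 62 CPUs across 4 NUMA nodes, full cores only (cpuGroupSize=2):
    fmt.Println(groupedDistribution(62, 4, 2)) // 14 6: 14 CPUs per node, 3 full cores spill over
}
```

With cpuGroupSize=1 this degenerates to the original algorithm, which is why the existing tests can be reused unchanged (see the test file below).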

pkg/kubelet/cm/cpumanager/cpu_assignment.go

@@ -482,7 +482,13 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet
 // evenly distributed and remainder CPUs are allocated. The subset with the
 // lowest "balance score" will receive the CPUs in order to keep the overall
 // allocation of CPUs as "balanced" as possible.
-func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+//
+// NOTE: This algorithm has been generalized to take an additional
+// 'cpuGroupSize' parameter to ensure that CPUs are always allocated in groups
+// of size 'cpuGroupSize' according to the algorithm described above. This is
+// important, for example, to ensure that all CPUs (i.e. all hyperthreads) from
+// a single core are allocated together.
+func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuGroupSize int) (cpuset.CPUSet, error) {
 	acc := newCPUAccumulator(topo, availableCPUs, numCPUs)
 	if acc.isSatisfied() {
 		return acc.result, nil
@@ -519,9 +525,20 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet
 			return
 		}
 
-		// Check that each NUMA node in this combination can provide
-		// (at least) numCPUs/len(combo) of the total cpus required.
-		distribution := numCPUs / len(combo)
+		// Check that CPUs can be handed out in groups of size
+		// 'cpuGroupSize' across the NUMA nodes in this combo.
+		numCPUGroups := 0
+		for _, numa := range combo {
+			numCPUGroups += (acc.details.CPUsInNUMANodes(numa).Size() / cpuGroupSize)
+		}
+		if (numCPUGroups * cpuGroupSize) < numCPUs {
+			return
+		}
+
+		// Check that each NUMA node in this combination can allocate an
+		// even distribution of CPUs in groups of size 'cpuGroupSize',
+		// modulo some remainder.
+		distribution := (numCPUs / len(combo) / cpuGroupSize) * cpuGroupSize
 		for _, numa := range combo {
 			cpus := acc.details.CPUsInNUMANodes(numa)
 			if cpus.Size() < distribution {
@@ -529,17 +546,19 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet
 			}
 		}
 
-		// Calculate how many CPUs will be available on each NUMA node
-		// in 'combo' after allocating an even distribution of CPUs from
-		// them. This will be used to calculate a "balance" score for the
-		// combo to help decide which combo should ultimately be chosen.
+		// Calculate how many CPUs will be available on each NUMA node in
+		// 'combo' after allocating an even distribution of CPU groups of
+		// size 'cpuGroupSize' from them. This will be used in the "balance
+		// score" calculation to help decide if this combo should
+		// ultimately be chosen.
 		availableAfterAllocation := make(mapIntInt, len(combo))
 		for _, numa := range combo {
 			availableAfterAllocation[numa] = acc.details.CPUsInNUMANodes(numa).Size() - distribution
 		}
 
 		// Check if there are any remaining CPUs to distribute across the
-		// NUMA nodes once CPUs have been evenly distributed.
+		// NUMA nodes once CPUs have been evenly distributed in groups of
+		// size 'cpuGroupSize'.
 		remainder := numCPUs - (distribution * len(combo))
 
 		// Declare a set of local variables to help track the "balance
@@ -558,19 +577,22 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet
 
 		// Otherwise, find the best "balance score" when allocating the
 		// remainder CPUs across different subsets of NUMA nodes in 'combo'.
-		acc.iterateCombinations(combo, remainder, func(subset []int) {
+		// These remainder CPUs are handed out in groups of size 'cpuGroupSize'.
+		acc.iterateCombinations(combo, remainder/cpuGroupSize, func(subset []int) {
 			// Make a local copy of 'availableAfterAllocation'.
 			availableAfterAllocation := availableAfterAllocation.Clone()
 
-			// For all NUMA nodes in 'subset', remove 1 more CPU (to account
-			// for any remainder CPUs that will be allocated on them).
+			// For all NUMA nodes in 'subset', remove another
+			// 'cpuGroupSize' number of CPUs (to account for any remainder
+			// CPUs that will be allocated on them).
 			for _, numa := range subset {
-				availableAfterAllocation[numa] -= 1
+				availableAfterAllocation[numa] -= cpuGroupSize
 			}
 
 			// Calculate the "balance score" as the standard deviation of
 			// the number of CPUs available on all NUMA nodes in 'combo'
-			// assuming the remainder CPUs are spread across 'subset'.
+			// after the remainder CPUs have been allocated across 'subset'
+			// in groups of size 'cpuGroupSize'.
 			balance := standardDeviation(availableAfterAllocation.Values())
 			if balance < bestLocalBalance {
 				bestLocalBalance = balance
@@ -596,16 +618,18 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet
 	}
 
 	// Otherwise, start allocating CPUs from the NUMA node combination
-	// chosen. First allocate numCPUs / len(bestCombo) CPUs from each node.
-	distribution := numCPUs / len(bestCombo)
+	// chosen. First allocate an even distribution of CPUs in groups of
+	// size 'cpuGroupSize' from 'bestCombo'.
+	distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize
	for _, numa := range bestCombo {
 		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution)
 		acc.take(cpus)
 	}
 
-	// Then allocate any remaining CPUs from each NUMA node in the remainder set.
+	// Then allocate any remaining CPUs in groups of size 'cpuGroupSize'
+	// from each NUMA node in the remainder set.
 	for _, numa := range bestRemainder {
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), 1)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize)
 		acc.take(cpus)
 	}
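
For reference, the "balance score" used throughout the hunks above is the standard deviation of the CPUs left free on each NUMA node after a candidate allocation. Below is a self-contained sketch, assuming a plain []int helper in place of the kubelet's internal standardDeviation/mapIntInt helpers:

```go
package main

import (
    "fmt"
    "math"
)

// standardDeviation is a stand-in for the kubelet's internal helper: the
// "balance score" is the population standard deviation of the CPUs left
// free on each NUMA node after a candidate allocation.
func standardDeviation(xs []int) float64 {
    var sum float64
    for _, x := range xs {
        sum += float64(x)
    }
    mean := sum / float64(len(xs))
    var variance float64
    for _, x := range xs {
        variance += (float64(x) - mean) * (float64(x) - mean)
    }
    return math.Sqrt(variance / float64(len(xs)))
}

func main() {
    // Example: 4 NUMA nodes with 20 free CPUs each, allocating 62 CPUs with
    // cpuGroupSize=2: distribution = (62/4/2)*2 = 14 per node, remainder = 6,
    // i.e. 3 groups of 2 spread across a 3-node subset.
    availableAfterAllocation := []int{20 - 14 - 2, 20 - 14 - 2, 20 - 14 - 2, 20 - 14}
    fmt.Printf("balance score: %.3f\n", standardDeviation(availableAfterAllocation))
    // Prints 0.866; the combo/subset pair with the lowest score wins.
}
```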

pkg/kubelet/cm/cpumanager/cpu_assignment_test.go

@@ -666,14 +666,39 @@ func TestTakeByTopologyNUMAPacked(t *testing.T) {
 	}
 }
 
-func TestTakeByTopologyNUMADistributed(t *testing.T) {
+type takeByTopologyExtendedTestCase struct {
+	description   string
+	topo          *topology.CPUTopology
+	availableCPUs cpuset.CPUSet
+	numCPUs       int
+	cpuGroupSize  int
+	expErr        string
+	expResult     cpuset.CPUSet
+}
+
+func commonTakeByTopologyExtendedTestCases(t *testing.T) []takeByTopologyExtendedTestCase {
+	var extendedTestCases []takeByTopologyExtendedTestCase
+
 	testCases := commonTakeByTopologyTestCases(t)
-	testCases = append(testCases, []takeByTopologyTestCase{
+	for _, tc := range testCases {
+		extendedTestCases = append(extendedTestCases, takeByTopologyExtendedTestCase{
+			tc.description,
+			tc.topo,
+			tc.availableCPUs,
+			tc.numCPUs,
+			1,
+			tc.expErr,
+			tc.expResult,
+		})
+	}
+
+	extendedTestCases = append(extendedTestCases, []takeByTopologyExtendedTestCase{
 		{
 			"allocate 4 full cores with 2 distributed across each NUMA node",
 			topoDualSocketHT,
 			mustParseCPUSet(t, "0-11"),
 			8,
+			1,
 			"",
 			mustParseCPUSet(t, "0,6,2,8,1,7,3,9"),
 		},
@@ -682,6 +707,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			64,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-67,70-77"),
 		},
@@ -690,6 +716,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			48,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,40-47,50-57,60-67"),
 		},
@@ -698,6 +725,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "2-39,42-79"),
 			48,
+			1,
 			"",
 			mustParseCPUSet(t, "2-9,10-17,20-27,42-49,50-57,60-67"),
 		},
@@ -706,6 +734,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "3-39,43-79"),
 			48,
+			1,
 			"",
 			mustParseCPUSet(t, "10-17,20-27,30-37,50-57,60-67,70-77"),
 		},
@@ -714,6 +743,7 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"),
 			16,
+			1,
 			"",
 			mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"),
 		},
@@ -722,38 +752,77 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-2,10-12,20-22,30-32,40-41,50-51,60-61,70-71"),
 			16,
+			1,
 			"",
 			mustParseCPUSet(t, "0-1,10-11,20-21,30-31,40-41,50-51,60-61,70-71"),
 		},
+	}...)
+	return extendedTestCases
+}
+
+func TestTakeByTopologyNUMADistributed(t *testing.T) {
+	testCases := commonTakeByTopologyExtendedTestCases(t)
+	testCases = append(testCases, []takeByTopologyExtendedTestCase{
 		{
 			"allocate 13 full cores distributed across the first 2 NUMA nodes",
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			26,
+			1,
 			"",
 			mustParseCPUSet(t, "0-6,10-16,40-45,50-55"),
 		},
+		{
+			"allocate 13 full cores distributed across the first 2 NUMA nodes (cpuGroupSize 2)",
+			topoDualSocketMultiNumaPerSocketHT,
+			mustParseCPUSet(t, "0-79"),
+			26,
+			2,
+			"",
+			mustParseCPUSet(t, "0-6,10-15,40-46,50-55"),
+		},
 		{
 			"allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 0, 1",
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-79"),
 			62,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-47,50-57,60-66,70-76"),
 		},
+		{
+			"allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 1, 2 (cpuGroupSize 2)",
+			topoDualSocketMultiNumaPerSocketHT,
+			mustParseCPUSet(t, "0-79"),
+			62,
+			2,
+			"",
+			mustParseCPUSet(t, "0-7,10-17,20-27,30-36,40-47,50-57,60-67,70-76"),
+		},
 		{
 			"allocate 31 full cores with 15 CPUs distributed across each NUMA node and 1 CPU spilling over to each of NUMA 2, 3 (to keep balance)",
 			topoDualSocketMultiNumaPerSocketHT,
 			mustParseCPUSet(t, "0-8,10-18,20-39,40-48,50-58,60-79"),
 			62,
+			1,
 			"",
 			mustParseCPUSet(t, "0-7,10-17,20-27,30-37,40-46,50-56,60-67,70-77"),
 		},
+		{
+			"allocate 31 full cores with 14 CPUs distributed across each NUMA node and 2 CPUs spilling over to each of NUMA 0, 2, 3 (to keep balance with cpuGroupSize 2)",
+			topoDualSocketMultiNumaPerSocketHT,
+			mustParseCPUSet(t, "0-8,10-18,20-39,40-48,50-58,60-79"),
+			62,
+			2,
+			"",
+			mustParseCPUSet(t, "0-7,10-16,20-27,30-37,40-47,50-56,60-67,70-77"),
+		},
 	}...)
 
 	for _, tc := range testCases {
 		t.Run(tc.description, func(t *testing.T) {
-			result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs)
+			result, err := takeByTopologyNUMADistributed(tc.topo, tc.availableCPUs, tc.numCPUs, tc.cpuGroupSize)
 			if tc.expErr != "" && err.Error() != tc.expErr {
 				t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err)
 			}
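
As a sanity check on the new cpuGroupSize=2 expectations, the arithmetic for the "allocate 13 full cores ... (cpuGroupSize 2)" case above works out as follows (illustrative snippet, not part of the test file):

```go
package main

import "fmt"

func main() {
    // "allocate 13 full cores ... (cpuGroupSize 2)": 26 CPUs over 2 NUMA
    // nodes, handed out only in pairs of hyperthreads (full cores).
    numCPUs, numNodes, cpuGroupSize := 26, 2, 2
    distribution := (numCPUs / numNodes / cpuGroupSize) * cpuGroupSize // 12 CPUs (6 cores) per node
    remainder := numCPUs - distribution*numNodes                       // 2 CPUs (1 core) left to place
    fmt.Println(distribution, remainder)                               // prints: 12 2
    // One node therefore receives 14 CPUs and the other 12, matching the
    // expected cpuset "0-6,10-15,40-46,50-55" (7+7 CPUs vs 6+6 CPUs).
}
```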