Fix discovering numa distance when node ids are not starting from 0 or their ids are not sequential

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
PiotrProkop 2022-11-09 13:11:09 +01:00
parent 37e73b419e
commit 315f0dc6f1
9 changed files with 281 additions and 114 deletions

View File

@ -162,7 +162,7 @@ func TestValidateStaticPolicyOptions(t *testing.T) {
t.Run(testCase.description, func(t *testing.T) {
topoMgrPolicy := topologymanager.NewNonePolicy()
if testCase.topoMgrPolicy == topologymanager.PolicySingleNumaNode {
topoMgrPolicy, _ = topologymanager.NewSingleNumaNodePolicy(&topologymanager.NUMAInfo{}, map[string]string{})
topoMgrPolicy = topologymanager.NewSingleNumaNodePolicy(&topologymanager.NUMAInfo{}, topologymanager.PolicyOptions{})
}
topoMgrStore := topologymanager.NewFakeManagerWithPolicy(topoMgrPolicy)

View File

@ -30,7 +30,7 @@ const (
defaultNodeDir = "/sys/devices/system/node/"
)
type NUMADistances [][]uint64
type NUMADistances map[int][]uint64
type NUMAInfo struct {
Nodes []int
@ -41,13 +41,13 @@ type NUMASysFs struct {
nodeDir string
}
func NewNUMAInfo(topology []cadvisorapi.Node) (*NUMAInfo, error) {
return newNUMAInfo(topology, &NUMASysFs{nodeDir: defaultNodeDir})
func NewNUMAInfo(topology []cadvisorapi.Node, opts PolicyOptions) (*NUMAInfo, error) {
return newNUMAInfo(topology, &NUMASysFs{nodeDir: defaultNodeDir}, opts)
}
func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs) (*NUMAInfo, error) {
func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs, opts PolicyOptions) (*NUMAInfo, error) {
var numaNodes []int
distances := make([][]uint64, len(topology))
distances := map[int][]uint64{}
for _, node := range topology {
numaNodes = append(numaNodes, node.Id)
@ -55,9 +55,13 @@ func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs) (*NUMAInfo, erro
// For now we need to retrieve this information from sysfs.
// TODO: Update this as follows once a version of cadvisor with this commit is vendored in https://github.com/google/cadvisor/commit/24dd1de08a72cfee661f6178454db995900c0fee
// distances[node.Id] = node.Distances[:]
nodeDistance, err := sysFs.GetDistances(node.Id)
if err != nil {
return nil, fmt.Errorf("error getting NUMA distances from sysfs: %w", err)
var nodeDistance []uint64
if opts.PreferClosestNUMA {
var err error
nodeDistance, err = sysFs.GetDistances(node.Id)
if err != nil {
return nil, fmt.Errorf("error getting NUMA distances from sysfs: %w", err)
}
}
distances[node.Id] = nodeDistance
}

View File

@ -36,6 +36,7 @@ func TestNUMAInfo(t *testing.T) {
topology []cadvisorapi.Node
expectedNUMAInfo *NUMAInfo
expectedErr error
opts PolicyOptions
}{
{
name: "positive test 1 node",
@ -47,7 +48,22 @@ func TestNUMAInfo(t *testing.T) {
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0},
NUMADistances: NUMADistances{
{
0: nil,
},
},
opts: PolicyOptions{},
},
{
name: "positive test 1 node, with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 0,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0},
NUMADistances: NUMADistances{
0: {
10,
11,
12,
@ -55,6 +71,9 @@ func TestNUMAInfo(t *testing.T) {
},
},
},
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
{
name: "positive test 2 nodes",
@ -69,13 +88,31 @@ func TestNUMAInfo(t *testing.T) {
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1},
NUMADistances: NUMADistances{
{
0: nil,
1: nil,
},
},
},
{
name: "positive test 2 nodes, with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 1,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1},
NUMADistances: NUMADistances{
0: {
10,
11,
12,
12,
},
{
1: {
11,
10,
12,
@ -83,6 +120,9 @@ func TestNUMAInfo(t *testing.T) {
},
},
},
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
{
name: "positive test 3 nodes",
@ -100,19 +140,41 @@ func TestNUMAInfo(t *testing.T) {
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1, 2},
NUMADistances: NUMADistances{
{
0: nil,
1: nil,
2: nil,
},
},
},
{
name: "positive test 3 nodes, with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 1,
},
{
Id: 2,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1, 2},
NUMADistances: NUMADistances{
0: {
10,
11,
12,
12,
},
{
1: {
11,
10,
12,
12,
},
{
2: {
12,
12,
10,
@ -120,6 +182,9 @@ func TestNUMAInfo(t *testing.T) {
},
},
},
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
{
name: "positive test 4 nodes",
@ -140,25 +205,51 @@ func TestNUMAInfo(t *testing.T) {
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1, 2, 3},
NUMADistances: NUMADistances{
{
0: nil,
1: nil,
2: nil,
3: nil,
},
},
},
{
name: "positive test 4 nodes, with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 1,
},
{
Id: 2,
},
{
Id: 3,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1, 2, 3},
NUMADistances: NUMADistances{
0: {
10,
11,
12,
12,
},
{
1: {
11,
10,
12,
12,
},
{
2: {
12,
12,
10,
11,
},
{
3: {
12,
12,
11,
@ -166,9 +257,12 @@ func TestNUMAInfo(t *testing.T) {
},
},
},
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
{
name: "negative test 1 node",
name: "negative test 1 node, no distance file with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 9,
@ -176,6 +270,94 @@ func TestNUMAInfo(t *testing.T) {
},
expectedNUMAInfo: nil,
expectedErr: fmt.Errorf("no such file or directory"),
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
{
name: "one node and its id is 1",
topology: []cadvisorapi.Node{
{
Id: 1,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{1},
NUMADistances: NUMADistances{
1: nil,
},
},
},
{
name: "one node and its id is 1, with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 1,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{1},
NUMADistances: NUMADistances{
1: {
11,
10,
12,
12,
},
},
},
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
{
name: "two nodes not sequential",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 2,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 2},
NUMADistances: NUMADistances{
0: nil,
2: nil,
},
},
},
{
name: "two nodes not sequential, with PreferClosestNUMA",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 2,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 2},
NUMADistances: NUMADistances{
0: {
10,
11,
12,
12,
},
2: {
12,
12,
10,
11,
},
},
},
opts: PolicyOptions{
PreferClosestNUMA: true,
},
},
}
@ -209,7 +391,7 @@ func TestNUMAInfo(t *testing.T) {
sysFs := &NUMASysFs{nodeDir: nodeDir}
for _, tcase := range tcases {
topology, err := newNUMAInfo(tcase.topology, sysFs)
topology, err := newNUMAInfo(tcase.topology, sysFs, tcase.opts)
if tcase.expectedErr == nil && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tcase.expectedErr != nil && err == nil {
@ -231,7 +413,7 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
tcases := []struct {
name string
bm []int
distance [][]uint64
distance NUMADistances
expectedAvg float64
}{
{
@ -239,8 +421,8 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
bm: []int{
0,
},
distance: [][]uint64{
{
distance: NUMADistances{
0: {
10,
},
},
@ -251,12 +433,12 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
bm: []int{
0,
},
distance: [][]uint64{
{
distance: NUMADistances{
0: {
10,
11,
},
{
1: {
11,
10,
},
@ -269,12 +451,12 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
0,
1,
},
distance: [][]uint64{
{
distance: NUMADistances{
0: {
10,
11,
},
{
1: {
11,
10,
},
@ -287,26 +469,26 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
0,
2,
},
distance: [][]uint64{
{
distance: NUMADistances{
0: {
10,
11,
12,
12,
},
{
1: {
11,
10,
12,
12,
},
{
2: {
12,
12,
10,
11,
},
{
3: {
12,
12,
11,
@ -322,26 +504,26 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
2,
3,
},
distance: [][]uint64{
{
distance: NUMADistances{
0: {
10,
11,
12,
12,
},
{
1: {
11,
10,
12,
12,
},
{
2: {
12,
12,
10,
11,
},
{
3: {
12,
12,
11,
@ -353,7 +535,7 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
{
name: "0 NUMA node, 0 set in bitmask",
bm: []int{},
distance: [][]uint64{},
distance: NUMADistances{},
expectedAvg: 0,
},
}
@ -515,10 +697,10 @@ func TestClosest(t *testing.T) {
candidate: NewTestBitMask(0, 2),
expected: "current",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 10, 10},
{10, 10, 10},
{10, 10, 10},
NUMADistances: NUMADistances{
0: {10, 10, 10},
1: {10, 10, 10},
2: {10, 10, 10},
},
},
},
@ -528,11 +710,11 @@ func TestClosest(t *testing.T) {
candidate: NewTestBitMask(0, 2),
expected: "candidate",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 10, 10, 10},
{10, 10, 10, 10},
{10, 10, 10, 10},
{10, 10, 10, 10},
NUMADistances: NUMADistances{
0: {10, 10, 10, 10},
1: {10, 10, 10, 10},
2: {10, 10, 10, 10},
3: {10, 10, 10, 10},
},
},
},
@ -542,11 +724,11 @@ func TestClosest(t *testing.T) {
candidate: NewTestBitMask(0, 1),
expected: "candidate",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 11, 12, 12},
{11, 10, 12, 12},
{12, 12, 10, 11},
{12, 12, 11, 10},
NUMADistances: NUMADistances{
0: {10, 11, 12, 12},
1: {11, 10, 12, 12},
2: {12, 12, 10, 11},
3: {12, 12, 11, 10},
},
},
},
@ -556,11 +738,11 @@ func TestClosest(t *testing.T) {
candidate: NewTestBitMask(0, 3),
expected: "current",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 11, 12, 12},
{11, 10, 12, 12},
{12, 12, 10, 11},
{12, 12, 11, 10},
NUMADistances: NUMADistances{
0: {10, 11, 12, 12},
1: {11, 10, 12, 12},
2: {12, 12, 10, 11},
3: {12, 12, 11, 10},
},
},
},

View File

@ -28,13 +28,8 @@ var _ Policy = &bestEffortPolicy{}
const PolicyBestEffort string = "best-effort"
// NewBestEffortPolicy returns best-effort policy.
func NewBestEffortPolicy(numaInfo *NUMAInfo, topologyPolicyOptions map[string]string) (Policy, error) {
opts, err := NewPolicyOptions(topologyPolicyOptions)
if err != nil {
return nil, err
}
return &bestEffortPolicy{numaInfo: numaInfo, opts: opts}, nil
func NewBestEffortPolicy(numaInfo *NUMAInfo, opts PolicyOptions) Policy {
return &bestEffortPolicy{numaInfo: numaInfo, opts: opts}
}
func (p *bestEffortPolicy) Name() string {

View File

@ -26,13 +26,8 @@ var _ Policy = &restrictedPolicy{}
const PolicyRestricted string = "restricted"
// NewRestrictedPolicy returns restricted policy.
func NewRestrictedPolicy(numaInfo *NUMAInfo, topologyPolicyOptions map[string]string) (Policy, error) {
opts, err := NewPolicyOptions(topologyPolicyOptions)
if err != nil {
return nil, err
}
return &restrictedPolicy{bestEffortPolicy{numaInfo: numaInfo, opts: opts}}, nil
func NewRestrictedPolicy(numaInfo *NUMAInfo, opts PolicyOptions) Policy {
return &restrictedPolicy{bestEffortPolicy{numaInfo: numaInfo, opts: opts}}
}
func (p *restrictedPolicy) Name() string {

View File

@ -28,13 +28,8 @@ var _ Policy = &singleNumaNodePolicy{}
const PolicySingleNumaNode string = "single-numa-node"
// NewSingleNumaNodePolicy returns single-numa-node policy.
func NewSingleNumaNodePolicy(numaInfo *NUMAInfo, topologyPolicyOptions map[string]string) (Policy, error) {
opts, err := NewPolicyOptions(topologyPolicyOptions)
if err != nil {
return nil, err
}
return &singleNumaNodePolicy{numaInfo: numaInfo, opts: opts}, nil
func NewSingleNumaNodePolicy(numaInfo *NUMAInfo, opts PolicyOptions) Policy {
return &singleNumaNodePolicy{numaInfo: numaInfo, opts: opts}
}
func (p *singleNumaNodePolicy) Name() string {

View File

@ -1575,8 +1575,8 @@ func commonNUMAInfoTwoNodes() *NUMAInfo {
return &NUMAInfo{
Nodes: []int{0, 1},
NUMADistances: NUMADistances{
{10, 11},
{11, 10},
0: {10, 11},
1: {11, 10},
},
}
}
@ -1585,10 +1585,10 @@ func commonNUMAInfoFourNodes() *NUMAInfo {
return &NUMAInfo{
Nodes: []int{0, 1, 2, 3},
NUMADistances: NUMADistances{
{10, 11, 12, 12},
{11, 10, 12, 12},
{12, 12, 10, 11},
{12, 12, 11, 10},
0: {10, 11, 12, 12},
1: {11, 10, 12, 12},
2: {12, 12, 10, 11},
3: {12, 12, 11, 10},
},
}
}
@ -1597,14 +1597,14 @@ func commonNUMAInfoEightNodes() *NUMAInfo {
return &NUMAInfo{
Nodes: []int{0, 1, 2, 3, 4, 5, 6, 7},
NUMADistances: NUMADistances{
{10, 11, 12, 12, 30, 30, 30, 30},
{11, 10, 12, 12, 30, 30, 30, 30},
{12, 12, 10, 11, 30, 30, 30, 30},
{12, 12, 11, 10, 30, 30, 30, 30},
{30, 30, 30, 30, 10, 11, 12, 12},
{30, 30, 30, 30, 11, 10, 12, 12},
{30, 30, 30, 30, 12, 12, 10, 11},
{30, 30, 30, 30, 12, 12, 13, 10},
0: {10, 11, 12, 12, 30, 30, 30, 30},
1: {11, 10, 12, 12, 30, 30, 30, 30},
2: {12, 12, 10, 11, 30, 30, 30, 30},
3: {12, 12, 11, 10, 30, 30, 30, 30},
4: {30, 30, 30, 30, 10, 11, 12, 12},
5: {30, 30, 30, 30, 11, 10, 12, 12},
6: {30, 30, 30, 30, 12, 12, 10, 11},
7: {30, 30, 30, 30, 12, 12, 13, 10},
},
}
}

View File

@ -133,7 +133,12 @@ var _ Manager = &manager{}
func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, topologyScopeName string, topologyPolicyOptions map[string]string) (Manager, error) {
klog.InfoS("Creating topology manager with policy per scope", "topologyPolicyName", topologyPolicyName, "topologyScopeName", topologyScopeName)
numaInfo, err := NewNUMAInfo(topology)
opts, err := NewPolicyOptions(topologyPolicyOptions)
if err != nil {
return nil, err
}
numaInfo, err := NewNUMAInfo(topology, opts)
if err != nil {
return nil, fmt.Errorf("cannot discover NUMA topology: %w", err)
}
@ -149,22 +154,13 @@ func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, topology
policy = NewNonePolicy()
case PolicyBestEffort:
policy, err = NewBestEffortPolicy(numaInfo, topologyPolicyOptions)
if err != nil {
return nil, err
}
policy = NewBestEffortPolicy(numaInfo, opts)
case PolicyRestricted:
policy, err = NewRestrictedPolicy(numaInfo, topologyPolicyOptions)
if err != nil {
return nil, err
}
policy = NewRestrictedPolicy(numaInfo, opts)
case PolicySingleNumaNode:
policy, err = NewSingleNumaNodePolicy(numaInfo, topologyPolicyOptions)
if err != nil {
return nil, err
}
policy = NewSingleNumaNodePolicy(numaInfo, opts)
default:
return nil, fmt.Errorf("unknown policy: \"%s\"", topologyPolicyName)

View File

@ -206,15 +206,15 @@ func TestAdmit(t *testing.T) {
numaInfo := &NUMAInfo{
Nodes: []int{0, 1},
NUMADistances: NUMADistances{
{10, 11},
{11, 10},
0: {10, 11},
1: {11, 10},
},
}
opts := map[string]string{}
bePolicy, _ := NewBestEffortPolicy(numaInfo, opts)
restrictedPolicy, _ := NewRestrictedPolicy(numaInfo, opts)
singleNumaPolicy, _ := NewSingleNumaNodePolicy(numaInfo, opts)
opts := PolicyOptions{}
bePolicy := NewBestEffortPolicy(numaInfo, opts)
restrictedPolicy := NewRestrictedPolicy(numaInfo, opts)
singleNumaPolicy := NewSingleNumaNodePolicy(numaInfo, opts)
tcases := []struct {
name string