Improved multi-numa alignment in Topology Manager: add NUMAInfo

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
PiotrProkop 2022-10-27 10:37:20 +02:00
parent daee219210
commit 58ef3f202a
4 changed files with 819 additions and 3 deletions

View File

@ -35,6 +35,8 @@ type BitMask interface {
IsSet(bit int) bool
AnySet(bits []int) bool
IsNarrowerThan(mask BitMask) bool
IsLessThan(mask BitMask) bool
IsGreaterThan(mask BitMask) bool
String() string
Count() int
GetBits() []int
@ -143,13 +145,21 @@ func (s *bitMask) IsEqual(mask BitMask) bool {
// lower-numbered bits set wins out.
func (s *bitMask) IsNarrowerThan(mask BitMask) bool {
if s.Count() == mask.Count() {
if *s < *mask.(*bitMask) {
return true
}
return s.IsLessThan(mask)
}
return s.Count() < mask.Count()
}
// IsLessThan checks which bitmask has more lower-numbered bits set.
func (s *bitMask) IsLessThan(mask BitMask) bool {
return *s < *mask.(*bitMask)
}
// IsGreaterThan checks which bitmask has more higher-numbered bits set.
func (s *bitMask) IsGreaterThan(mask BitMask) bool {
return *s > *mask.(*bitMask)
}
// String converts mask to string
func (s *bitMask) String() string {
grouping := 2

View File

@ -630,3 +630,75 @@ func TestIterateBitMasks(t *testing.T) {
}
}
}
func TestIsLessThan(t *testing.T) {
tcases := []struct {
name string
firstMask []int
secondMask []int
expectedFirstLower bool
}{
{
name: "Check which value is lower of masks with equal bits set 1/1",
firstMask: []int{0},
secondMask: []int{0},
expectedFirstLower: false,
},
{
name: "Check which value is lower of masks with unequal bits set 2/1",
firstMask: []int{1},
secondMask: []int{0},
expectedFirstLower: false,
},
{
name: "Check which value is lower of masks with unequal bits set 1/2",
firstMask: []int{0},
secondMask: []int{1},
expectedFirstLower: true,
},
}
for _, tc := range tcases {
firstMask, _ := NewBitMask(tc.firstMask...)
secondMask, _ := NewBitMask(tc.secondMask...)
expectedFirstLower := firstMask.IsLessThan(secondMask)
if expectedFirstLower != tc.expectedFirstLower {
t.Errorf("Expected value to be %v, got %v", tc.expectedFirstLower, expectedFirstLower)
}
}
}
func TestIsGreaterThan(t *testing.T) {
tcases := []struct {
name string
firstMask []int
secondMask []int
expectedFirstGreater bool
}{
{
name: "Check which value is greater of masks with equal bits set 1/1",
firstMask: []int{0},
secondMask: []int{0},
expectedFirstGreater: false,
},
{
name: "Check which value is greater of masks with unequal bits set 2/1",
firstMask: []int{1},
secondMask: []int{0},
expectedFirstGreater: true,
},
{
name: "Check which value is greater of masks with unequal bits set 1/2",
firstMask: []int{0},
secondMask: []int{1},
expectedFirstGreater: false,
},
}
for _, tc := range tcases {
firstMask, _ := NewBitMask(tc.firstMask...)
secondMask, _ := NewBitMask(tc.secondMask...)
expectedFirstGreater := firstMask.IsGreaterThan(secondMask)
if expectedFirstGreater != tc.expectedFirstGreater {
t.Errorf("Expected value to be %v, got %v", tc.expectedFirstGreater, expectedFirstGreater)
}
}
}

View File

@ -0,0 +1,150 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package topologymanager
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
)
const (
defaultNodeDir = "/sys/devices/system/node/"
)
type NUMADistances [][]uint64
type NUMAInfo struct {
Nodes []int
NUMADistances NUMADistances
}
type NUMASysFs struct {
nodeDir string
}
func NewNUMAInfo(topology []cadvisorapi.Node) (*NUMAInfo, error) {
return newNUMAInfo(topology, &NUMASysFs{nodeDir: defaultNodeDir})
}
func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs) (*NUMAInfo, error) {
var numaNodes []int
distances := make([][]uint64, len(topology))
for _, node := range topology {
numaNodes = append(numaNodes, node.Id)
// Populate the NUMA distances
// For now we need to retrieve this information from sysfs.
// TODO: Update this as follows once a version of cadvisor with this commit is vendored in https://github.com/google/cadvisor/commit/24dd1de08a72cfee661f6178454db995900c0fee
// distances[node.Id] = node.Distances[:]
nodeDistance, err := sysFs.GetDistances(node.Id)
if err != nil {
return nil, fmt.Errorf("error getting NUMA distances from sysfs: %w", err)
}
distances[node.Id] = nodeDistance
}
numaInfo := &NUMAInfo{
Nodes: numaNodes,
NUMADistances: distances,
}
return numaInfo, nil
}
func (n *NUMAInfo) Narrowest(m1 bitmask.BitMask, m2 bitmask.BitMask) bitmask.BitMask {
if m1.IsNarrowerThan(m2) {
return m1
}
return m2
}
func (n *NUMAInfo) Closest(m1 bitmask.BitMask, m2 bitmask.BitMask) bitmask.BitMask {
// If the length of both bitmasks aren't the same, choose the one that is narrowest.
if m1.Count() != m2.Count() {
return n.Narrowest(m1, m2)
}
m1Distance := n.NUMADistances.CalculateAverageFor(m1)
m2Distance := n.NUMADistances.CalculateAverageFor(m2)
// If average distance is the same, take bitmask with more lower-number bits set.
if m1Distance == m2Distance {
if m1.IsLessThan(m2) {
return m1
}
return m2
}
// Otherwise, return the bitmask with the shortest average distance between NUMA nodes.
if m1Distance < m2Distance {
return m1
}
return m2
}
func (n NUMAInfo) DefaultAffinityMask() bitmask.BitMask {
defaultAffinity, _ := bitmask.NewBitMask(n.Nodes...)
return defaultAffinity
}
func (d NUMADistances) CalculateAverageFor(bm bitmask.BitMask) float64 {
// This should never happen, but just in case make sure we do not divide by zero.
if bm.Count() == 0 {
return 0
}
var count float64 = 0
var sum float64 = 0
for _, node1 := range bm.GetBits() {
for _, node2 := range bm.GetBits() {
sum += float64(d[node1][node2])
count++
}
}
return sum / count
}
func (s NUMASysFs) GetDistances(nodeId int) ([]uint64, error) {
distancePath := fmt.Sprintf("%s/node%d/distance", s.nodeDir, nodeId)
distance, err := ioutil.ReadFile(distancePath)
if err != nil {
return nil, fmt.Errorf("problem reading %s: %w", distancePath, err)
}
rawDistances := strings.TrimSpace(string(distance))
return splitDistances(rawDistances)
}
func splitDistances(rawDistances string) ([]uint64, error) {
distances := []uint64{}
for _, distance := range strings.Split(rawDistances, " ") {
distanceUint, err := strconv.ParseUint(distance, 10, 64)
if err != nil {
return nil, fmt.Errorf("cannot convert %s to int", distance)
}
distances = append(distances, distanceUint)
}
return distances, nil
}

View File

@ -0,0 +1,584 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package topologymanager
import (
"bytes"
"fmt"
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
)
func TestNUMAInfo(t *testing.T) {
tcases := []struct {
name string
topology []cadvisorapi.Node
expectedNUMAInfo *NUMAInfo
expectedErr error
}{
{
name: "positive test 1 node",
topology: []cadvisorapi.Node{
{
Id: 0,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0},
NUMADistances: NUMADistances{
{
10,
11,
12,
12,
},
},
},
},
{
name: "positive test 2 nodes",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 1,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1},
NUMADistances: NUMADistances{
{
10,
11,
12,
12,
},
{
11,
10,
12,
12,
},
},
},
},
{
name: "positive test 3 nodes",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 1,
},
{
Id: 2,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1, 2},
NUMADistances: NUMADistances{
{
10,
11,
12,
12,
},
{
11,
10,
12,
12,
},
{
12,
12,
10,
11,
},
},
},
},
{
name: "positive test 4 nodes",
topology: []cadvisorapi.Node{
{
Id: 0,
},
{
Id: 1,
},
{
Id: 2,
},
{
Id: 3,
},
},
expectedNUMAInfo: &NUMAInfo{
Nodes: []int{0, 1, 2, 3},
NUMADistances: NUMADistances{
{
10,
11,
12,
12,
},
{
11,
10,
12,
12,
},
{
12,
12,
10,
11,
},
{
12,
12,
11,
10,
},
},
},
},
{
name: "negative test 1 node",
topology: []cadvisorapi.Node{
{
Id: 9,
},
},
expectedNUMAInfo: nil,
expectedErr: fmt.Errorf("no such file or directory"),
},
}
nodeDir, err := os.MkdirTemp("", "TestNUMAInfo")
if err != nil {
t.Fatalf("Unable to create temporary directory: %v", err)
}
defer os.RemoveAll(nodeDir)
numaDistances := map[int]string{
0: "10 11 12 12",
1: "11 10 12 12",
2: "12 12 10 11",
3: "12 12 11 10",
}
for i, distances := range numaDistances {
numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", i))
if err := os.Mkdir(numaDir, 0700); err != nil {
t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
}
distanceFile := filepath.Join(numaDir, "distance")
if err = os.WriteFile(distanceFile, []byte(distances), 0644); err != nil {
t.Fatalf("Unable to create test distanceFile: %v", err)
}
}
// stub sysFs to read from temp dir
sysFs := &NUMASysFs{nodeDir: nodeDir}
for _, tcase := range tcases {
topology, err := newNUMAInfo(tcase.topology, sysFs)
if tcase.expectedErr == nil && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tcase.expectedErr != nil && err == nil {
t.Fatalf("Expected err to equal %v, not nil", tcase.expectedErr)
} else if tcase.expectedErr != nil {
if !strings.Contains(err.Error(), tcase.expectedErr.Error()) {
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tcase.expectedErr.Error())
}
}
if !reflect.DeepEqual(topology, tcase.expectedNUMAInfo) {
t.Fatalf("Expected topology to equal %v, not %v", tcase.expectedNUMAInfo, topology)
}
}
}
func TestCalculateAvgDistanceFor(t *testing.T) {
tcases := []struct {
name string
bm []int
distance [][]uint64
expectedAvg float64
}{
{
name: "1 NUMA node",
bm: []int{
0,
},
distance: [][]uint64{
{
10,
},
},
expectedAvg: 10,
},
{
name: "2 NUMA node, 1 set in bitmask",
bm: []int{
0,
},
distance: [][]uint64{
{
10,
11,
},
{
11,
10,
},
},
expectedAvg: 10,
},
{
name: "2 NUMA node, 2 set in bitmask",
bm: []int{
0,
1,
},
distance: [][]uint64{
{
10,
11,
},
{
11,
10,
},
},
expectedAvg: 10.5,
},
{
name: "4 NUMA node, 2 set in bitmask",
bm: []int{
0,
2,
},
distance: [][]uint64{
{
10,
11,
12,
12,
},
{
11,
10,
12,
12,
},
{
12,
12,
10,
11,
},
{
12,
12,
11,
10,
},
},
expectedAvg: 11,
},
{
name: "4 NUMA node, 3 set in bitmask",
bm: []int{
0,
2,
3,
},
distance: [][]uint64{
{
10,
11,
12,
12,
},
{
11,
10,
12,
12,
},
{
12,
12,
10,
11,
},
{
12,
12,
11,
10,
},
},
expectedAvg: 11.11111111111111,
},
{
name: "0 NUMA node, 0 set in bitmask",
bm: []int{},
distance: [][]uint64{},
expectedAvg: 0,
},
}
for _, tcase := range tcases {
bm, err := bitmask.NewBitMask(tcase.bm...)
if err != nil {
t.Errorf("no error expected got %v", err)
}
numaInfo := NUMAInfo{
Nodes: tcase.bm,
NUMADistances: tcase.distance,
}
result := numaInfo.NUMADistances.CalculateAverageFor(bm)
if result != tcase.expectedAvg {
t.Errorf("Expected result to equal %g, not %g", tcase.expectedAvg, result)
}
}
}
func TestGetDistances(t *testing.T) {
testCases := []struct {
name string
expectedErr bool
expectedDistances []uint64
nodeId int
nodeExists bool
}{
{
name: "reading proper distance file",
expectedErr: false,
expectedDistances: []uint64{10, 11, 12, 13},
nodeId: 0,
nodeExists: true,
},
{
name: "no distance file",
expectedErr: true,
expectedDistances: nil,
nodeId: 99,
},
}
for _, tcase := range testCases {
t.Run(tcase.name, func(t *testing.T) {
var err error
nodeDir, err := os.MkdirTemp("", "TestGetDistances")
if err != nil {
t.Fatalf("Unable to create temporary directory: %v", err)
}
defer os.RemoveAll(nodeDir)
if tcase.nodeExists {
numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", tcase.nodeId))
if err := os.Mkdir(numaDir, 0700); err != nil {
t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
}
distanceFile := filepath.Join(numaDir, "distance")
var buffer bytes.Buffer
for i, distance := range tcase.expectedDistances {
buffer.WriteString(strconv.Itoa(int(distance)))
if i != len(tcase.expectedDistances)-1 {
buffer.WriteString(" ")
}
}
if err = os.WriteFile(distanceFile, buffer.Bytes(), 0644); err != nil {
t.Fatalf("Unable to create test distanceFile: %v", err)
}
}
sysFs := &NUMASysFs{nodeDir: nodeDir}
distances, err := sysFs.GetDistances(tcase.nodeId)
if !tcase.expectedErr && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tcase.expectedErr && err == nil {
t.Fatalf("Expected err to equal %v, not nil", tcase.expectedErr)
}
if !tcase.expectedErr && !reflect.DeepEqual(distances, tcase.expectedDistances) {
t.Fatalf("Expected distances to equal %v, not %v", tcase.expectedDistances, distances)
}
})
}
}
func TestSplitDistances(t *testing.T) {
tcases := []struct {
description string
rawDistances string
expected []uint64
expectedErr error
}{
{
description: "read one distance",
rawDistances: "10",
expected: []uint64{10},
expectedErr: nil,
},
{
description: "read two distances",
rawDistances: "10 20",
expected: []uint64{10, 20},
expectedErr: nil,
},
{
description: "can't convert negative number to uint64",
rawDistances: "10 -20",
expected: nil,
expectedErr: fmt.Errorf("cannot conver"),
},
}
for _, tc := range tcases {
result, err := splitDistances(tc.rawDistances)
if tc.expectedErr == nil && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tc.expectedErr != nil && err == nil {
t.Fatalf("Expected err to equal %v, not nil", tc.expectedErr)
} else if tc.expectedErr != nil {
if !strings.Contains(err.Error(), tc.expectedErr.Error()) {
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tc.expectedErr.Error())
}
}
if !reflect.DeepEqual(tc.expected, result) {
t.Fatalf("Expected distances to equal: %v, got: %v", tc.expected, result)
}
}
}
func TestClosest(t *testing.T) {
tcases := []struct {
description string
current bitmask.BitMask
candidate bitmask.BitMask
expected string
numaInfo *NUMAInfo
}{
{
description: "current and candidate length is not the same, current narrower",
current: NewTestBitMask(0),
candidate: NewTestBitMask(0, 2),
expected: "current",
numaInfo: &NUMAInfo{},
},
{
description: "current and candidate length is the same, distance is the same, current more lower bits set",
current: NewTestBitMask(0, 1),
candidate: NewTestBitMask(0, 2),
expected: "current",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 10, 10},
{10, 10, 10},
{10, 10, 10},
},
},
},
{
description: "current and candidate length is the same, distance is the same, candidate more lower bits set",
current: NewTestBitMask(0, 3),
candidate: NewTestBitMask(0, 2),
expected: "candidate",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 10, 10, 10},
{10, 10, 10, 10},
{10, 10, 10, 10},
{10, 10, 10, 10},
},
},
},
{
description: "current and candidate length is the same, candidate average distance is smaller",
current: NewTestBitMask(0, 3),
candidate: NewTestBitMask(0, 1),
expected: "candidate",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 11, 12, 12},
{11, 10, 12, 12},
{12, 12, 10, 11},
{12, 12, 11, 10},
},
},
},
{
description: "current and candidate length is the same, current average distance is smaller",
current: NewTestBitMask(2, 3),
candidate: NewTestBitMask(0, 3),
expected: "current",
numaInfo: &NUMAInfo{
NUMADistances: [][]uint64{
{10, 11, 12, 12},
{11, 10, 12, 12},
{12, 12, 10, 11},
{12, 12, 11, 10},
},
},
},
}
for _, tc := range tcases {
t.Run(tc.description, func(t *testing.T) {
result := tc.numaInfo.Closest(tc.candidate, tc.current)
if result != tc.current && result != tc.candidate {
t.Errorf("Expected result to be either 'current' or 'candidate' hint")
}
if tc.expected == "current" && result != tc.current {
t.Errorf("Expected result to be %v, got %v", tc.current, result)
}
if tc.expected == "candidate" && result != tc.candidate {
t.Errorf("Expected result to be %v, got %v", tc.candidate, result)
}
})
}
}