Improved multi-numa alignment in Topology Manager: add NUMAInfo

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
2025-07-29 14:37:00 +00:00 · 2022-10-27 10:37:20 +02:00 · 2022-10-27 10:37:20 +02:00 · 58ef3f202a
commit 58ef3f202a
parent daee219210
4 changed files with 819 additions and 3 deletions
--- a/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go
+++ b/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go
@ -35,6 +35,8 @@ type BitMask interface {
 	IsSet(bit int) bool
 	AnySet(bits []int) bool
 	IsNarrowerThan(mask BitMask) bool
 	IsLessThan(mask BitMask) bool
 	IsGreaterThan(mask BitMask) bool
 	String() string
 	Count() int
 	GetBits() []int
@ -143,13 +145,21 @@ func (s *bitMask) IsEqual(mask BitMask) bool {
 // lower-numbered bits set wins out.
 func (s *bitMask) IsNarrowerThan(mask BitMask) bool {
 	if s.Count() == mask.Count() {
-		if *s < *mask.(*bitMask) {
+		return s.IsLessThan(mask)
 			return true
 		}
 	}
 	return s.Count() < mask.Count()
 }
 // IsLessThan checks which bitmask has more lower-numbered bits set.
 func (s *bitMask) IsLessThan(mask BitMask) bool {
 	return *s < *mask.(*bitMask)
 }
 // IsGreaterThan checks which bitmask has more higher-numbered bits set.
 func (s *bitMask) IsGreaterThan(mask BitMask) bool {
 	return *s > *mask.(*bitMask)
 }
 // String converts mask to string
 func (s *bitMask) String() string {
 	grouping := 2
--- a/pkg/kubelet/cm/topologymanager/bitmask/bitmask_test.go
+++ b/pkg/kubelet/cm/topologymanager/bitmask/bitmask_test.go
@ -630,3 +630,75 @@ func TestIterateBitMasks(t *testing.T) {
 		}
 	}
 }
 func TestIsLessThan(t *testing.T) {
 	tcases := []struct {
 		name               string
 		firstMask          []int
 		secondMask         []int
 		expectedFirstLower bool
 	}{
 		{
 			name:               "Check which value is lower of masks with equal bits set 1/1",
 			firstMask:          []int{0},
 			secondMask:         []int{0},
 			expectedFirstLower: false,
 		},
 		{
 			name:               "Check which value is lower of masks with unequal bits set 2/1",
 			firstMask:          []int{1},
 			secondMask:         []int{0},
 			expectedFirstLower: false,
 		},
 		{
 			name:               "Check which value is lower of masks with unequal bits set 1/2",
 			firstMask:          []int{0},
 			secondMask:         []int{1},
 			expectedFirstLower: true,
 		},
 	}
 	for _, tc := range tcases {
 		firstMask, _ := NewBitMask(tc.firstMask...)
 		secondMask, _ := NewBitMask(tc.secondMask...)
 		expectedFirstLower := firstMask.IsLessThan(secondMask)
 		if expectedFirstLower != tc.expectedFirstLower {
 			t.Errorf("Expected value to be %v, got %v", tc.expectedFirstLower, expectedFirstLower)
 		}
 	}
 }
 func TestIsGreaterThan(t *testing.T) {
 	tcases := []struct {
 		name                 string
 		firstMask            []int
 		secondMask           []int
 		expectedFirstGreater bool
 	}{
 		{
 			name:                 "Check which value is greater of masks with equal bits set 1/1",
 			firstMask:            []int{0},
 			secondMask:           []int{0},
 			expectedFirstGreater: false,
 		},
 		{
 			name:                 "Check which value is greater of masks with unequal bits set 2/1",
 			firstMask:            []int{1},
 			secondMask:           []int{0},
 			expectedFirstGreater: true,
 		},
 		{
 			name:                 "Check which value is greater of masks with unequal bits set 1/2",
 			firstMask:            []int{0},
 			secondMask:           []int{1},
 			expectedFirstGreater: false,
 		},
 	}
 	for _, tc := range tcases {
 		firstMask, _ := NewBitMask(tc.firstMask...)
 		secondMask, _ := NewBitMask(tc.secondMask...)
 		expectedFirstGreater := firstMask.IsGreaterThan(secondMask)
 		if expectedFirstGreater != tc.expectedFirstGreater {
 			t.Errorf("Expected value to be %v, got %v", tc.expectedFirstGreater, expectedFirstGreater)
 		}
 	}
 }
--- a/pkg/kubelet/cm/topologymanager/numa_info.go
+++ b/pkg/kubelet/cm/topologymanager/numa_info.go
@ -0,0 +1,150 @@
 /*
 Copyright 2022 The Kubernetes Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
 package topologymanager
 import (
 	"fmt"
 	"io/ioutil"
 	"strconv"
 	"strings"
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
 )
 const (
 	defaultNodeDir = "/sys/devices/system/node/"
 )
 type NUMADistances [][]uint64
 type NUMAInfo struct {
 	Nodes         []int
 	NUMADistances NUMADistances
 }
 type NUMASysFs struct {
 	nodeDir string
 }
 func NewNUMAInfo(topology []cadvisorapi.Node) (*NUMAInfo, error) {
 	return newNUMAInfo(topology, &NUMASysFs{nodeDir: defaultNodeDir})
 }
 func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs) (*NUMAInfo, error) {
 	var numaNodes []int
 	distances := make([][]uint64, len(topology))
 	for _, node := range topology {
 		numaNodes = append(numaNodes, node.Id)
 		// Populate the NUMA distances
 		// For now we need to retrieve this information from sysfs.
 		// TODO: Update this as follows once a version of cadvisor with this commit is vendored in https://github.com/google/cadvisor/commit/24dd1de08a72cfee661f6178454db995900c0fee
 		//	distances[node.Id] = node.Distances[:]
 		nodeDistance, err := sysFs.GetDistances(node.Id)
 		if err != nil {
 			return nil, fmt.Errorf("error getting NUMA distances from sysfs: %w", err)
 		}
 		distances[node.Id] = nodeDistance
 	}
 	numaInfo := &NUMAInfo{
 		Nodes:         numaNodes,
 		NUMADistances: distances,
 	}
 	return numaInfo, nil
 }
 func (n *NUMAInfo) Narrowest(m1 bitmask.BitMask, m2 bitmask.BitMask) bitmask.BitMask {
 	if m1.IsNarrowerThan(m2) {
 		return m1
 	}
 	return m2
 }
 func (n *NUMAInfo) Closest(m1 bitmask.BitMask, m2 bitmask.BitMask) bitmask.BitMask {
 	// If the length of both bitmasks aren't the same, choose the one that is narrowest.
 	if m1.Count() != m2.Count() {
 		return n.Narrowest(m1, m2)
 	}
 	m1Distance := n.NUMADistances.CalculateAverageFor(m1)
 	m2Distance := n.NUMADistances.CalculateAverageFor(m2)
 	// If average distance is the same, take bitmask with more lower-number bits set.
 	if m1Distance == m2Distance {
 		if m1.IsLessThan(m2) {
 			return m1
 		}
 		return m2
 	}
 	// Otherwise, return the bitmask with the shortest average distance between NUMA nodes.
 	if m1Distance < m2Distance {
 		return m1
 	}
 	return m2
 }
 func (n NUMAInfo) DefaultAffinityMask() bitmask.BitMask {
 	defaultAffinity, _ := bitmask.NewBitMask(n.Nodes...)
 	return defaultAffinity
 }
 func (d NUMADistances) CalculateAverageFor(bm bitmask.BitMask) float64 {
 	// This should never happen, but just in case make sure we do not divide by zero.
 	if bm.Count() == 0 {
 		return 0
 	}
 	var count float64 = 0
 	var sum float64 = 0
 	for _, node1 := range bm.GetBits() {
 		for _, node2 := range bm.GetBits() {
 			sum += float64(d[node1][node2])
 			count++
 		}
 	}
 	return sum / count
 }
 func (s NUMASysFs) GetDistances(nodeId int) ([]uint64, error) {
 	distancePath := fmt.Sprintf("%s/node%d/distance", s.nodeDir, nodeId)
 	distance, err := ioutil.ReadFile(distancePath)
 	if err != nil {
 		return nil, fmt.Errorf("problem reading %s: %w", distancePath, err)
 	}
 	rawDistances := strings.TrimSpace(string(distance))
 	return splitDistances(rawDistances)
 }
 func splitDistances(rawDistances string) ([]uint64, error) {
 	distances := []uint64{}
 	for _, distance := range strings.Split(rawDistances, " ") {
 		distanceUint, err := strconv.ParseUint(distance, 10, 64)
 		if err != nil {
 			return nil, fmt.Errorf("cannot convert %s to int", distance)
 		}
 		distances = append(distances, distanceUint)
 	}
 	return distances, nil
 }
--- a/pkg/kubelet/cm/topologymanager/numa_info_test.go
+++ b/pkg/kubelet/cm/topologymanager/numa_info_test.go
@ -0,0 +1,584 @@
 /*
 Copyright 2022 The Kubernetes Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
 package topologymanager
 import (
 	"bytes"
 	"fmt"
 	"os"
 	"path/filepath"
 	"reflect"
 	"strconv"
 	"strings"
 	"testing"
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
 )
 func TestNUMAInfo(t *testing.T) {
 	tcases := []struct {
 		name             string
 		topology         []cadvisorapi.Node
 		expectedNUMAInfo *NUMAInfo
 		expectedErr      error
 	}{
 		{
 			name: "positive test 1 node",
 			topology: []cadvisorapi.Node{
 				{
 					Id: 0,
 				},
 			},
 			expectedNUMAInfo: &NUMAInfo{
 				Nodes: []int{0},
 				NUMADistances: NUMADistances{
 					{
 						10,
 						11,
 						12,
 						12,
 					},
 				},
 			},
 		},
 		{
 			name: "positive test 2 nodes",
 			topology: []cadvisorapi.Node{
 				{
 					Id: 0,
 				},
 				{
 					Id: 1,
 				},
 			},
 			expectedNUMAInfo: &NUMAInfo{
 				Nodes: []int{0, 1},
 				NUMADistances: NUMADistances{
 					{
 						10,
 						11,
 						12,
 						12,
 					},
 					{
 						11,
 						10,
 						12,
 						12,
 					},
 				},
 			},
 		},
 		{
 			name: "positive test 3 nodes",
 			topology: []cadvisorapi.Node{
 				{
 					Id: 0,
 				},
 				{
 					Id: 1,
 				},
 				{
 					Id: 2,
 				},
 			},
 			expectedNUMAInfo: &NUMAInfo{
 				Nodes: []int{0, 1, 2},
 				NUMADistances: NUMADistances{
 					{
 						10,
 						11,
 						12,
 						12,
 					},
 					{
 						11,
 						10,
 						12,
 						12,
 					},
 					{
 						12,
 						12,
 						10,
 						11,
 					},
 				},
 			},
 		},
 		{
 			name: "positive test 4 nodes",
 			topology: []cadvisorapi.Node{
 				{
 					Id: 0,
 				},
 				{
 					Id: 1,
 				},
 				{
 					Id: 2,
 				},
 				{
 					Id: 3,
 				},
 			},
 			expectedNUMAInfo: &NUMAInfo{
 				Nodes: []int{0, 1, 2, 3},
 				NUMADistances: NUMADistances{
 					{
 						10,
 						11,
 						12,
 						12,
 					},
 					{
 						11,
 						10,
 						12,
 						12,
 					},
 					{
 						12,
 						12,
 						10,
 						11,
 					},
 					{
 						12,
 						12,
 						11,
 						10,
 					},
 				},
 			},
 		},
 		{
 			name: "negative test 1 node",
 			topology: []cadvisorapi.Node{
 				{
 					Id: 9,
 				},
 			},
 			expectedNUMAInfo: nil,
 			expectedErr:      fmt.Errorf("no such file or directory"),
 		},
 	}
 	nodeDir, err := os.MkdirTemp("", "TestNUMAInfo")
 	if err != nil {
 		t.Fatalf("Unable to create temporary directory: %v", err)
 	}
 	defer os.RemoveAll(nodeDir)
 	numaDistances := map[int]string{
 		0: "10 11 12 12",
 		1: "11 10 12 12",
 		2: "12 12 10 11",
 		3: "12 12 11 10",
 	}
 	for i, distances := range numaDistances {
 		numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", i))
 		if err := os.Mkdir(numaDir, 0700); err != nil {
 			t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
 		}
 		distanceFile := filepath.Join(numaDir, "distance")
 		if err = os.WriteFile(distanceFile, []byte(distances), 0644); err != nil {
 			t.Fatalf("Unable to create test distanceFile: %v", err)
 		}
 	}
 	// stub sysFs to read from temp dir
 	sysFs := &NUMASysFs{nodeDir: nodeDir}
 	for _, tcase := range tcases {
 		topology, err := newNUMAInfo(tcase.topology, sysFs)
 		if tcase.expectedErr == nil && err != nil {
 			t.Fatalf("Expected err to equal nil, not %v", err)
 		} else if tcase.expectedErr != nil && err == nil {
 			t.Fatalf("Expected err to equal %v, not nil", tcase.expectedErr)
 		} else if tcase.expectedErr != nil {
 			if !strings.Contains(err.Error(), tcase.expectedErr.Error()) {
 				t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tcase.expectedErr.Error())
 			}
 		}
 		if !reflect.DeepEqual(topology, tcase.expectedNUMAInfo) {
 			t.Fatalf("Expected topology to equal %v, not %v", tcase.expectedNUMAInfo, topology)
 		}
 	}
 }
 func TestCalculateAvgDistanceFor(t *testing.T) {
 	tcases := []struct {
 		name        string
 		bm          []int
 		distance    [][]uint64
 		expectedAvg float64
 	}{
 		{
 			name: "1 NUMA node",
 			bm: []int{
 				0,
 			},
 			distance: [][]uint64{
 				{
 					10,
 				},
 			},
 			expectedAvg: 10,
 		},
 		{
 			name: "2 NUMA node, 1 set in bitmask",
 			bm: []int{
 				0,
 			},
 			distance: [][]uint64{
 				{
 					10,
 					11,
 				},
 				{
 					11,
 					10,
 				},
 			},
 			expectedAvg: 10,
 		},
 		{
 			name: "2 NUMA node, 2 set in bitmask",
 			bm: []int{
 				0,
 				1,
 			},
 			distance: [][]uint64{
 				{
 					10,
 					11,
 				},
 				{
 					11,
 					10,
 				},
 			},
 			expectedAvg: 10.5,
 		},
 		{
 			name: "4 NUMA node, 2 set in bitmask",
 			bm: []int{
 				0,
 				2,
 			},
 			distance: [][]uint64{
 				{
 					10,
 					11,
 					12,
 					12,
 				},
 				{
 					11,
 					10,
 					12,
 					12,
 				},
 				{
 					12,
 					12,
 					10,
 					11,
 				},
 				{
 					12,
 					12,
 					11,
 					10,
 				},
 			},
 			expectedAvg: 11,
 		},
 		{
 			name: "4 NUMA node, 3 set in bitmask",
 			bm: []int{
 				0,
 				2,
 				3,
 			},
 			distance: [][]uint64{
 				{
 					10,
 					11,
 					12,
 					12,
 				},
 				{
 					11,
 					10,
 					12,
 					12,
 				},
 				{
 					12,
 					12,
 					10,
 					11,
 				},
 				{
 					12,
 					12,
 					11,
 					10,
 				},
 			},
 			expectedAvg: 11.11111111111111,
 		},
 		{
 			name:        "0 NUMA node, 0 set in bitmask",
 			bm:          []int{},
 			distance:    [][]uint64{},
 			expectedAvg: 0,
 		},
 	}
 	for _, tcase := range tcases {
 		bm, err := bitmask.NewBitMask(tcase.bm...)
 		if err != nil {
 			t.Errorf("no error expected got %v", err)
 		}
 		numaInfo := NUMAInfo{
 			Nodes:         tcase.bm,
 			NUMADistances: tcase.distance,
 		}
 		result := numaInfo.NUMADistances.CalculateAverageFor(bm)
 		if result != tcase.expectedAvg {
 			t.Errorf("Expected result to equal %g, not %g", tcase.expectedAvg, result)
 		}
 	}
 }
 func TestGetDistances(t *testing.T) {
 	testCases := []struct {
 		name              string
 		expectedErr       bool
 		expectedDistances []uint64
 		nodeId            int
 		nodeExists        bool
 	}{
 		{
 			name:              "reading proper distance file",
 			expectedErr:       false,
 			expectedDistances: []uint64{10, 11, 12, 13},
 			nodeId:            0,
 			nodeExists:        true,
 		},
 		{
 			name:              "no distance file",
 			expectedErr:       true,
 			expectedDistances: nil,
 			nodeId:            99,
 		},
 	}
 	for _, tcase := range testCases {
 		t.Run(tcase.name, func(t *testing.T) {
 			var err error
 			nodeDir, err := os.MkdirTemp("", "TestGetDistances")
 			if err != nil {
 				t.Fatalf("Unable to create temporary directory: %v", err)
 			}
 			defer os.RemoveAll(nodeDir)
 			if tcase.nodeExists {
 				numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", tcase.nodeId))
 				if err := os.Mkdir(numaDir, 0700); err != nil {
 					t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
 				}
 				distanceFile := filepath.Join(numaDir, "distance")
 				var buffer bytes.Buffer
 				for i, distance := range tcase.expectedDistances {
 					buffer.WriteString(strconv.Itoa(int(distance)))
 					if i != len(tcase.expectedDistances)-1 {
 						buffer.WriteString(" ")
 					}
 				}
 				if err = os.WriteFile(distanceFile, buffer.Bytes(), 0644); err != nil {
 					t.Fatalf("Unable to create test distanceFile: %v", err)
 				}
 			}
 			sysFs := &NUMASysFs{nodeDir: nodeDir}
 			distances, err := sysFs.GetDistances(tcase.nodeId)
 			if !tcase.expectedErr && err != nil {
 				t.Fatalf("Expected err to equal nil, not %v", err)
 			} else if tcase.expectedErr && err == nil {
 				t.Fatalf("Expected err to equal %v, not nil", tcase.expectedErr)
 			}
 			if !tcase.expectedErr && !reflect.DeepEqual(distances, tcase.expectedDistances) {
 				t.Fatalf("Expected distances to equal %v, not %v", tcase.expectedDistances, distances)
 			}
 		})
 	}
 }
 func TestSplitDistances(t *testing.T) {
 	tcases := []struct {
 		description  string
 		rawDistances string
 		expected     []uint64
 		expectedErr  error
 	}{
 		{
 			description:  "read one distance",
 			rawDistances: "10",
 			expected:     []uint64{10},
 			expectedErr:  nil,
 		},
 		{
 			description:  "read two distances",
 			rawDistances: "10 20",
 			expected:     []uint64{10, 20},
 			expectedErr:  nil,
 		},
 		{
 			description:  "can't convert negative number to uint64",
 			rawDistances: "10 -20",
 			expected:     nil,
 			expectedErr:  fmt.Errorf("cannot conver"),
 		},
 	}
 	for _, tc := range tcases {
 		result, err := splitDistances(tc.rawDistances)
 		if tc.expectedErr == nil && err != nil {
 			t.Fatalf("Expected err to equal nil, not %v", err)
 		} else if tc.expectedErr != nil && err == nil {
 			t.Fatalf("Expected err to equal %v, not nil", tc.expectedErr)
 		} else if tc.expectedErr != nil {
 			if !strings.Contains(err.Error(), tc.expectedErr.Error()) {
 				t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tc.expectedErr.Error())
 			}
 		}
 		if !reflect.DeepEqual(tc.expected, result) {
 			t.Fatalf("Expected distances to equal: %v, got: %v", tc.expected, result)
 		}
 	}
 }
 func TestClosest(t *testing.T) {
 	tcases := []struct {
 		description string
 		current     bitmask.BitMask
 		candidate   bitmask.BitMask
 		expected    string
 		numaInfo    *NUMAInfo
 	}{
 		{
 			description: "current and candidate length is not the same, current narrower",
 			current:     NewTestBitMask(0),
 			candidate:   NewTestBitMask(0, 2),
 			expected:    "current",
 			numaInfo:    &NUMAInfo{},
 		},
 		{
 			description: "current and candidate length is the same, distance is the same, current more lower bits set",
 			current:     NewTestBitMask(0, 1),
 			candidate:   NewTestBitMask(0, 2),
 			expected:    "current",
 			numaInfo: &NUMAInfo{
 				NUMADistances: [][]uint64{
 					{10, 10, 10},
 					{10, 10, 10},
 					{10, 10, 10},
 				},
 			},
 		},
 		{
 			description: "current and candidate length is the same, distance is the same, candidate more lower bits set",
 			current:     NewTestBitMask(0, 3),
 			candidate:   NewTestBitMask(0, 2),
 			expected:    "candidate",
 			numaInfo: &NUMAInfo{
 				NUMADistances: [][]uint64{
 					{10, 10, 10, 10},
 					{10, 10, 10, 10},
 					{10, 10, 10, 10},
 					{10, 10, 10, 10},
 				},
 			},
 		},
 		{
 			description: "current and candidate length is the same, candidate average distance is smaller",
 			current:     NewTestBitMask(0, 3),
 			candidate:   NewTestBitMask(0, 1),
 			expected:    "candidate",
 			numaInfo: &NUMAInfo{
 				NUMADistances: [][]uint64{
 					{10, 11, 12, 12},
 					{11, 10, 12, 12},
 					{12, 12, 10, 11},
 					{12, 12, 11, 10},
 				},
 			},
 		},
 		{
 			description: "current and candidate length is the same, current average distance is smaller",
 			current:     NewTestBitMask(2, 3),
 			candidate:   NewTestBitMask(0, 3),
 			expected:    "current",
 			numaInfo: &NUMAInfo{
 				NUMADistances: [][]uint64{
 					{10, 11, 12, 12},
 					{11, 10, 12, 12},
 					{12, 12, 10, 11},
 					{12, 12, 11, 10},
 				},
 			},
 		},
 	}
 	for _, tc := range tcases {
 		t.Run(tc.description, func(t *testing.T) {
 			result := tc.numaInfo.Closest(tc.candidate, tc.current)
 			if result != tc.current && result != tc.candidate {
 				t.Errorf("Expected result to be either 'current' or 'candidate' hint")
 			}
 			if tc.expected == "current" && result != tc.current {
 				t.Errorf("Expected result to be %v, got %v", tc.current, result)
 			}
 			if tc.expected == "candidate" && result != tc.candidate {
 				t.Errorf("Expected result to be %v, got %v", tc.candidate, result)
 			}
 		})
 	}
 }