[topologymanager] rely on Cadvisor to calculate NUMA distance

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
PiotrProkop 2022-11-09 17:43:48 +01:00
parent 315f0dc6f1
commit 540b5bd308
2 changed files with 95 additions and 200 deletions

View File

@ -18,18 +18,11 @@ package topologymanager
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
)
const (
defaultNodeDir = "/sys/devices/system/node/"
)
type NUMADistances map[int][]uint64
type NUMAInfo struct {
@ -37,30 +30,17 @@ type NUMAInfo struct {
NUMADistances NUMADistances
}
type NUMASysFs struct {
nodeDir string
}
func NewNUMAInfo(topology []cadvisorapi.Node, opts PolicyOptions) (*NUMAInfo, error) {
return newNUMAInfo(topology, &NUMASysFs{nodeDir: defaultNodeDir}, opts)
}
func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs, opts PolicyOptions) (*NUMAInfo, error) {
var numaNodes []int
distances := map[int][]uint64{}
for _, node := range topology {
numaNodes = append(numaNodes, node.Id)
// Populate the NUMA distances
// For now we need to retrieve this information from sysfs.
// TODO: Update this as follows once a version of cadvisor with this commit is vendored in https://github.com/google/cadvisor/commit/24dd1de08a72cfee661f6178454db995900c0fee
// distances[node.Id] = node.Distances[:]
var nodeDistance []uint64
if opts.PreferClosestNUMA {
var err error
nodeDistance, err = sysFs.GetDistances(node.Id)
if err != nil {
return nil, fmt.Errorf("error getting NUMA distances from sysfs: %w", err)
nodeDistance = node.Distances
if nodeDistance == nil {
return nil, fmt.Errorf("error getting NUMA distances from cadvisor")
}
}
distances[node.Id] = nodeDistance
@ -127,28 +107,3 @@ func (d NUMADistances) CalculateAverageFor(bm bitmask.BitMask) float64 {
return sum / count
}
func (s NUMASysFs) GetDistances(nodeId int) ([]uint64, error) {
distancePath := fmt.Sprintf("%s/node%d/distance", s.nodeDir, nodeId)
distance, err := ioutil.ReadFile(distancePath)
if err != nil {
return nil, fmt.Errorf("problem reading %s: %w", distancePath, err)
}
rawDistances := strings.TrimSpace(string(distance))
return splitDistances(rawDistances)
}
func splitDistances(rawDistances string) ([]uint64, error) {
distances := []uint64{}
for _, distance := range strings.Split(rawDistances, " ") {
distanceUint, err := strconv.ParseUint(distance, 10, 64)
if err != nil {
return nil, fmt.Errorf("cannot convert %s to int", distance)
}
distances = append(distances, distanceUint)
}
return distances, nil
}

View File

@ -17,12 +17,8 @@ limitations under the License.
package topologymanager
import (
"bytes"
"fmt"
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"testing"
@ -58,6 +54,12 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 0,
Distances: []uint64{
10,
11,
12,
12,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -98,9 +100,21 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 0,
Distances: []uint64{
10,
11,
12,
12,
},
},
{
Id: 1,
Distances: []uint64{
11,
10,
12,
12,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -151,12 +165,30 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 0,
Distances: []uint64{
10,
11,
12,
12,
},
},
{
Id: 1,
Distances: []uint64{
11,
10,
12,
12,
},
},
{
Id: 2,
Distances: []uint64{
12,
12,
10,
11,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -217,15 +249,39 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 0,
Distances: []uint64{
10,
11,
12,
12,
},
},
{
Id: 1,
Distances: []uint64{
11,
10,
12,
12,
},
},
{
Id: 2,
Distances: []uint64{
12,
12,
10,
11,
},
},
{
Id: 3,
Distances: []uint64{
12,
12,
11,
10,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -269,7 +325,7 @@ func TestNUMAInfo(t *testing.T) {
},
},
expectedNUMAInfo: nil,
expectedErr: fmt.Errorf("no such file or directory"),
expectedErr: fmt.Errorf("error getting NUMA distances from cadvisor"),
opts: PolicyOptions{
PreferClosestNUMA: true,
},
@ -293,6 +349,12 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 1,
Distances: []uint64{
11,
10,
12,
12,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -315,9 +377,21 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 0,
Distances: []uint64{
10,
11,
12,
12,
},
},
{
Id: 2,
Distances: []uint64{
12,
12,
10,
11,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -333,9 +407,21 @@ func TestNUMAInfo(t *testing.T) {
topology: []cadvisorapi.Node{
{
Id: 0,
Distances: []uint64{
10,
11,
12,
12,
},
},
{
Id: 2,
Distances: []uint64{
12,
12,
10,
11,
},
},
},
expectedNUMAInfo: &NUMAInfo{
@ -361,37 +447,8 @@ func TestNUMAInfo(t *testing.T) {
},
}
nodeDir, err := os.MkdirTemp("", "TestNUMAInfo")
if err != nil {
t.Fatalf("Unable to create temporary directory: %v", err)
}
defer os.RemoveAll(nodeDir)
numaDistances := map[int]string{
0: "10 11 12 12",
1: "11 10 12 12",
2: "12 12 10 11",
3: "12 12 11 10",
}
for i, distances := range numaDistances {
numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", i))
if err := os.Mkdir(numaDir, 0700); err != nil {
t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
}
distanceFile := filepath.Join(numaDir, "distance")
if err = os.WriteFile(distanceFile, []byte(distances), 0644); err != nil {
t.Fatalf("Unable to create test distanceFile: %v", err)
}
}
// stub sysFs to read from temp dir
sysFs := &NUMASysFs{nodeDir: nodeDir}
for _, tcase := range tcases {
topology, err := newNUMAInfo(tcase.topology, sysFs, tcase.opts)
topology, err := NewNUMAInfo(tcase.topology, tcase.opts)
if tcase.expectedErr == nil && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tcase.expectedErr != nil && err == nil {
@ -559,123 +616,6 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
}
func TestGetDistances(t *testing.T) {
testCases := []struct {
name string
expectedErr bool
expectedDistances []uint64
nodeId int
nodeExists bool
}{
{
name: "reading proper distance file",
expectedErr: false,
expectedDistances: []uint64{10, 11, 12, 13},
nodeId: 0,
nodeExists: true,
},
{
name: "no distance file",
expectedErr: true,
expectedDistances: nil,
nodeId: 99,
},
}
for _, tcase := range testCases {
t.Run(tcase.name, func(t *testing.T) {
var err error
nodeDir, err := os.MkdirTemp("", "TestGetDistances")
if err != nil {
t.Fatalf("Unable to create temporary directory: %v", err)
}
defer os.RemoveAll(nodeDir)
if tcase.nodeExists {
numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", tcase.nodeId))
if err := os.Mkdir(numaDir, 0700); err != nil {
t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
}
distanceFile := filepath.Join(numaDir, "distance")
var buffer bytes.Buffer
for i, distance := range tcase.expectedDistances {
buffer.WriteString(strconv.Itoa(int(distance)))
if i != len(tcase.expectedDistances)-1 {
buffer.WriteString(" ")
}
}
if err = os.WriteFile(distanceFile, buffer.Bytes(), 0644); err != nil {
t.Fatalf("Unable to create test distanceFile: %v", err)
}
}
sysFs := &NUMASysFs{nodeDir: nodeDir}
distances, err := sysFs.GetDistances(tcase.nodeId)
if !tcase.expectedErr && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tcase.expectedErr && err == nil {
t.Fatalf("Expected err to equal %v, not nil", tcase.expectedErr)
}
if !tcase.expectedErr && !reflect.DeepEqual(distances, tcase.expectedDistances) {
t.Fatalf("Expected distances to equal %v, not %v", tcase.expectedDistances, distances)
}
})
}
}
func TestSplitDistances(t *testing.T) {
tcases := []struct {
description string
rawDistances string
expected []uint64
expectedErr error
}{
{
description: "read one distance",
rawDistances: "10",
expected: []uint64{10},
expectedErr: nil,
},
{
description: "read two distances",
rawDistances: "10 20",
expected: []uint64{10, 20},
expectedErr: nil,
},
{
description: "can't convert negative number to uint64",
rawDistances: "10 -20",
expected: nil,
expectedErr: fmt.Errorf("cannot conver"),
},
}
for _, tc := range tcases {
result, err := splitDistances(tc.rawDistances)
if tc.expectedErr == nil && err != nil {
t.Fatalf("Expected err to equal nil, not %v", err)
} else if tc.expectedErr != nil && err == nil {
t.Fatalf("Expected err to equal %v, not nil", tc.expectedErr)
} else if tc.expectedErr != nil {
if !strings.Contains(err.Error(), tc.expectedErr.Error()) {
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tc.expectedErr.Error())
}
}
if !reflect.DeepEqual(tc.expected, result) {
t.Fatalf("Expected distances to equal: %v, got: %v", tc.expected, result)
}
}
}
func TestClosest(t *testing.T) {
tcases := []struct {
description string