mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 10:51:29 +00:00
[topologymanager] rely on Cadvisor to calculate NUMA distance
Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
parent
315f0dc6f1
commit
540b5bd308
@ -18,18 +18,11 @@ package topologymanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultNodeDir = "/sys/devices/system/node/"
|
||||
)
|
||||
|
||||
type NUMADistances map[int][]uint64
|
||||
|
||||
type NUMAInfo struct {
|
||||
@ -37,30 +30,17 @@ type NUMAInfo struct {
|
||||
NUMADistances NUMADistances
|
||||
}
|
||||
|
||||
type NUMASysFs struct {
|
||||
nodeDir string
|
||||
}
|
||||
|
||||
func NewNUMAInfo(topology []cadvisorapi.Node, opts PolicyOptions) (*NUMAInfo, error) {
|
||||
return newNUMAInfo(topology, &NUMASysFs{nodeDir: defaultNodeDir}, opts)
|
||||
}
|
||||
|
||||
func newNUMAInfo(topology []cadvisorapi.Node, sysFs *NUMASysFs, opts PolicyOptions) (*NUMAInfo, error) {
|
||||
var numaNodes []int
|
||||
distances := map[int][]uint64{}
|
||||
for _, node := range topology {
|
||||
numaNodes = append(numaNodes, node.Id)
|
||||
|
||||
// Populate the NUMA distances
|
||||
// For now we need to retrieve this information from sysfs.
|
||||
// TODO: Update this as follows once a version of cadvisor with this commit is vendored in https://github.com/google/cadvisor/commit/24dd1de08a72cfee661f6178454db995900c0fee
|
||||
// distances[node.Id] = node.Distances[:]
|
||||
var nodeDistance []uint64
|
||||
if opts.PreferClosestNUMA {
|
||||
var err error
|
||||
nodeDistance, err = sysFs.GetDistances(node.Id)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting NUMA distances from sysfs: %w", err)
|
||||
nodeDistance = node.Distances
|
||||
if nodeDistance == nil {
|
||||
return nil, fmt.Errorf("error getting NUMA distances from cadvisor")
|
||||
}
|
||||
}
|
||||
distances[node.Id] = nodeDistance
|
||||
@ -127,28 +107,3 @@ func (d NUMADistances) CalculateAverageFor(bm bitmask.BitMask) float64 {
|
||||
|
||||
return sum / count
|
||||
}
|
||||
|
||||
func (s NUMASysFs) GetDistances(nodeId int) ([]uint64, error) {
|
||||
distancePath := fmt.Sprintf("%s/node%d/distance", s.nodeDir, nodeId)
|
||||
distance, err := ioutil.ReadFile(distancePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("problem reading %s: %w", distancePath, err)
|
||||
}
|
||||
|
||||
rawDistances := strings.TrimSpace(string(distance))
|
||||
|
||||
return splitDistances(rawDistances)
|
||||
}
|
||||
|
||||
func splitDistances(rawDistances string) ([]uint64, error) {
|
||||
distances := []uint64{}
|
||||
for _, distance := range strings.Split(rawDistances, " ") {
|
||||
distanceUint, err := strconv.ParseUint(distance, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot convert %s to int", distance)
|
||||
}
|
||||
distances = append(distances, distanceUint)
|
||||
}
|
||||
|
||||
return distances, nil
|
||||
}
|
||||
|
@ -17,12 +17,8 @@ limitations under the License.
|
||||
package topologymanager
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@ -58,6 +54,12 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 0,
|
||||
Distances: []uint64{
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -98,9 +100,21 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 0,
|
||||
Distances: []uint64{
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 1,
|
||||
Distances: []uint64{
|
||||
11,
|
||||
10,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -151,12 +165,30 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 0,
|
||||
Distances: []uint64{
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 1,
|
||||
Distances: []uint64{
|
||||
11,
|
||||
10,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 2,
|
||||
Distances: []uint64{
|
||||
12,
|
||||
12,
|
||||
10,
|
||||
11,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -217,15 +249,39 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 0,
|
||||
Distances: []uint64{
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 1,
|
||||
Distances: []uint64{
|
||||
11,
|
||||
10,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 2,
|
||||
Distances: []uint64{
|
||||
12,
|
||||
12,
|
||||
10,
|
||||
11,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 3,
|
||||
Distances: []uint64{
|
||||
12,
|
||||
12,
|
||||
11,
|
||||
10,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -269,7 +325,7 @@ func TestNUMAInfo(t *testing.T) {
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: nil,
|
||||
expectedErr: fmt.Errorf("no such file or directory"),
|
||||
expectedErr: fmt.Errorf("error getting NUMA distances from cadvisor"),
|
||||
opts: PolicyOptions{
|
||||
PreferClosestNUMA: true,
|
||||
},
|
||||
@ -293,6 +349,12 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 1,
|
||||
Distances: []uint64{
|
||||
11,
|
||||
10,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -315,9 +377,21 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 0,
|
||||
Distances: []uint64{
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 2,
|
||||
Distances: []uint64{
|
||||
12,
|
||||
12,
|
||||
10,
|
||||
11,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -333,9 +407,21 @@ func TestNUMAInfo(t *testing.T) {
|
||||
topology: []cadvisorapi.Node{
|
||||
{
|
||||
Id: 0,
|
||||
Distances: []uint64{
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
},
|
||||
},
|
||||
{
|
||||
Id: 2,
|
||||
Distances: []uint64{
|
||||
12,
|
||||
12,
|
||||
10,
|
||||
11,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedNUMAInfo: &NUMAInfo{
|
||||
@ -361,37 +447,8 @@ func TestNUMAInfo(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
nodeDir, err := os.MkdirTemp("", "TestNUMAInfo")
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to create temporary directory: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(nodeDir)
|
||||
|
||||
numaDistances := map[int]string{
|
||||
0: "10 11 12 12",
|
||||
1: "11 10 12 12",
|
||||
2: "12 12 10 11",
|
||||
3: "12 12 11 10",
|
||||
}
|
||||
|
||||
for i, distances := range numaDistances {
|
||||
numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", i))
|
||||
if err := os.Mkdir(numaDir, 0700); err != nil {
|
||||
t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
|
||||
}
|
||||
|
||||
distanceFile := filepath.Join(numaDir, "distance")
|
||||
|
||||
if err = os.WriteFile(distanceFile, []byte(distances), 0644); err != nil {
|
||||
t.Fatalf("Unable to create test distanceFile: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// stub sysFs to read from temp dir
|
||||
sysFs := &NUMASysFs{nodeDir: nodeDir}
|
||||
|
||||
for _, tcase := range tcases {
|
||||
topology, err := newNUMAInfo(tcase.topology, sysFs, tcase.opts)
|
||||
topology, err := NewNUMAInfo(tcase.topology, tcase.opts)
|
||||
if tcase.expectedErr == nil && err != nil {
|
||||
t.Fatalf("Expected err to equal nil, not %v", err)
|
||||
} else if tcase.expectedErr != nil && err == nil {
|
||||
@ -559,123 +616,6 @@ func TestCalculateAvgDistanceFor(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func TestGetDistances(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
expectedErr bool
|
||||
expectedDistances []uint64
|
||||
nodeId int
|
||||
nodeExists bool
|
||||
}{
|
||||
{
|
||||
name: "reading proper distance file",
|
||||
expectedErr: false,
|
||||
expectedDistances: []uint64{10, 11, 12, 13},
|
||||
nodeId: 0,
|
||||
nodeExists: true,
|
||||
},
|
||||
{
|
||||
name: "no distance file",
|
||||
expectedErr: true,
|
||||
expectedDistances: nil,
|
||||
nodeId: 99,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tcase := range testCases {
|
||||
t.Run(tcase.name, func(t *testing.T) {
|
||||
var err error
|
||||
|
||||
nodeDir, err := os.MkdirTemp("", "TestGetDistances")
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to create temporary directory: %v", err)
|
||||
}
|
||||
|
||||
defer os.RemoveAll(nodeDir)
|
||||
|
||||
if tcase.nodeExists {
|
||||
numaDir := filepath.Join(nodeDir, fmt.Sprintf("node%d", tcase.nodeId))
|
||||
if err := os.Mkdir(numaDir, 0700); err != nil {
|
||||
t.Fatalf("Unable to create numaDir %s: %v", numaDir, err)
|
||||
}
|
||||
|
||||
distanceFile := filepath.Join(numaDir, "distance")
|
||||
|
||||
var buffer bytes.Buffer
|
||||
for i, distance := range tcase.expectedDistances {
|
||||
buffer.WriteString(strconv.Itoa(int(distance)))
|
||||
if i != len(tcase.expectedDistances)-1 {
|
||||
buffer.WriteString(" ")
|
||||
}
|
||||
}
|
||||
|
||||
if err = os.WriteFile(distanceFile, buffer.Bytes(), 0644); err != nil {
|
||||
t.Fatalf("Unable to create test distanceFile: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
sysFs := &NUMASysFs{nodeDir: nodeDir}
|
||||
|
||||
distances, err := sysFs.GetDistances(tcase.nodeId)
|
||||
if !tcase.expectedErr && err != nil {
|
||||
t.Fatalf("Expected err to equal nil, not %v", err)
|
||||
} else if tcase.expectedErr && err == nil {
|
||||
t.Fatalf("Expected err to equal %v, not nil", tcase.expectedErr)
|
||||
}
|
||||
|
||||
if !tcase.expectedErr && !reflect.DeepEqual(distances, tcase.expectedDistances) {
|
||||
t.Fatalf("Expected distances to equal %v, not %v", tcase.expectedDistances, distances)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitDistances(t *testing.T) {
|
||||
tcases := []struct {
|
||||
description string
|
||||
rawDistances string
|
||||
expected []uint64
|
||||
expectedErr error
|
||||
}{
|
||||
{
|
||||
description: "read one distance",
|
||||
rawDistances: "10",
|
||||
expected: []uint64{10},
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
description: "read two distances",
|
||||
rawDistances: "10 20",
|
||||
expected: []uint64{10, 20},
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
description: "can't convert negative number to uint64",
|
||||
rawDistances: "10 -20",
|
||||
expected: nil,
|
||||
expectedErr: fmt.Errorf("cannot conver"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tcases {
|
||||
result, err := splitDistances(tc.rawDistances)
|
||||
|
||||
if tc.expectedErr == nil && err != nil {
|
||||
t.Fatalf("Expected err to equal nil, not %v", err)
|
||||
} else if tc.expectedErr != nil && err == nil {
|
||||
t.Fatalf("Expected err to equal %v, not nil", tc.expectedErr)
|
||||
} else if tc.expectedErr != nil {
|
||||
if !strings.Contains(err.Error(), tc.expectedErr.Error()) {
|
||||
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tc.expectedErr.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(tc.expected, result) {
|
||||
t.Fatalf("Expected distances to equal: %v, got: %v", tc.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClosest(t *testing.T) {
|
||||
tcases := []struct {
|
||||
description string
|
||||
|
Loading…
Reference in New Issue
Block a user