mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 13:37:30 +00:00
Merge pull request #53328 from intelsdi-x/lscpu_fix
Automatic merge from submit-queue (batch tested with PRs 53297, 53328). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. CPU Manager - make CoreIDs platform-unique **What this PR does / why we need it**: CPU Manager uses the topology from cAdvisor (`/proc/cpuinfo`), where CoreIDs are unique only within a socket, not across the platform; this causes problems on multi-socket platforms. All of the code assumes that CoreIDs are unique across the platform, so the `Discover` function has been changed to assign each CoreID as the lowest cpuID among all CPUs belonging to the same core. This can be expressed as: `CoreID = min(cpuIDs on the same core)`. Since cpuIDs are platform-unique, this guarantees that CoreIDs are platform-unique as well. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53323
This commit is contained in:
commit
ec116fdc73
@ -9,6 +9,7 @@ go_library(
|
|||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
"//pkg/kubelet/cm/cpuset:go_default_library",
|
"//pkg/kubelet/cm/cpuset:go_default_library",
|
||||||
|
"//vendor/github.com/golang/glog:go_default_library",
|
||||||
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
|
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -18,7 +18,9 @@ package topology
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||||
)
|
)
|
||||||
@ -145,15 +147,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CPUDetails := CPUDetails{}
|
CPUDetails := CPUDetails{}
|
||||||
|
|
||||||
numCPUs := machineInfo.NumCores
|
numCPUs := machineInfo.NumCores
|
||||||
numPhysicalCores := 0
|
numPhysicalCores := 0
|
||||||
|
var coreID int
|
||||||
|
var err error
|
||||||
|
|
||||||
for _, socket := range machineInfo.Topology {
|
for _, socket := range machineInfo.Topology {
|
||||||
numPhysicalCores += len(socket.Cores)
|
numPhysicalCores += len(socket.Cores)
|
||||||
for _, core := range socket.Cores {
|
for _, core := range socket.Cores {
|
||||||
|
if coreID, err = getUniqueCoreID(core.Threads); err != nil {
|
||||||
|
glog.Errorf("could not get unique coreID for socket: %d core %d threads: %v",
|
||||||
|
socket.Id, core.Id, core.Threads)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
for _, cpu := range core.Threads {
|
for _, cpu := range core.Threads {
|
||||||
CPUDetails[cpu] = CPUInfo{
|
CPUDetails[cpu] = CPUInfo{
|
||||||
CoreID: core.Id,
|
CoreID: coreID,
|
||||||
SocketID: socket.Id,
|
SocketID: socket.Id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -167,3 +176,22 @@ func Discover(machineInfo *cadvisorapi.MachineInfo) (*CPUTopology, error) {
|
|||||||
CPUDetails: CPUDetails,
|
CPUDetails: CPUDetails,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getUniqueCoreID returns the lowest cpuID found in threads; that value is
// used as the ID of the core the threads belong to. Because cpuIDs are
// platform unique, the resulting coreID is platform unique as well (unlike
// the coreIDs reported by cAdvisor, which are only unique within a socket).
//
// It returns an error when threads is empty or when it contains the same
// cpuID more than once. The caller's slice is never modified.
func getUniqueCoreID(threads []int) (coreID int, err error) {
	if len(threads) == 0 {
		return 0, fmt.Errorf("no cpus provided")
	}

	// Work on a private, sorted copy so the caller's slice keeps its order.
	sorted := make([]int, len(threads))
	copy(sorted, threads)
	sort.Ints(sorted)

	// Once sorted, any duplicated cpuID sits next to its twin, so a single
	// pass over neighbours is enough to detect non-unique input.
	for i := 1; i < len(sorted); i++ {
		if sorted[i] == sorted[i-1] {
			return 0, fmt.Errorf("cpus provided are not unique")
		}
	}

	// The first element of the sorted copy is the lowest cpuID.
	return sorted[0], nil
}
|
||||||
|
@ -103,6 +103,84 @@ func Test_Discover(t *testing.T) {
|
|||||||
},
|
},
|
||||||
wantErr: false,
|
wantErr: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "DualSocketHT - non unique Core'ID's",
|
||||||
|
args: &cadvisorapi.MachineInfo{
|
||||||
|
NumCores: 12,
|
||||||
|
Topology: []cadvisorapi.Node{
|
||||||
|
{Id: 0,
|
||||||
|
Cores: []cadvisorapi.Core{
|
||||||
|
{Id: 0, Threads: []int{0, 6}},
|
||||||
|
{Id: 1, Threads: []int{1, 7}},
|
||||||
|
{Id: 2, Threads: []int{2, 8}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{Id: 1,
|
||||||
|
Cores: []cadvisorapi.Core{
|
||||||
|
{Id: 0, Threads: []int{3, 9}},
|
||||||
|
{Id: 1, Threads: []int{4, 10}},
|
||||||
|
{Id: 2, Threads: []int{5, 11}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: &CPUTopology{
|
||||||
|
NumCPUs: 12,
|
||||||
|
NumSockets: 2,
|
||||||
|
NumCores: 6,
|
||||||
|
CPUDetails: map[int]CPUInfo{
|
||||||
|
0: {CoreID: 0, SocketID: 0},
|
||||||
|
1: {CoreID: 1, SocketID: 0},
|
||||||
|
2: {CoreID: 2, SocketID: 0},
|
||||||
|
3: {CoreID: 3, SocketID: 1},
|
||||||
|
4: {CoreID: 4, SocketID: 1},
|
||||||
|
5: {CoreID: 5, SocketID: 1},
|
||||||
|
6: {CoreID: 0, SocketID: 0},
|
||||||
|
7: {CoreID: 1, SocketID: 0},
|
||||||
|
8: {CoreID: 2, SocketID: 0},
|
||||||
|
9: {CoreID: 3, SocketID: 1},
|
||||||
|
10: {CoreID: 4, SocketID: 1},
|
||||||
|
11: {CoreID: 5, SocketID: 1},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "OneSocketHT fail",
|
||||||
|
args: &cadvisorapi.MachineInfo{
|
||||||
|
NumCores: 8,
|
||||||
|
Topology: []cadvisorapi.Node{
|
||||||
|
{Id: 0,
|
||||||
|
Cores: []cadvisorapi.Core{
|
||||||
|
{Id: 0, Threads: []int{0, 4}},
|
||||||
|
{Id: 1, Threads: []int{1, 5}},
|
||||||
|
{Id: 2, Threads: []int{2, 2}}, // Wrong case - should fail here
|
||||||
|
{Id: 3, Threads: []int{3, 7}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: &CPUTopology{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "OneSocketHT fail",
|
||||||
|
args: &cadvisorapi.MachineInfo{
|
||||||
|
NumCores: 8,
|
||||||
|
Topology: []cadvisorapi.Node{
|
||||||
|
{Id: 0,
|
||||||
|
Cores: []cadvisorapi.Core{
|
||||||
|
{Id: 0, Threads: []int{0, 4}},
|
||||||
|
{Id: 1, Threads: []int{1, 5}},
|
||||||
|
{Id: 2, Threads: []int{2, 6}},
|
||||||
|
{Id: 3, Threads: []int{}}, // Wrong case - should fail here
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
want: &CPUTopology{},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user