mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-19 08:40:42 +00:00
node: cpumgr: metrics: add metrics for allocation per NUMA
Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
parent
0446f6c146
commit
f1031be019
@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
@ -389,7 +390,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs)
|
||||
p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs)
|
||||
p.updateMetricsOnAllocate(cpuAllocation)
|
||||
p.updateMetricsOnAllocate(s, cpuAllocation)
|
||||
|
||||
klog.V(4).InfoS("Allocated exclusive CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", cpuAllocation.CPUs.String())
|
||||
return nil
|
||||
@ -416,7 +417,8 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
toRelease = toRelease.Difference(cpusInUse)
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
p.updateMetricsOnRelease(toRelease)
|
||||
p.updateMetricsOnRelease(s, toRelease)
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -755,33 +757,60 @@ func (p *staticPolicy) getAlignedCPUs(numaAffinity bitmask.BitMask, allocatableC
|
||||
|
||||
func (p *staticPolicy) initializeMetrics(s state.State) {
|
||||
metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(countExclusiveCPUs(s)))
|
||||
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
|
||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
|
||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Add(0) // ensure the value exists
|
||||
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(totalAssignedCPUs.Size()))
|
||||
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) updateMetricsOnAllocate(cpuAlloc topology.Allocation) {
|
||||
func (p *staticPolicy) updateMetricsOnAllocate(s state.State, cpuAlloc topology.Allocation) {
|
||||
ncpus := cpuAlloc.CPUs.Size()
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(ncpus))
|
||||
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(-ncpus * 1000))
|
||||
if cpuAlloc.Aligned.UncoreCache {
|
||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Inc()
|
||||
}
|
||||
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
|
||||
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) updateMetricsOnRelease(cset cpuset.CPUSet) {
|
||||
func (p *staticPolicy) updateMetricsOnRelease(s state.State, cset cpuset.CPUSet) {
|
||||
ncpus := cset.Size()
|
||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(-ncpus))
|
||||
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(ncpus * 1000))
|
||||
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
|
||||
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs.Difference(cset))
|
||||
}
|
||||
|
||||
func countExclusiveCPUs(s state.State) int {
|
||||
exclusiveCPUs := 0
|
||||
for _, cpuAssign := range s.GetCPUAssignments() {
|
||||
for _, cset := range cpuAssign {
|
||||
exclusiveCPUs += cset.Size()
|
||||
func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet {
|
||||
totalAssignedCPUs := cpuset.New()
|
||||
for _, assignment := range s.GetCPUAssignments() {
|
||||
for _, cset := range assignment {
|
||||
totalAssignedCPUs = totalAssignedCPUs.Union(cset)
|
||||
}
|
||||
|
||||
}
|
||||
return totalAssignedCPUs
|
||||
}
|
||||
|
||||
func updateAllocationPerNUMAMetric(topo *topology.CPUTopology, allocatedCPUs cpuset.CPUSet) {
|
||||
numaCount := make(map[int]int)
|
||||
|
||||
// Count CPUs allocated per NUMA node
|
||||
for _, cpuID := range allocatedCPUs.UnsortedList() {
|
||||
numaNode, err := topo.CPUNUMANodeID(cpuID)
|
||||
if err != nil {
|
||||
//NOTE: We are logging the error but it is highly unlikely to happen as the CPUset
|
||||
// is already computed, evaluated and there is no room for user tampering.
|
||||
klog.ErrorS(err, "Unable to determine NUMA node", "cpuID", cpuID)
|
||||
}
|
||||
numaCount[numaNode]++
|
||||
}
|
||||
|
||||
// Update metric
|
||||
for numaNode, count := range numaCount {
|
||||
metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count))
|
||||
}
|
||||
return exclusiveCPUs
|
||||
}
|
||||
|
@ -113,6 +113,7 @@ const (
|
||||
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
||||
CPUManagerSharedPoolSizeMilliCoresKey = "cpu_manager_shared_pool_size_millicores"
|
||||
CPUManagerExclusiveCPUsAllocationCountKey = "cpu_manager_exclusive_cpu_allocation_count"
|
||||
CPUManagerAllocationPerNUMAKey = "cpu_manager_allocation_per_numa"
|
||||
|
||||
// Metrics to track the Memory manager behavior
|
||||
MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total"
|
||||
@ -815,6 +816,17 @@ var (
|
||||
},
|
||||
)
|
||||
|
||||
// CPUManagerAllocationPerNUMA tracks the count of CPUs allocated per NUMA node.
// It is a gauge vector with a single label (AlignedNUMANode); the CPU manager
// sets one series per NUMA node, using the decimal NUMA node ID as the label value.
|
||||
CPUManagerAllocationPerNUMA = metrics.NewGaugeVec(
|
||||
&metrics.GaugeOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: CPUManagerAllocationPerNUMAKey,
|
||||
Help: "Number of CPUs allocated per NUMA node",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
[]string{AlignedNUMANode},
|
||||
)
|
||||
|
||||
// ContainerAlignedComputeResources reports the count of resources allocation which granted aligned resources, per alignment boundary
|
||||
ContainerAlignedComputeResources = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
@ -1126,6 +1138,7 @@ func Register(collectors ...metrics.StableCollector) {
|
||||
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
||||
legacyregistry.MustRegister(CPUManagerSharedPoolSizeMilliCores)
|
||||
legacyregistry.MustRegister(CPUManagerExclusiveCPUsAllocationCount)
|
||||
legacyregistry.MustRegister(CPUManagerAllocationPerNUMA)
|
||||
legacyregistry.MustRegister(ContainerAlignedComputeResources)
|
||||
legacyregistry.MustRegister(ContainerAlignedComputeResourcesFailure)
|
||||
legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
|
||||
|
Loading…
Reference in New Issue
Block a user