mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-19 08:40:42 +00:00
node: cpumgr: metrics: add metrics for allocation per NUMA
Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
parent
0446f6c146
commit
f1031be019
@ -18,6 +18,7 @@ package cpumanager
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||||
@ -389,7 +390,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
|||||||
|
|
||||||
s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs)
|
s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs)
|
||||||
p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs)
|
p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs)
|
||||||
p.updateMetricsOnAllocate(cpuAllocation)
|
p.updateMetricsOnAllocate(s, cpuAllocation)
|
||||||
|
|
||||||
klog.V(4).InfoS("Allocated exclusive CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", cpuAllocation.CPUs.String())
|
klog.V(4).InfoS("Allocated exclusive CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", cpuAllocation.CPUs.String())
|
||||||
return nil
|
return nil
|
||||||
@ -416,7 +417,8 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
|||||||
// Mutate the shared pool, adding released cpus.
|
// Mutate the shared pool, adding released cpus.
|
||||||
toRelease = toRelease.Difference(cpusInUse)
|
toRelease = toRelease.Difference(cpusInUse)
|
||||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||||
p.updateMetricsOnRelease(toRelease)
|
p.updateMetricsOnRelease(s, toRelease)
|
||||||
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -755,33 +757,60 @@ func (p *staticPolicy) getAlignedCPUs(numaAffinity bitmask.BitMask, allocatableC
|
|||||||
|
|
||||||
func (p *staticPolicy) initializeMetrics(s state.State) {
|
func (p *staticPolicy) initializeMetrics(s state.State) {
|
||||||
metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
|
metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
|
||||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(countExclusiveCPUs(s)))
|
|
||||||
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
|
metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
|
||||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
|
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
|
||||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Add(0) // ensure the value exists
|
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Add(0) // ensure the value exists
|
||||||
|
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
|
||||||
|
metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(totalAssignedCPUs.Size()))
|
||||||
|
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *staticPolicy) updateMetricsOnAllocate(cpuAlloc topology.Allocation) {
|
func (p *staticPolicy) updateMetricsOnAllocate(s state.State, cpuAlloc topology.Allocation) {
|
||||||
ncpus := cpuAlloc.CPUs.Size()
|
ncpus := cpuAlloc.CPUs.Size()
|
||||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(ncpus))
|
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(ncpus))
|
||||||
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(-ncpus * 1000))
|
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(-ncpus * 1000))
|
||||||
if cpuAlloc.Aligned.UncoreCache {
|
if cpuAlloc.Aligned.UncoreCache {
|
||||||
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Inc()
|
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Inc()
|
||||||
}
|
}
|
||||||
|
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
|
||||||
|
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *staticPolicy) updateMetricsOnRelease(cset cpuset.CPUSet) {
|
func (p *staticPolicy) updateMetricsOnRelease(s state.State, cset cpuset.CPUSet) {
|
||||||
ncpus := cset.Size()
|
ncpus := cset.Size()
|
||||||
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(-ncpus))
|
metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(-ncpus))
|
||||||
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(ncpus * 1000))
|
metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(ncpus * 1000))
|
||||||
|
totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
|
||||||
|
updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs.Difference(cset))
|
||||||
}
|
}
|
||||||
|
|
||||||
func countExclusiveCPUs(s state.State) int {
|
func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet {
|
||||||
exclusiveCPUs := 0
|
totalAssignedCPUs := cpuset.New()
|
||||||
for _, cpuAssign := range s.GetCPUAssignments() {
|
for _, assignment := range s.GetCPUAssignments() {
|
||||||
for _, cset := range cpuAssign {
|
for _, cset := range assignment {
|
||||||
exclusiveCPUs += cset.Size()
|
totalAssignedCPUs = totalAssignedCPUs.Union(cset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return totalAssignedCPUs
|
||||||
|
}
|
||||||
|
|
||||||
|
func updateAllocationPerNUMAMetric(topo *topology.CPUTopology, allocatedCPUs cpuset.CPUSet) {
|
||||||
|
numaCount := make(map[int]int)
|
||||||
|
|
||||||
|
// Count CPUs allocated per NUMA node
|
||||||
|
for _, cpuID := range allocatedCPUs.UnsortedList() {
|
||||||
|
numaNode, err := topo.CPUNUMANodeID(cpuID)
|
||||||
|
if err != nil {
|
||||||
|
//NOTE: We are logging the error but it is highly unlikely to happen as the CPUset
|
||||||
|
// is already computed, evaluated and there is no room for user tampering.
|
||||||
|
klog.ErrorS(err, "Unable to determine NUMA node", "cpuID", cpuID)
|
||||||
|
}
|
||||||
|
numaCount[numaNode]++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update metric
|
||||||
|
for numaNode, count := range numaCount {
|
||||||
|
metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count))
|
||||||
}
|
}
|
||||||
return exclusiveCPUs
|
|
||||||
}
|
}
|
||||||
|
@ -113,6 +113,7 @@ const (
|
|||||||
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
||||||
CPUManagerSharedPoolSizeMilliCoresKey = "cpu_manager_shared_pool_size_millicores"
|
CPUManagerSharedPoolSizeMilliCoresKey = "cpu_manager_shared_pool_size_millicores"
|
||||||
CPUManagerExclusiveCPUsAllocationCountKey = "cpu_manager_exclusive_cpu_allocation_count"
|
CPUManagerExclusiveCPUsAllocationCountKey = "cpu_manager_exclusive_cpu_allocation_count"
|
||||||
|
CPUManagerAllocationPerNUMAKey = "cpu_manager_allocation_per_numa"
|
||||||
|
|
||||||
// Metrics to track the Memory manager behavior
|
// Metrics to track the Memory manager behavior
|
||||||
MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total"
|
MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total"
|
||||||
@ -815,6 +816,17 @@ var (
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CPUManagerAllocationPerNUMA tracks the count of CPUs allocated per NUMA node
|
||||||
|
CPUManagerAllocationPerNUMA = metrics.NewGaugeVec(
|
||||||
|
&metrics.GaugeOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: CPUManagerAllocationPerNUMAKey,
|
||||||
|
Help: "Number of CPUs allocated per NUMA node",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
[]string{AlignedNUMANode},
|
||||||
|
)
|
||||||
|
|
||||||
// ContainerAlignedComputeResources reports the count of resources allocation which granted aligned resources, per alignment boundary
|
// ContainerAlignedComputeResources reports the count of resources allocation which granted aligned resources, per alignment boundary
|
||||||
ContainerAlignedComputeResources = metrics.NewCounterVec(
|
ContainerAlignedComputeResources = metrics.NewCounterVec(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
@ -1126,6 +1138,7 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
||||||
legacyregistry.MustRegister(CPUManagerSharedPoolSizeMilliCores)
|
legacyregistry.MustRegister(CPUManagerSharedPoolSizeMilliCores)
|
||||||
legacyregistry.MustRegister(CPUManagerExclusiveCPUsAllocationCount)
|
legacyregistry.MustRegister(CPUManagerExclusiveCPUsAllocationCount)
|
||||||
|
legacyregistry.MustRegister(CPUManagerAllocationPerNUMA)
|
||||||
legacyregistry.MustRegister(ContainerAlignedComputeResources)
|
legacyregistry.MustRegister(ContainerAlignedComputeResources)
|
||||||
legacyregistry.MustRegister(ContainerAlignedComputeResourcesFailure)
|
legacyregistry.MustRegister(ContainerAlignedComputeResourcesFailure)
|
||||||
legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
|
legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
|
||||||
|
Loading…
Reference in New Issue
Block a user