mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 06:54:01 +00:00
node: metrics: cpumanager: add pinning metrics
In order to improve the observability of the cpumanager, add and populate metrics to track if the combination of the kubelet configuration and podspec would trigger exclusive core allocation and pinning. We should avoid leaking any node/machine specific information (e.g. core ids, even though this is admittedly an extreme example); tracking these metrics seems to be a good first step, because it allows us to get feedback without exposing details. Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
parent
5539a5b80f
commit
47d3299781
@ -27,6 +27,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -252,10 +253,21 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||
p.cpusToReuse[string(pod.UID)] = p.cpusToReuse[string(pod.UID)].Difference(cset)
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
|
||||
numCPUs := p.guaranteedCPUs(pod, container)
|
||||
if numCPUs == 0 {
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
|
||||
klog.InfoS("Static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
// container belongs in an exclusively allocated pool
|
||||
metrics.CPUManagerPinningRequestsTotal.Inc()
|
||||
defer func() {
|
||||
if rerr != nil {
|
||||
metrics.CPUManagerPinningErrorsTotal.Inc()
|
||||
}
|
||||
}()
|
||||
|
||||
if p.options.FullPhysicalCPUsOnly && ((numCPUs % p.topology.CPUsPerCore()) != 0) {
|
||||
// Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted
|
||||
@ -291,8 +303,6 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
|
||||
}
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -86,6 +86,10 @@ const (
|
||||
// Metrics to track ephemeral container usage by this kubelet
|
||||
ManagedEphemeralContainersKey = "managed_ephemeral_containers"
|
||||
|
||||
// Metrics to track the CPU manager behavior
|
||||
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
|
||||
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
||||
|
||||
// Values used in metric labels
|
||||
Container = "container"
|
||||
InitContainer = "init_container"
|
||||
@ -506,6 +510,26 @@ var (
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
|
||||
// CPUManagerPinningRequestsTotal tracks the number of times the pod spec will cause the cpu manager to pin cores
|
||||
CPUManagerPinningRequestsTotal = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: CPUManagerPinningRequestsTotalKey,
|
||||
Help: "The number of cpu core allocations which required pinning.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
|
||||
// CPUManagerPinningErrorsTotal tracks the number of times the pod spec required the cpu manager to pin cores, but the allocation failed
|
||||
CPUManagerPinningErrorsTotal = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: CPUManagerPinningErrorsTotalKey,
|
||||
Help: "The number of cpu core allocations which required pinning failed.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
@ -570,6 +594,11 @@ func Register(collectors ...metrics.StableCollector) {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ConsistentHTTPGetHandlers) {
|
||||
legacyregistry.MustRegister(LifecycleHandlerHTTPFallbacks)
|
||||
}
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.CPUManager) {
|
||||
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
|
||||
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user