Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-31 07:20:13 +00:00)

Merge pull request #127155 from ffromani/alignment-metrics

node: metrics: add resource alignment metrics

Commit: c6669ea7d6
@@ -312,7 +312,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
	defer func() {
		if rerr != nil {
			metrics.CPUManagerPinningErrorsTotal.Inc()
			return
		}
		if !p.options.FullPhysicalCPUsOnly {
			// increment only if we know we allocate aligned resources
			return
		}
		metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
	}()

	if p.options.FullPhysicalCPUsOnly {
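The deferred closure above keys all accounting off the function's named return value rerr: errors bump the pinning-error counter, and the aligned-allocation counter moves only when full-physical-CPU alignment is actually guaranteed. A minimal standalone sketch of the same pattern (plain integers stand in for the Prometheus counters; all names here are illustrative):

	package main

	import "fmt"

	var pinningErrors, alignedAllocs int // stand-ins for the real counter metrics

	func allocate(fullPhysicalCPUsOnly, fail bool) (rerr error) {
		defer func() {
			if rerr != nil {
				pinningErrors++ // cf. CPUManagerPinningErrorsTotal.Inc()
				return
			}
			if !fullPhysicalCPUsOnly {
				return // increment only if we know we allocate aligned resources
			}
			alignedAllocs++ // cf. ContainerAlignedComputeResources...Inc()
		}()
		if fail {
			return fmt.Errorf("allocation failed")
		}
		return nil
	}

	func main() {
		_ = allocate(true, false)  // success, counted as aligned
		_ = allocate(false, false) // success, but alignment not guaranteed: not counted
		_ = allocate(true, true)   // failure, counted as a pinning error
		fmt.Println(pinningErrors, alignedAllocs) // 1 1
	}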
@@ -30,6 +30,14 @@ type Policy interface {
	Merge(providersHints []map[string][]TopologyHint) (TopologyHint, bool)
}

// IsAlignmentGuaranteed returns true if the given policy guarantees that either
// the compute resources will be allocated within a NUMA boundary, or the allocation will fail entirely.
func IsAlignmentGuaranteed(p Policy) bool {
	// We are abusing the name, but at the moment this matches the policy name
	// almost 1:1, so we are not adding new fields for now.
	return p.Name() == PolicySingleNumaNode
}

// Merge a TopologyHints permutation to a single hint by performing a bitwise-AND
// of their affinity masks. The hint shall be preferred if all hints in the permutation
// are preferred.
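A quick sketch of what the helper buys callers (the stub policy below is illustrative; real implementations also provide Merge, and the constant's value is assumed to match the upstream policy name):

	package main

	import "fmt"

	const PolicySingleNumaNode = "single-numa-node" // assumed value of the upstream constant

	// Policy is trimmed to the single method the helper consults.
	type Policy interface{ Name() string }

	type stubPolicy string

	func (p stubPolicy) Name() string { return string(p) }

	// IsAlignmentGuaranteed mirrors the helper added in this PR: only the
	// single-numa-node policy guarantees NUMA-aligned allocation or outright failure.
	func IsAlignmentGuaranteed(p Policy) bool { return p.Name() == PolicySingleNumaNode }

	func main() {
		fmt.Println(IsAlignmentGuaranteed(stubPolicy("single-numa-node"))) // true
		fmt.Println(IsAlignmentGuaranteed(stubPolicy("best-effort")))      // false
	}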
@@ -61,6 +61,10 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
			metrics.TopologyManagerAdmissionErrorsTotal.Inc()
			return admission.GetPodAdmitResult(err)
		}

		if IsAlignmentGuaranteed(s.policy) {
			metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Inc()
		}
	}
	return admission.GetPodAdmitResult(nil)
}
@@ -62,6 +62,10 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
			return admission.GetPodAdmitResult(err)
		}
	}
	if IsAlignmentGuaranteed(s.policy) {
		// increment only if we know we allocate aligned resources.
		metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Inc()
	}
	return admission.GetPodAdmitResult(nil)
}
@@ -127,10 +127,21 @@ const (
	// Metric for tracking garbage collected images
	ImageGarbageCollectedTotalKey = "image_garbage_collected_total"

	// Metric for tracking alignment of compute resources
	ContainerAlignedComputeResourcesNameKey          = "container_aligned_compute_resources_count"
	ContainerAlignedComputeResourcesScopeLabelKey    = "scope"
	ContainerAlignedComputeResourcesBoundaryLabelKey = "boundary"

	// Values used in metric labels
	Container          = "container"
	InitContainer      = "init_container"
	EphemeralContainer = "ephemeral_container"

	AlignScopePod       = "pod"
	AlignScopeContainer = "container"

	AlignedPhysicalCPU = "physical_cpu"
	AlignedNUMANode    = "numa_node"
)

type imageSizeBucket struct {
@@ -762,6 +773,16 @@ var (
		},
	)

	ContainerAlignedComputeResources = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           ContainerAlignedComputeResourcesNameKey,
			Help:           "Cumulative number of aligned compute resources allocated to containers by alignment type.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{ContainerAlignedComputeResourcesScopeLabelKey, ContainerAlignedComputeResourcesBoundaryLabelKey},
	)

	// MemoryManagerPinningRequestTotal tracks the number of times the pod spec required the memory manager to pin memory pages
	MemoryManagerPinningRequestTotal = metrics.NewCounter(
		&metrics.CounterOpts{
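Once registered, the counter surfaces on the kubelet's /metrics endpoint under the kubelet subsystem. Given the constants above, scraped samples would look roughly like this (values illustrative; the ALPHA annotation in the help text follows the usual component-base convention):

	# HELP kubelet_container_aligned_compute_resources_count [ALPHA] Cumulative number of aligned compute resources allocated to containers by alignment type.
	# TYPE kubelet_container_aligned_compute_resources_count counter
	kubelet_container_aligned_compute_resources_count{boundary="physical_cpu",scope="container"} 1
	kubelet_container_aligned_compute_resources_count{boundary="numa_node",scope="pod"} 2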
@@ -985,6 +1006,7 @@ func Register(collectors ...metrics.StableCollector) {
	legacyregistry.MustRegister(RunPodSandboxErrors)
	legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
	legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
	legacyregistry.MustRegister(ContainerAlignedComputeResources)
	if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) {
		legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
		legacyregistry.MustRegister(MemoryManagerPinningErrorsTotal)
@@ -91,6 +91,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
	ginkgo.AfterEach(func(ctx context.Context) {
		if testPod != nil {
			deletePodSyncByName(ctx, f, testPod.Name)
			waitForContainerRemoval(ctx, testPod.Spec.Containers[0].Name, testPod.Name, testPod.Namespace)
		}
		updateKubeletConfig(ctx, f, oldCfg, true)
	})
@@ -160,11 +161,32 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("should return updated alignment counters when a pod runs successfully", func(ctx context.Context) {
			ginkgo.By("Creating the test pod")
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("count-align-smt-ok", smtLevel))

			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the cpumanager metrics right after the kubelet restart; the pod should be admitted")

			idFn := makeCustomPairID("scope", "boundary")
			matchAlignmentMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_container_aligned_compute_resources_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
					"container::physical_cpu": timelessSample(1),
				}),
			})

			ginkgo.By("Giving the Kubelet time to update the alignment metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
			ginkgo.By("Ensuring the metrics match the expectations about alignment metrics a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
		})
	})
})

func getKubeletMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
-	ginkgo.By("getting Kubelet metrics from the metrics API")
+	ginkgo.By("Getting Kubelet metrics from the metrics API")
	return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
}
@@ -189,7 +211,7 @@ func makeGuaranteedCPUExclusiveSleeperPod(name string, cpus int) *v1.Pod {
					v1.ResourceMemory: resource.MustParse("64Mi"),
				},
			},
-			Command: []string{"sh", "-c", "sleep", "1d"},
+			Command: []string{"sh", "-c", "sleep 1d"},
		},
	},
},
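The one-line Command fix matters more than it looks: sh -c takes exactly one script argument, so in the old form the script was just sleep (with "1d" bound to $0), making sleep fail with a missing operand and the pod exit immediately. Passing "sleep 1d" as a single string gives the sleeper pod its intended one-day lifetime.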
@@ -157,6 +157,13 @@ func containerID(element interface{}) string {
	return fmt.Sprintf("%s::%s::%s", el.Metric["namespace"], el.Metric["pod"], el.Metric["container"])
}

func makeCustomPairID(pri, sec string) func(interface{}) string {
	return func(element interface{}) string {
		el := element.(*model.Sample)
		return fmt.Sprintf("%s::%s", el.Metric[model.LabelName(pri)], el.Metric[model.LabelName(sec)])
	}
}

func boundedSample(lower, upper interface{}) types.GomegaMatcher {
	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
		// We already check Metric when matching the Id
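makeCustomPairID is what lets the new e2e assertions key metric samples by scope::boundary, which the container::physical_cpu element in the matcher above relies on. A self-contained sketch using the same prometheus common model types:

	package main

	import (
		"fmt"

		"github.com/prometheus/common/model"
	)

	// makeCustomPairID builds a gstruct element-ID function from two label names,
	// joining their values with "::" exactly as in the PR.
	func makeCustomPairID(pri, sec string) func(interface{}) string {
		return func(element interface{}) string {
			el := element.(*model.Sample)
			return fmt.Sprintf("%s::%s", el.Metric[model.LabelName(pri)], el.Metric[model.LabelName(sec)])
		}
	}

	func main() {
		s := &model.Sample{Metric: model.Metric{"scope": "container", "boundary": "physical_cpu"}}
		fmt.Println(makeCustomPairID("scope", "boundary")(s)) // container::physical_cpu
	}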
@@ -28,6 +28,7 @@ import (
	v1 "k8s.io/api/core/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -152,6 +153,26 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
			ginkgo.By("Ensuring the metrics match the expectations a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
		})

		ginkgo.It("[alignment] should return updated alignment counters when a pod runs successfully", func(ctx context.Context) {
			ginkgo.By("Creating the test pod")
			testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("count-align-numa-ok", cpusNumPerNUMA))

			// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
			// being [Serial], we can also assume no one else but us is running pods.
			ginkgo.By("Checking the topology manager metrics right after the kubelet restart; the pod should be admitted")

			matchAlignmentMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
				"kubelet_container_aligned_compute_resources_count": gstruct.MatchAllElements(nodeID, gstruct.Elements{
					metrics.AlignedNUMANode: timelessSample(1),
				}),
			})

			ginkgo.By("Giving the Kubelet time to update the alignment metrics")
			gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
			ginkgo.By("Ensuring the metrics match the expectations about alignment metrics a few more times")
			gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
		})
	})
})