Mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-19 18:02:01 +00:00)
Merge pull request #115590 from swatisehgal/topology-mgr-duration-metrics
node: topology-mgr: Add metric to measure topology manager admission latency
This commit is contained in: commit e18fa74551
@@ -18,6 +18,7 @@ package topologymanager
 import (
 	"fmt"
+	"time"

 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"k8s.io/api/core/v1"
@@ -209,9 +210,11 @@ func (m *manager) RemoveContainer(containerID string) error {
 func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
 	klog.InfoS("Topology Admit Handler")

 	metrics.TopologyManagerAdmissionRequestsTotal.Inc()
-	pod := attrs.Pod
-
-	return m.scope.Admit(pod)
+	startTime := time.Now()
+	podAdmitResult := m.scope.Admit(attrs.Pod)
+	metrics.TopologyManagerAdmissionDuration.Observe(float64(time.Since(startTime).Milliseconds()))
+
+	return podAdmitResult
 }
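For illustration only, a minimal self-contained sketch of the timing pattern the new Admit body uses; the admit and observe helpers below are hypothetical stand-ins for m.scope.Admit and the histogram's Observe method, not part of the patch. Note that time.Duration.Milliseconds() truncates, so the recorded value is always a whole number of milliseconds.

package main

import (
	"fmt"
	"time"
)

// observe is a hypothetical stand-in for
// metrics.TopologyManagerAdmissionDuration.Observe.
func observe(ms float64) {
	fmt.Printf("observed admission duration: %v ms\n", ms)
}

// admit is a hypothetical stand-in for m.scope.Admit; it just sleeps
// to simulate admission work.
func admit() bool {
	time.Sleep(3 * time.Millisecond)
	return true
}

func main() {
	// Same shape as the patched Admit(): capture the start time, run the
	// admission, then record the elapsed wall-clock time in milliseconds.
	startTime := time.Now()
	admitted := admit()
	observe(float64(time.Since(startTime).Milliseconds()))
	fmt.Println("admitted:", admitted)
}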
@@ -94,6 +94,7 @@ const (
 	// Metrics to track the Topology manager behavior
 	TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
 	TopologyManagerAdmissionErrorsTotalKey   = "topology_manager_admission_errors_total"
+	TopologyManagerAdmissionDurationKey      = "topology_manager_admission_duration_ms"

 	// Values used in metric labels
 	Container = "container"
@@ -573,6 +574,17 @@ var (
 			StabilityLevel: metrics.ALPHA,
 		},
 	)

+	// TopologyManagerAdmissionDuration is a Histogram that tracks the duration (in milliseconds) to serve a pod admission request.
+	TopologyManagerAdmissionDuration = metrics.NewHistogram(
+		&metrics.HistogramOpts{
+			Subsystem:      KubeletSubsystem,
+			Name:           TopologyManagerAdmissionDurationKey,
+			Help:           "Duration in milliseconds to serve a pod admission request.",
+			Buckets:        metrics.ExponentialBuckets(.05, 2, 15),
+			StabilityLevel: metrics.ALPHA,
+		},
+	)
 )

 var registerMetrics sync.Once
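The bucket layout chosen above, ExponentialBuckets(.05, 2, 15), yields 15 upper bounds that double from 0.05 ms up to 0.05 * 2^14 = 819.2 ms. A small standalone sketch that re-computes those boundaries; the helper below only mimics what ExponentialBuckets returns and is not the component-base implementation:

package main

import "fmt"

// exponentialBuckets re-computes what ExponentialBuckets(start, factor, count)
// produces: `count` bucket upper bounds, starting at `start` and multiplying
// by `factor` each step.
func exponentialBuckets(start, factor float64, count int) []float64 {
	buckets := make([]float64, count)
	for i := range buckets {
		buckets[i] = start
		start *= factor
	}
	return buckets
}

func main() {
	// The patch uses ExponentialBuckets(.05, 2, 15): 0.05 ms, 0.1 ms, ..., 819.2 ms.
	fmt.Println(exponentialBuckets(0.05, 2, 15))
}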
@@ -626,6 +638,7 @@ func Register(collectors ...metrics.StableCollector) {
 		legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
 		legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
 		legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
+		legacyregistry.MustRegister(TopologyManagerAdmissionDuration)

 		for _, collector := range collectors {
 			legacyregistry.CustomMustRegister(collector)
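As a rough analogy for how the registered histogram ends up on the kubelet's /metrics endpoint, a hedged sketch using plain prometheus/client_golang rather than the k8s.io/component-base/metrics wrappers and legacyregistry used here; the metric name and port below are made up.

package main

import (
	"log"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// admissionDuration plays the role of TopologyManagerAdmissionDuration;
// the metric name is hypothetical.
var admissionDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
	Name:    "example_admission_duration_ms",
	Help:    "Duration in milliseconds to serve a pod admission request.",
	Buckets: prometheus.ExponentialBuckets(0.05, 2, 15),
})

func main() {
	// Analogous to legacyregistry.MustRegister(TopologyManagerAdmissionDuration).
	prometheus.MustRegister(admissionDuration)

	// Record one observation, then expose the histogram for scraping.
	start := time.Now()
	time.Sleep(2 * time.Millisecond) // stand-in for the admission work
	admissionDuration.Observe(float64(time.Since(start).Milliseconds()))

	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}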
@@ -23,6 +23,7 @@ import (
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
 	"github.com/onsi/gomega/gstruct"
+	"github.com/onsi/gomega/types"

 	v1 "k8s.io/api/core/v1"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
@@ -85,6 +86,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
 			"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 				"": timelessSample(0),
 			}),
+			"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
+				"": timelessSample(0),
+			}),
 		})

 		ginkgo.By("Giving the Kubelet time to start up and produce metrics")
@@ -108,6 +112,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
 			"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 				"": timelessSample(1),
 			}),
+			"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
+				"": checkMetricValueGreaterThan(0),
+			}),
 		})

 		ginkgo.By("Giving the Kubelet time to start up and produce metrics")
@@ -122,7 +129,7 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
 		// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
 		// being [Serial], we can also assume noone else but us is running pods.
-		ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod should be admitted")
+		ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod should be admitted")

 		matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
 			"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
@@ -131,6 +138,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
 			"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
 				"": timelessSample(0),
 			}),
+			"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
+				"": checkMetricValueGreaterThan(0),
+			}),
 		})

 		ginkgo.By("Giving the Kubelet time to start up and produce metrics")
@@ -157,3 +167,12 @@ func hostCheck() (int, int) {
 	return numaNodes, coreCount
 }
+
+func checkMetricValueGreaterThan(value interface{}) types.GomegaMatcher {
+	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
+		// We already check Metric when matching the Id
+		"Metric":    gstruct.Ignore(),
+		"Value":     gomega.BeNumerically(">", value),
+		"Timestamp": gstruct.Ignore(),
+	}))
+}
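For context, a hedged standalone example of how the new checkMetricValueGreaterThan helper behaves, assuming (as in the e2e suite) that the matched samples are *model.Sample values from github.com/prometheus/common/model; the package and test names below are made up.

package metrics_test

import (
	"testing"

	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gstruct"
	"github.com/onsi/gomega/types"
	"github.com/prometheus/common/model"
)

// checkMetricValueGreaterThan mirrors the helper added by the patch.
func checkMetricValueGreaterThan(value interface{}) types.GomegaMatcher {
	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
		"Metric":    gstruct.Ignore(),
		"Value":     gomega.BeNumerically(">", value),
		"Timestamp": gstruct.Ignore(),
	}))
}

func TestCheckMetricValueGreaterThan(t *testing.T) {
	g := gomega.NewWithT(t)
	// A non-zero sample, as the duration_ms_count series would report after
	// at least one pod admission has been observed.
	sample := &model.Sample{Value: 3}
	g.Expect(sample).To(checkMetricValueGreaterThan(0))
}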