mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 04:06:03 +00:00
Merge pull request #115590 from swatisehgal/topology-mgr-duration-metrics
node: topology-mgr: Add metric to measure topology manager admission latency
This commit is contained in:
commit
e18fa74551
@ -18,6 +18,7 @@ package topologymanager
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||||
"k8s.io/api/core/v1"
|
"k8s.io/api/core/v1"
|
||||||
@ -209,9 +210,11 @@ func (m *manager) RemoveContainer(containerID string) error {
|
|||||||
|
|
||||||
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
|
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
|
||||||
klog.InfoS("Topology Admit Handler")
|
klog.InfoS("Topology Admit Handler")
|
||||||
|
|
||||||
metrics.TopologyManagerAdmissionRequestsTotal.Inc()
|
metrics.TopologyManagerAdmissionRequestsTotal.Inc()
|
||||||
pod := attrs.Pod
|
|
||||||
|
|
||||||
return m.scope.Admit(pod)
|
startTime := time.Now()
|
||||||
|
podAdmitResult := m.scope.Admit(attrs.Pod)
|
||||||
|
metrics.TopologyManagerAdmissionDuration.Observe(float64(time.Since(startTime).Milliseconds()))
|
||||||
|
|
||||||
|
return podAdmitResult
|
||||||
}
|
}
|
||||||
|
@ -94,6 +94,7 @@ const (
|
|||||||
// Metrics to track the Topology manager behavior
|
// Metrics to track the Topology manager behavior
|
||||||
TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
|
TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
|
||||||
TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
|
TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
|
||||||
|
TopologyManagerAdmissionDurationKey = "topology_manager_admission_duration_ms"
|
||||||
|
|
||||||
// Values used in metric labels
|
// Values used in metric labels
|
||||||
Container = "container"
|
Container = "container"
|
||||||
@ -573,6 +574,17 @@ var (
|
|||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TopologyManagerAdmissionDuration is a Histogram that tracks the duration (in milliseconds) to serve a pod admission request.
|
||||||
|
TopologyManagerAdmissionDuration = metrics.NewHistogram(
|
||||||
|
&metrics.HistogramOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: TopologyManagerAdmissionDurationKey,
|
||||||
|
Help: "Duration in milliseconds to serve a pod admission request.",
|
||||||
|
Buckets: metrics.ExponentialBuckets(.05, 2, 15),
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
var registerMetrics sync.Once
|
var registerMetrics sync.Once
|
||||||
@ -626,6 +638,7 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
||||||
legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
|
legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
|
||||||
legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
|
legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
|
||||||
|
legacyregistry.MustRegister(TopologyManagerAdmissionDuration)
|
||||||
|
|
||||||
for _, collector := range collectors {
|
for _, collector := range collectors {
|
||||||
legacyregistry.CustomMustRegister(collector)
|
legacyregistry.CustomMustRegister(collector)
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
"github.com/onsi/ginkgo/v2"
|
"github.com/onsi/ginkgo/v2"
|
||||||
"github.com/onsi/gomega"
|
"github.com/onsi/gomega"
|
||||||
"github.com/onsi/gomega/gstruct"
|
"github.com/onsi/gomega/gstruct"
|
||||||
|
"github.com/onsi/gomega/types"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
||||||
@ -85,6 +86,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
|
|||||||
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
||||||
"": timelessSample(0),
|
"": timelessSample(0),
|
||||||
}),
|
}),
|
||||||
|
"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
|
||||||
|
"": timelessSample(0),
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
|
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
|
||||||
@ -108,6 +112,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
|
|||||||
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
||||||
"": timelessSample(1),
|
"": timelessSample(1),
|
||||||
}),
|
}),
|
||||||
|
"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
|
||||||
|
"": checkMetricValueGreaterThan(0),
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
|
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
|
||||||
@ -122,7 +129,7 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
|
|||||||
|
|
||||||
// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
|
// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
|
||||||
// being [Serial], we can also assume no one else but us is running pods.
|
// being [Serial], we can also assume no one else but us is running pods.
|
||||||
ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod should be admitted")
|
ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod should be admitted")
|
||||||
|
|
||||||
matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
|
matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
|
||||||
"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
||||||
@ -131,6 +138,9 @@ var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]"
|
|||||||
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
|
||||||
"": timelessSample(0),
|
"": timelessSample(0),
|
||||||
}),
|
}),
|
||||||
|
"kubelet_topology_manager_admission_duration_ms_count": gstruct.MatchElements(nodeID, gstruct.IgnoreExtras, gstruct.Elements{
|
||||||
|
"": checkMetricValueGreaterThan(0),
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
|
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
|
||||||
@ -157,3 +167,12 @@ func hostCheck() (int, int) {
|
|||||||
|
|
||||||
return numaNodes, coreCount
|
return numaNodes, coreCount
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func checkMetricValueGreaterThan(value interface{}) types.GomegaMatcher {
|
||||||
|
return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
|
||||||
|
// We already check Metric when matching the Id
|
||||||
|
"Metric": gstruct.Ignore(),
|
||||||
|
"Value": gomega.BeNumerically(">", value),
|
||||||
|
"Timestamp": gstruct.Ignore(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user