mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-04 18:00:08 +00:00
Add metric for e2e pod startup latency including image pull
Signed-off-by: ruiwen-zhao <ruiwen@google.com>
This commit is contained in:
parent
af52a7052b
commit
1165609036
@ -41,6 +41,7 @@ const (
|
|||||||
PodWorkerDurationKey = "pod_worker_duration_seconds"
|
PodWorkerDurationKey = "pod_worker_duration_seconds"
|
||||||
PodStartDurationKey = "pod_start_duration_seconds"
|
PodStartDurationKey = "pod_start_duration_seconds"
|
||||||
PodStartSLIDurationKey = "pod_start_sli_duration_seconds"
|
PodStartSLIDurationKey = "pod_start_sli_duration_seconds"
|
||||||
|
PodStartTotalDurationKey = "pod_start_total_duration_seconds"
|
||||||
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
|
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
|
||||||
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
|
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
|
||||||
PodStatusSyncDurationKey = "pod_status_sync_duration_seconds"
|
PodStatusSyncDurationKey = "pod_status_sync_duration_seconds"
|
||||||
@ -125,6 +126,10 @@ const (
|
|||||||
EphemeralContainer = "ephemeral_container"
|
EphemeralContainer = "ephemeral_container"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// podStartupDurationBuckets holds the histogram bucket boundaries, in seconds,
// shared by the pod startup latency histograms that reference it as their Buckets.
// The values run from 0.5 up to 3600 (one hour) in increasing order.
podStartupDurationBuckets = []float64{0.5, 1, 2, 3, 4, 5, 6, 8, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600}
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// NodeName is a Gauge that tracks the node's name. The count is always 1.
|
// NodeName is a Gauge that tracks the node's name. The count is always 1.
|
||||||
NodeName = metrics.NewGaugeVec(
|
NodeName = metrics.NewGaugeVec(
|
||||||
@ -165,7 +170,7 @@ var (
|
|||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PodStartDurationKey,
|
Name: PodStartDurationKey,
|
||||||
Help: "Duration in seconds from kubelet seeing a pod for the first time to the pod starting to run",
|
Help: "Duration in seconds from kubelet seeing a pod for the first time to the pod starting to run",
|
||||||
Buckets: []float64{0.5, 1, 2, 3, 4, 5, 6, 8, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
|
Buckets: podStartupDurationBuckets,
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -182,11 +187,30 @@ var (
|
|||||||
Subsystem: KubeletSubsystem,
|
Subsystem: KubeletSubsystem,
|
||||||
Name: PodStartSLIDurationKey,
|
Name: PodStartSLIDurationKey,
|
||||||
Help: "Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch",
|
Help: "Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch",
|
||||||
Buckets: []float64{0.5, 1, 2, 3, 4, 5, 6, 8, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
|
Buckets: podStartupDurationBuckets,
|
||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
[]string{},
|
[]string{},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// PodStartTotalDuration is a Histogram that tracks the duration (in seconds) it takes for a single pod to run
|
||||||
|
// since creation, including the time for image pulling.
|
||||||
|
//
|
||||||
|
// The histogram bucket boundaries for pod startup latency metrics, measured in seconds. These are hand-picked
|
||||||
|
// so as to be roughly exponential but still round numbers in everyday units. This is to minimise the number
|
||||||
|
// of buckets while allowing accurate measurement of thresholds which might be used in SLOs
|
||||||
|
// e.g. x% of pods start up within 30 seconds, or 15 minutes, etc.
|
||||||
|
PodStartTotalDuration = metrics.NewHistogramVec(
|
||||||
|
&metrics.HistogramOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: PodStartTotalDurationKey,
|
||||||
|
Help: "Duration in seconds to start a pod since creation, including time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch",
|
||||||
|
Buckets: podStartupDurationBuckets,
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
[]string{},
|
||||||
|
)
|
||||||
|
|
||||||
// CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete.
|
// CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete.
|
||||||
// Broken down by method.
|
// Broken down by method.
|
||||||
CgroupManagerDuration = metrics.NewHistogramVec(
|
CgroupManagerDuration = metrics.NewHistogramVec(
|
||||||
@ -810,6 +834,7 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(PodWorkerDuration)
|
legacyregistry.MustRegister(PodWorkerDuration)
|
||||||
legacyregistry.MustRegister(PodStartDuration)
|
legacyregistry.MustRegister(PodStartDuration)
|
||||||
legacyregistry.MustRegister(PodStartSLIDuration)
|
legacyregistry.MustRegister(PodStartSLIDuration)
|
||||||
|
legacyregistry.MustRegister(PodStartTotalDuration)
|
||||||
legacyregistry.MustRegister(NodeStartupPreKubeletDuration)
|
legacyregistry.MustRegister(NodeStartupPreKubeletDuration)
|
||||||
legacyregistry.MustRegister(NodeStartupPreRegistrationDuration)
|
legacyregistry.MustRegister(NodeStartupPreRegistrationDuration)
|
||||||
legacyregistry.MustRegister(NodeStartupRegistrationDuration)
|
legacyregistry.MustRegister(NodeStartupRegistrationDuration)
|
||||||
|
@ -102,6 +102,7 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
|
|||||||
klog.InfoS("Observed pod startup duration",
|
klog.InfoS("Observed pod startup duration",
|
||||||
"pod", klog.KObj(pod),
|
"pod", klog.KObj(pod),
|
||||||
"podStartSLOduration", podStartSLOduration,
|
"podStartSLOduration", podStartSLOduration,
|
||||||
|
"podStartE2EDuration", podStartingDuration,
|
||||||
"podCreationTimestamp", pod.CreationTimestamp.Time,
|
"podCreationTimestamp", pod.CreationTimestamp.Time,
|
||||||
"firstStartedPulling", state.firstStartedPulling,
|
"firstStartedPulling", state.firstStartedPulling,
|
||||||
"lastFinishedPulling", state.lastFinishedPulling,
|
"lastFinishedPulling", state.lastFinishedPulling,
|
||||||
@ -109,6 +110,7 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
|
|||||||
"watchObservedRunningTime", when)
|
"watchObservedRunningTime", when)
|
||||||
|
|
||||||
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
|
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
|
||||||
|
metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds())
|
||||||
state.metricRecorded = true
|
state.metricRecorded = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user