mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 04:33:26 +00:00
Merge pull request #86251 from bboreham/pleg-last-seen-metric
Kubelet: add a metric to observe time since PLEG last seen
This commit is contained in:
commit
49bc696614
@ -44,6 +44,7 @@ const (
|
|||||||
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
|
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
|
||||||
PLEGDiscardEventsKey = "pleg_discard_events"
|
PLEGDiscardEventsKey = "pleg_discard_events"
|
||||||
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
||||||
|
PLEGLastSeenKey = "pleg_last_seen_seconds"
|
||||||
EvictionsKey = "evictions"
|
EvictionsKey = "evictions"
|
||||||
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
||||||
PreemptionsKey = "preemptions"
|
PreemptionsKey = "preemptions"
|
||||||
@ -186,6 +187,16 @@ var (
|
|||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
|
||||||
|
// Pod Lifecycle Event Generator (PLEG) was last seen active.
|
||||||
|
PLEGLastSeen = metrics.NewGauge(
|
||||||
|
&metrics.GaugeOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: PLEGLastSeenKey,
|
||||||
|
Help: "Timestamp in seconds when PLEG was last seen active.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
||||||
// Broken down by operation type.
|
// Broken down by operation type.
|
||||||
RuntimeOperations = metrics.NewCounterVec(
|
RuntimeOperations = metrics.NewCounterVec(
|
||||||
@ -522,6 +533,7 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...metrics.S
|
|||||||
legacyregistry.MustRegister(PLEGRelistDuration)
|
legacyregistry.MustRegister(PLEGRelistDuration)
|
||||||
legacyregistry.MustRegister(PLEGDiscardEvents)
|
legacyregistry.MustRegister(PLEGDiscardEvents)
|
||||||
legacyregistry.MustRegister(PLEGRelistInterval)
|
legacyregistry.MustRegister(PLEGRelistInterval)
|
||||||
|
legacyregistry.MustRegister(PLEGLastSeen)
|
||||||
legacyregistry.MustRegister(RuntimeOperations)
|
legacyregistry.MustRegister(RuntimeOperations)
|
||||||
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
||||||
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
||||||
|
@ -138,6 +138,8 @@ func (g *GenericPLEG) Healthy() (bool, error) {
|
|||||||
if relistTime.IsZero() {
|
if relistTime.IsZero() {
|
||||||
return false, fmt.Errorf("pleg has yet to be successful")
|
return false, fmt.Errorf("pleg has yet to be successful")
|
||||||
}
|
}
|
||||||
|
// Expose as metric so you can alert on `time() - kubelet_pleg_last_seen_seconds > nn`
|
||||||
|
metrics.PLEGLastSeen.Set(float64(relistTime.Unix()))
|
||||||
elapsed := g.clock.Since(relistTime)
|
elapsed := g.clock.Since(relistTime)
|
||||||
if elapsed > relistThreshold {
|
if elapsed > relistThreshold {
|
||||||
return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
|
return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
|
||||||
|
Loading…
Reference in New Issue
Block a user