mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 11:50:44 +00:00
Merge pull request #86251 from bboreham/pleg-last-seen-metric
Kubelet: add a metric to observe time since PLEG last seen
This commit is contained in:
commit
49bc696614
@ -44,6 +44,7 @@ const (
|
||||
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
|
||||
PLEGDiscardEventsKey = "pleg_discard_events"
|
||||
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
||||
PLEGLastSeenKey = "pleg_last_seen_seconds"
|
||||
EvictionsKey = "evictions"
|
||||
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
||||
PreemptionsKey = "preemptions"
|
||||
@ -186,6 +187,16 @@ var (
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
|
||||
// Pod Lifecycle Event Generator (PLEG) was last seen active.
|
||||
PLEGLastSeen = metrics.NewGauge(
|
||||
&metrics.GaugeOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: PLEGLastSeenKey,
|
||||
Help: "Timestamp in seconds when PLEG was last seen active.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
||||
// Broken down by operation type.
|
||||
RuntimeOperations = metrics.NewCounterVec(
|
||||
@ -522,6 +533,7 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...metrics.S
|
||||
legacyregistry.MustRegister(PLEGRelistDuration)
|
||||
legacyregistry.MustRegister(PLEGDiscardEvents)
|
||||
legacyregistry.MustRegister(PLEGRelistInterval)
|
||||
legacyregistry.MustRegister(PLEGLastSeen)
|
||||
legacyregistry.MustRegister(RuntimeOperations)
|
||||
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
||||
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
||||
|
@ -138,6 +138,8 @@ func (g *GenericPLEG) Healthy() (bool, error) {
|
||||
if relistTime.IsZero() {
|
||||
return false, fmt.Errorf("pleg has yet to be successful")
|
||||
}
|
||||
// Expose as a metric so you can alert on `time() - pleg_last_seen_seconds > nn`
// (the exported series name is additionally prefixed with the KubeletSubsystem value).
|
||||
metrics.PLEGLastSeen.Set(float64(relistTime.Unix()))
|
||||
elapsed := g.clock.Since(relistTime)
|
||||
if elapsed > relistThreshold {
|
||||
return false, fmt.Errorf("pleg was last seen active %v ago; threshold is %v", elapsed, relistThreshold)
|
||||
|
Loading…
Reference in New Issue
Block a user