mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 18:02:01 +00:00
Merge pull request #115967 from harche/evented_pleg_metrics
Graduate Evented PLEG to Beta
This commit is contained in:
commit
c6f3007071
@ -259,6 +259,7 @@ const (
|
|||||||
// owner: @harche
|
// owner: @harche
|
||||||
// kep: http://kep.k8s.io/3386
|
// kep: http://kep.k8s.io/3386
|
||||||
// alpha: v1.25
|
// alpha: v1.25
|
||||||
|
// beta: v1.27
|
||||||
//
|
//
|
||||||
// Allows using event-driven PLEG (pod lifecycle event generator) through kubelet
|
// Allows using event-driven PLEG (pod lifecycle event generator) through kubelet
|
||||||
// which avoids frequent relisting of containers which helps optimize performance.
|
// which avoids frequent relisting of containers which helps optimize performance.
|
||||||
@ -937,7 +938,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
|||||||
|
|
||||||
DynamicResourceAllocation: {Default: false, PreRelease: featuregate.Alpha},
|
DynamicResourceAllocation: {Default: false, PreRelease: featuregate.Alpha},
|
||||||
|
|
||||||
EventedPLEG: {Default: false, PreRelease: featuregate.Alpha},
|
EventedPLEG: {Default: false, PreRelease: featuregate.Beta}, // off by default, requires CRI Runtime support
|
||||||
|
|
||||||
ExecProbeTimeout: {Default: true, PreRelease: featuregate.GA}, // lock to default and remove after v1.22 based on KEP #1972 update
|
ExecProbeTimeout: {Default: true, PreRelease: featuregate.GA}, // lock to default and remove after v1.22 based on KEP #1972 update
|
||||||
|
|
||||||
|
@ -37,8 +37,10 @@ import (
|
|||||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
"k8s.io/kubernetes/pkg/features"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/util"
|
"k8s.io/kubernetes/pkg/kubelet/util"
|
||||||
"k8s.io/kubernetes/pkg/probe/exec"
|
"k8s.io/kubernetes/pkg/probe/exec"
|
||||||
|
|
||||||
utilexec "k8s.io/utils/exec"
|
utilexec "k8s.io/utils/exec"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -797,6 +799,9 @@ func (r *remoteRuntimeService) GetContainerEvents(containerEventsCh chan *runtim
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The connection is successfully established and we have a streaming client ready for use.
|
||||||
|
metrics.EventedPLEGConn.Inc()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
resp, err := containerEventsStreamingClient.Recv()
|
resp, err := containerEventsStreamingClient.Recv()
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
|
@ -43,6 +43,9 @@ const (
|
|||||||
PLEGDiscardEventsKey = "pleg_discard_events"
|
PLEGDiscardEventsKey = "pleg_discard_events"
|
||||||
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
||||||
PLEGLastSeenKey = "pleg_last_seen_seconds"
|
PLEGLastSeenKey = "pleg_last_seen_seconds"
|
||||||
|
EventedPLEGConnErrKey = "evented_pleg_connection_error_count"
|
||||||
|
EventedPLEGConnKey = "evented_pleg_connection_success_count"
|
||||||
|
EventedPLEGConnLatencyKey = "evented_pleg_connection_latency_seconds"
|
||||||
EvictionsKey = "evictions"
|
EvictionsKey = "evictions"
|
||||||
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
||||||
PreemptionsKey = "preemptions"
|
PreemptionsKey = "preemptions"
|
||||||
@ -250,6 +253,41 @@ var (
|
|||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// EventedPLEGConnErr is a Counter that tracks the number of errors encountered during
|
||||||
|
// the establishment of streaming connection with the CRI runtime.
|
||||||
|
EventedPLEGConnErr = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: EventedPLEGConnErrKey,
|
||||||
|
Help: "The number of errors encountered during the establishment of streaming connection with the CRI runtime.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
// EventedPLEGConn is a Counter that tracks the number of times a streaming client
|
||||||
|
// was obtained to receive CRI Events.
|
||||||
|
EventedPLEGConn = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: EventedPLEGConnKey,
|
||||||
|
Help: "The number of times a streaming client was obtained to receive CRI Events.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
// EventedPLEGConnLatency is a Histogram that tracks the latency of streaming connection
|
||||||
|
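// with the CRI runtime, measured in seconds. Each sample is the time elapsed between a CRI event's CreatedAt timestamp and the moment that event is handled in processCRIEvents.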
|
||||||
|
EventedPLEGConnLatency = metrics.NewHistogram(
|
||||||
|
&metrics.HistogramOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: EventedPLEGConnLatencyKey,
|
||||||
|
Help: "The latency of streaming connection with the CRI runtime, measured in seconds.",
|
||||||
|
Buckets: metrics.DefBuckets,
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
||||||
// Broken down by operation type.
|
// Broken down by operation type.
|
||||||
RuntimeOperations = metrics.NewCounterVec(
|
RuntimeOperations = metrics.NewCounterVec(
|
||||||
@ -692,6 +730,9 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(PLEGDiscardEvents)
|
legacyregistry.MustRegister(PLEGDiscardEvents)
|
||||||
legacyregistry.MustRegister(PLEGRelistInterval)
|
legacyregistry.MustRegister(PLEGRelistInterval)
|
||||||
legacyregistry.MustRegister(PLEGLastSeen)
|
legacyregistry.MustRegister(PLEGLastSeen)
|
||||||
|
legacyregistry.MustRegister(EventedPLEGConnErr)
|
||||||
|
legacyregistry.MustRegister(EventedPLEGConn)
|
||||||
|
legacyregistry.MustRegister(EventedPLEGConnLatency)
|
||||||
legacyregistry.MustRegister(RuntimeOperations)
|
legacyregistry.MustRegister(RuntimeOperations)
|
||||||
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
||||||
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
||||||
|
@ -190,6 +190,7 @@ func (e *EventedPLEG) watchEventsChannel() {
|
|||||||
|
|
||||||
err := e.runtimeService.GetContainerEvents(containerEventsResponseCh)
|
err := e.runtimeService.GetContainerEvents(containerEventsResponseCh)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.EventedPLEGConnErr.Inc()
|
||||||
numAttempts++
|
numAttempts++
|
||||||
e.Relist() // Force a relist to get the latest container and pods running metric.
|
e.Relist() // Force a relist to get the latest container and pods running metric.
|
||||||
klog.V(4).InfoS("Evented PLEG: Failed to get container events, retrying: ", "err", err)
|
klog.V(4).InfoS("Evented PLEG: Failed to get container events, retrying: ", "err", err)
|
||||||
@ -245,6 +246,7 @@ func (e *EventedPLEG) processCRIEvents(containerEventsResponseCh chan *runtimeap
|
|||||||
|
|
||||||
e.updateRunningPodMetric(status)
|
e.updateRunningPodMetric(status)
|
||||||
e.updateRunningContainerMetric(status)
|
e.updateRunningContainerMetric(status)
|
||||||
|
e.updateLatencyMetric(event)
|
||||||
|
|
||||||
if event.ContainerEventType == runtimeapi.ContainerEventType_CONTAINER_DELETED_EVENT {
|
if event.ContainerEventType == runtimeapi.ContainerEventType_CONTAINER_DELETED_EVENT {
|
||||||
for _, sandbox := range status.SandboxStatuses {
|
for _, sandbox := range status.SandboxStatuses {
|
||||||
@ -410,6 +412,11 @@ func (e *EventedPLEG) updateRunningContainerMetric(podStatus *kubecontainer.PodS
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *EventedPLEG) updateLatencyMetric(event *runtimeapi.ContainerEventResponse) {
|
||||||
|
duration := time.Duration(time.Now().UnixNano()-event.CreatedAt) * time.Nanosecond
|
||||||
|
metrics.EventedPLEGConnLatency.Observe(duration.Seconds())
|
||||||
|
}
|
||||||
|
|
||||||
func (e *EventedPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) (error, bool) {
|
func (e *EventedPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) (error, bool) {
|
||||||
return fmt.Errorf("not implemented"), false
|
return fmt.Errorf("not implemented"), false
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user