mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-20 18:31:15 +00:00
Add connection related metrics to EventedPLEG
Signed-off-by: Harshal Patil <harpatil@redhat.com>
This commit is contained in:
parent
d971809b49
commit
412b4b3329
@ -37,8 +37,10 @@ import (
|
|||||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
"k8s.io/kubernetes/pkg/features"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/util"
|
"k8s.io/kubernetes/pkg/kubelet/util"
|
||||||
"k8s.io/kubernetes/pkg/probe/exec"
|
"k8s.io/kubernetes/pkg/probe/exec"
|
||||||
|
|
||||||
utilexec "k8s.io/utils/exec"
|
utilexec "k8s.io/utils/exec"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -797,6 +799,9 @@ func (r *remoteRuntimeService) GetContainerEvents(containerEventsCh chan *runtim
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The connection is successfully established and we have a streaming client ready for use.
|
||||||
|
metrics.EventedPLEGConn.Inc()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
resp, err := containerEventsStreamingClient.Recv()
|
resp, err := containerEventsStreamingClient.Recv()
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
|
@ -43,6 +43,9 @@ const (
|
|||||||
PLEGDiscardEventsKey = "pleg_discard_events"
|
PLEGDiscardEventsKey = "pleg_discard_events"
|
||||||
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
|
||||||
PLEGLastSeenKey = "pleg_last_seen_seconds"
|
PLEGLastSeenKey = "pleg_last_seen_seconds"
|
||||||
|
EventedPLEGConnErrKey = "evented_pleg_connection_error_count"
|
||||||
|
EventedPLEGConnKey = "evented_pleg_connection_success_count"
|
||||||
|
EventedPLEGConnLatencyKey = "evented_pleg_connection_latency_seconds"
|
||||||
EvictionsKey = "evictions"
|
EvictionsKey = "evictions"
|
||||||
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
EvictionStatsAgeKey = "eviction_stats_age_seconds"
|
||||||
PreemptionsKey = "preemptions"
|
PreemptionsKey = "preemptions"
|
||||||
@ -240,6 +243,41 @@ var (
|
|||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// EventedPLEGConnErr is a Counter that tracks the number of errors encountered during
|
||||||
|
// the establishment of the streaming connection with the CRI runtime.
|
||||||
|
EventedPLEGConnErr = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: EventedPLEGConnErrKey,
|
||||||
|
Help: "The number of errors encountered during the establishment of streaming connection with the CRI runtime.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
// EventedPLEGConn is a Counter that tracks the number of times a streaming client
|
||||||
|
// was obtained to receive CRI Events.
|
||||||
|
EventedPLEGConn = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: EventedPLEGConnKey,
|
||||||
|
Help: "The number of times a streaming client was obtained to receive CRI Events.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
// EventedPLEGConnLatency is a Histogram that tracks the latency of the streaming connection
|
||||||
|
// with the CRI runtime, measured in seconds.
|
||||||
|
EventedPLEGConnLatency = metrics.NewHistogram(
|
||||||
|
&metrics.HistogramOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: EventedPLEGConnLatencyKey,
|
||||||
|
Help: "The latency of streaming connection with the CRI runtime, measured in seconds.",
|
||||||
|
Buckets: metrics.DefBuckets,
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
|
||||||
// Broken down by operation type.
|
// Broken down by operation type.
|
||||||
RuntimeOperations = metrics.NewCounterVec(
|
RuntimeOperations = metrics.NewCounterVec(
|
||||||
@ -605,6 +643,9 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(PLEGDiscardEvents)
|
legacyregistry.MustRegister(PLEGDiscardEvents)
|
||||||
legacyregistry.MustRegister(PLEGRelistInterval)
|
legacyregistry.MustRegister(PLEGRelistInterval)
|
||||||
legacyregistry.MustRegister(PLEGLastSeen)
|
legacyregistry.MustRegister(PLEGLastSeen)
|
||||||
|
legacyregistry.MustRegister(EventedPLEGConnErr)
|
||||||
|
legacyregistry.MustRegister(EventedPLEGConn)
|
||||||
|
legacyregistry.MustRegister(EventedPLEGConnLatency)
|
||||||
legacyregistry.MustRegister(RuntimeOperations)
|
legacyregistry.MustRegister(RuntimeOperations)
|
||||||
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
legacyregistry.MustRegister(RuntimeOperationsDuration)
|
||||||
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
legacyregistry.MustRegister(RuntimeOperationsErrors)
|
||||||
|
@ -190,6 +190,7 @@ func (e *EventedPLEG) watchEventsChannel() {
|
|||||||
|
|
||||||
err := e.runtimeService.GetContainerEvents(containerEventsResponseCh)
|
err := e.runtimeService.GetContainerEvents(containerEventsResponseCh)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.EventedPLEGConnErr.Inc()
|
||||||
numAttempts++
|
numAttempts++
|
||||||
e.Relist() // Force a relist to get the latest container and pods running metric.
|
e.Relist() // Force a relist to get the latest container and pods running metric.
|
||||||
klog.V(4).InfoS("Evented PLEG: Failed to get container events, retrying: ", "err", err)
|
klog.V(4).InfoS("Evented PLEG: Failed to get container events, retrying: ", "err", err)
|
||||||
@ -245,6 +246,7 @@ func (e *EventedPLEG) processCRIEvents(containerEventsResponseCh chan *runtimeap
|
|||||||
|
|
||||||
e.updateRunningPodMetric(status)
|
e.updateRunningPodMetric(status)
|
||||||
e.updateRunningContainerMetric(status)
|
e.updateRunningContainerMetric(status)
|
||||||
|
e.updateLatencyMetric(event)
|
||||||
|
|
||||||
if event.ContainerEventType == runtimeapi.ContainerEventType_CONTAINER_DELETED_EVENT {
|
if event.ContainerEventType == runtimeapi.ContainerEventType_CONTAINER_DELETED_EVENT {
|
||||||
for _, sandbox := range status.SandboxStatuses {
|
for _, sandbox := range status.SandboxStatuses {
|
||||||
@ -410,6 +412,11 @@ func (e *EventedPLEG) updateRunningContainerMetric(podStatus *kubecontainer.PodS
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *EventedPLEG) updateLatencyMetric(event *runtimeapi.ContainerEventResponse) {
|
||||||
|
duration := time.Duration(time.Now().UnixNano()-event.CreatedAt) * time.Nanosecond
|
||||||
|
metrics.EventedPLEGConnLatency.Observe(duration.Seconds())
|
||||||
|
}
|
||||||
|
|
||||||
func (e *EventedPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) (error, bool) {
|
func (e *EventedPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) (error, bool) {
|
||||||
return fmt.Errorf("not implemented"), false
|
return fmt.Errorf("not implemented"), false
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user